Turn HARD_REGNO_MODE_OK into a target hook
[official-gcc.git] / gcc / config / arm / arm.c
blob: 3c6c56ccdb5ae5f50ec90b0a1d02bfe40385ead2
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
78 void (*arm_lang_output_object_attributes_hook)(void);
80 struct four_ints
81 {
82   int i[4];
83 };
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
190 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
191 const_tree);
192 static rtx aapcs_libcall_value (machine_mode);
193 static int aapcs_select_return_coproc (const_tree, const_tree);
195 #ifdef OBJECT_FORMAT_ELF
196 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
197 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
198 #endif
199 #ifndef ARM_PE
200 static void arm_encode_section_info (tree, rtx, int);
201 #endif
203 static void arm_file_end (void);
204 static void arm_file_start (void);
205 static void arm_insert_attributes (tree, tree *);
207 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
208 tree, int *, int);
209 static bool arm_pass_by_reference (cumulative_args_t,
210 machine_mode, const_tree, bool);
211 static bool arm_promote_prototypes (const_tree);
212 static bool arm_default_short_enums (void);
213 static bool arm_align_anon_bitfield (void);
214 static bool arm_return_in_msb (const_tree);
215 static bool arm_must_pass_in_stack (machine_mode, const_tree);
216 static bool arm_return_in_memory (const_tree, const_tree);
217 #if ARM_UNWIND_INFO
218 static void arm_unwind_emit (FILE *, rtx_insn *);
219 static bool arm_output_ttype (rtx);
220 static void arm_asm_emit_except_personality (rtx);
221 #endif
222 static void arm_asm_init_sections (void);
223 static rtx arm_dwarf_register_span (rtx);
225 static tree arm_cxx_guard_type (void);
226 static bool arm_cxx_guard_mask_bit (void);
227 static tree arm_get_cookie_size (tree);
228 static bool arm_cookie_has_size (void);
229 static bool arm_cxx_cdtor_returns_this (void);
230 static bool arm_cxx_key_method_may_be_inline (void);
231 static void arm_cxx_determine_class_data_visibility (tree);
232 static bool arm_cxx_class_data_always_comdat (void);
233 static bool arm_cxx_use_aeabi_atexit (void);
234 static void arm_init_libfuncs (void);
235 static tree arm_build_builtin_va_list (void);
236 static void arm_expand_builtin_va_start (tree, rtx);
237 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
238 static void arm_option_override (void);
239 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
240 static void arm_option_restore (struct gcc_options *,
241 struct cl_target_option *);
242 static void arm_override_options_after_change (void);
243 static void arm_option_print (FILE *, int, struct cl_target_option *);
244 static void arm_set_current_function (tree);
245 static bool arm_can_inline_p (tree, tree);
246 static void arm_relayout_function (tree);
247 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
248 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
249 static bool arm_sched_can_speculate_insn (rtx_insn *);
250 static bool arm_macro_fusion_p (void);
251 static bool arm_cannot_copy_insn_p (rtx_insn *);
252 static int arm_issue_rate (void);
253 static int arm_first_cycle_multipass_dfa_lookahead (void);
254 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
255 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
256 static bool arm_output_addr_const_extra (FILE *, rtx);
257 static bool arm_allocate_stack_slots_for_args (void);
258 static bool arm_warn_func_return (tree);
259 static tree arm_promoted_type (const_tree t);
260 static bool arm_scalar_mode_supported_p (scalar_mode);
261 static bool arm_frame_pointer_required (void);
262 static bool arm_can_eliminate (const int, const int);
263 static void arm_asm_trampoline_template (FILE *);
264 static void arm_trampoline_init (rtx, tree, rtx);
265 static rtx arm_trampoline_adjust_address (rtx);
266 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
267 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
268 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool arm_array_mode_supported_p (machine_mode,
271 unsigned HOST_WIDE_INT);
272 static machine_mode arm_preferred_simd_mode (scalar_mode);
273 static bool arm_class_likely_spilled_p (reg_class_t);
274 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
275 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
276 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
277 const_tree type,
278 int misalignment,
279 bool is_packed);
280 static void arm_conditional_register_usage (void);
281 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
282 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
283 static unsigned int arm_autovectorize_vector_sizes (void);
284 static int arm_default_branch_cost (bool, bool);
285 static int arm_cortex_a5_branch_cost (bool, bool);
286 static int arm_cortex_m_branch_cost (bool, bool);
287 static int arm_cortex_m7_branch_cost (bool, bool);
289 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
290 const unsigned char *sel);
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 /* Table of machine attributes. */
319 static const struct attribute_spec arm_attribute_table[] =
321 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
322 affects_type_identity } */
323 /* Function calls made to this symbol must be done indirectly, because
324 it may lie outside of the 26 bit addressing range of a normal function
325 call. */
326 { "long_call", 0, 0, false, true, true, NULL, false },
327 /* Whereas these functions are always known to reside within the 26 bit
328 addressing range. */
329 { "short_call", 0, 0, false, true, true, NULL, false },
330 /* Specify the procedure call conventions for a function. */
331 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
332 false },
333 /* Interrupt Service Routines have special prologue and epilogue requirements. */
334 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
335 false },
336 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #ifdef ARM_PE
341 /* ARM/PE has three new attributes:
342 interfacearm - ?
343 dllexport - for exporting a function/variable that will live in a dll
344 dllimport - for importing a function/variable from a dll
346 Microsoft allows multiple declspecs in one __declspec, separating
347 them with spaces. We do NOT support this. Instead, use __declspec
348 multiple times.
350 { "dllimport", 0, 0, true, false, false, NULL, false },
351 { "dllexport", 0, 0, true, false, false, NULL, false },
352 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
353 false },
354 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
355 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
356 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
357 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
358 false },
359 #endif
360 /* ARMv8-M Security Extensions support. */
361 { "cmse_nonsecure_entry", 0, 0, true, false, false,
362 arm_handle_cmse_nonsecure_entry, false },
363 { "cmse_nonsecure_call", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_call, true },
365 { NULL, 0, 0, false, false, false, NULL, false }
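/* For illustration only: a minimal sketch of how user code might request
   the attributes handled above, assuming an Arm target.  The declarations
   below are invented for this example and do not appear elsewhere in this
   file.

     void far_away_handler (void) __attribute__ ((long_call));
     void nearby_helper (void) __attribute__ ((short_call));
     void uart_rx_isr (void) __attribute__ ((isr ("IRQ")));
     double scale_sample (double x) __attribute__ ((pcs ("aapcs-vfp")));

   "isr" optionally names the interrupt kind and "pcs" selects the
   procedure call standard variant; the argument strings are validated by
   arm_handle_isr_attribute and arm_handle_pcs_attribute respectively.  */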
368 /* Initialize the GCC target structure. */
369 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
370 #undef TARGET_MERGE_DECL_ATTRIBUTES
371 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
372 #endif
374 #undef TARGET_LEGITIMIZE_ADDRESS
375 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
377 #undef TARGET_ATTRIBUTE_TABLE
378 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
380 #undef TARGET_INSERT_ATTRIBUTES
381 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
383 #undef TARGET_ASM_FILE_START
384 #define TARGET_ASM_FILE_START arm_file_start
385 #undef TARGET_ASM_FILE_END
386 #define TARGET_ASM_FILE_END arm_file_end
388 #undef TARGET_ASM_ALIGNED_SI_OP
389 #define TARGET_ASM_ALIGNED_SI_OP NULL
390 #undef TARGET_ASM_INTEGER
391 #define TARGET_ASM_INTEGER arm_assemble_integer
393 #undef TARGET_PRINT_OPERAND
394 #define TARGET_PRINT_OPERAND arm_print_operand
395 #undef TARGET_PRINT_OPERAND_ADDRESS
396 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
397 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
398 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
400 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
401 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
403 #undef TARGET_ASM_FUNCTION_PROLOGUE
404 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
406 #undef TARGET_ASM_FUNCTION_EPILOGUE
407 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
409 #undef TARGET_CAN_INLINE_P
410 #define TARGET_CAN_INLINE_P arm_can_inline_p
412 #undef TARGET_RELAYOUT_FUNCTION
413 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
419 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
421 #undef TARGET_OPTION_SAVE
422 #define TARGET_OPTION_SAVE arm_option_save
424 #undef TARGET_OPTION_RESTORE
425 #define TARGET_OPTION_RESTORE arm_option_restore
427 #undef TARGET_OPTION_PRINT
428 #define TARGET_OPTION_PRINT arm_option_print
430 #undef TARGET_COMP_TYPE_ATTRIBUTES
431 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
433 #undef TARGET_SCHED_CAN_SPECULATE_INSN
434 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
436 #undef TARGET_SCHED_MACRO_FUSION_P
437 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
439 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
440 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
442 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
443 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
445 #undef TARGET_SCHED_ADJUST_COST
446 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
448 #undef TARGET_SET_CURRENT_FUNCTION
449 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
451 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
452 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
454 #undef TARGET_SCHED_REORDER
455 #define TARGET_SCHED_REORDER arm_sched_reorder
457 #undef TARGET_REGISTER_MOVE_COST
458 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
460 #undef TARGET_MEMORY_MOVE_COST
461 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
463 #undef TARGET_ENCODE_SECTION_INFO
464 #ifdef ARM_PE
465 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
466 #else
467 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
468 #endif
470 #undef TARGET_STRIP_NAME_ENCODING
471 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
473 #undef TARGET_ASM_INTERNAL_LABEL
474 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
476 #undef TARGET_FLOATN_MODE
477 #define TARGET_FLOATN_MODE arm_floatn_mode
479 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
480 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
482 #undef TARGET_FUNCTION_VALUE
483 #define TARGET_FUNCTION_VALUE arm_function_value
485 #undef TARGET_LIBCALL_VALUE
486 #define TARGET_LIBCALL_VALUE arm_libcall_value
488 #undef TARGET_FUNCTION_VALUE_REGNO_P
489 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
491 #undef TARGET_ASM_OUTPUT_MI_THUNK
492 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
493 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
494 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
496 #undef TARGET_RTX_COSTS
497 #define TARGET_RTX_COSTS arm_rtx_costs
498 #undef TARGET_ADDRESS_COST
499 #define TARGET_ADDRESS_COST arm_address_cost
501 #undef TARGET_SHIFT_TRUNCATION_MASK
502 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
503 #undef TARGET_VECTOR_MODE_SUPPORTED_P
504 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
505 #undef TARGET_ARRAY_MODE_SUPPORTED_P
506 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
507 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
508 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
509 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
510 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
511 arm_autovectorize_vector_sizes
513 #undef TARGET_MACHINE_DEPENDENT_REORG
514 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
516 #undef TARGET_INIT_BUILTINS
517 #define TARGET_INIT_BUILTINS arm_init_builtins
518 #undef TARGET_EXPAND_BUILTIN
519 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
520 #undef TARGET_BUILTIN_DECL
521 #define TARGET_BUILTIN_DECL arm_builtin_decl
523 #undef TARGET_INIT_LIBFUNCS
524 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
526 #undef TARGET_PROMOTE_FUNCTION_MODE
527 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
528 #undef TARGET_PROMOTE_PROTOTYPES
529 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
530 #undef TARGET_PASS_BY_REFERENCE
531 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
532 #undef TARGET_ARG_PARTIAL_BYTES
533 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
534 #undef TARGET_FUNCTION_ARG
535 #define TARGET_FUNCTION_ARG arm_function_arg
536 #undef TARGET_FUNCTION_ARG_ADVANCE
537 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
538 #undef TARGET_FUNCTION_ARG_BOUNDARY
539 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
541 #undef TARGET_SETUP_INCOMING_VARARGS
542 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
544 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
545 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
547 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
548 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
549 #undef TARGET_TRAMPOLINE_INIT
550 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
551 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
552 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
554 #undef TARGET_WARN_FUNC_RETURN
555 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
557 #undef TARGET_DEFAULT_SHORT_ENUMS
558 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
560 #undef TARGET_ALIGN_ANON_BITFIELD
561 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
563 #undef TARGET_NARROW_VOLATILE_BITFIELD
564 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
566 #undef TARGET_CXX_GUARD_TYPE
567 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
569 #undef TARGET_CXX_GUARD_MASK_BIT
570 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
572 #undef TARGET_CXX_GET_COOKIE_SIZE
573 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
575 #undef TARGET_CXX_COOKIE_HAS_SIZE
576 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
578 #undef TARGET_CXX_CDTOR_RETURNS_THIS
579 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
581 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
582 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
584 #undef TARGET_CXX_USE_AEABI_ATEXIT
585 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
587 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
588 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
589 arm_cxx_determine_class_data_visibility
591 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
592 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
594 #undef TARGET_RETURN_IN_MSB
595 #define TARGET_RETURN_IN_MSB arm_return_in_msb
597 #undef TARGET_RETURN_IN_MEMORY
598 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
600 #undef TARGET_MUST_PASS_IN_STACK
601 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
603 #if ARM_UNWIND_INFO
604 #undef TARGET_ASM_UNWIND_EMIT
605 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
607 /* EABI unwinding tables use a different format for the typeinfo tables. */
608 #undef TARGET_ASM_TTYPE
609 #define TARGET_ASM_TTYPE arm_output_ttype
611 #undef TARGET_ARM_EABI_UNWINDER
612 #define TARGET_ARM_EABI_UNWINDER true
614 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
615 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
617 #endif /* ARM_UNWIND_INFO */
619 #undef TARGET_ASM_INIT_SECTIONS
620 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
622 #undef TARGET_DWARF_REGISTER_SPAN
623 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
625 #undef TARGET_CANNOT_COPY_INSN_P
626 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
628 #ifdef HAVE_AS_TLS
629 #undef TARGET_HAVE_TLS
630 #define TARGET_HAVE_TLS true
631 #endif
633 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
634 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
636 #undef TARGET_LEGITIMATE_CONSTANT_P
637 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
639 #undef TARGET_CANNOT_FORCE_CONST_MEM
640 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
642 #undef TARGET_MAX_ANCHOR_OFFSET
643 #define TARGET_MAX_ANCHOR_OFFSET 4095
645 /* The minimum is set such that the total size of the block
646 for a particular anchor is -4088 + 1 + 4095 bytes, which is
647 divisible by eight, ensuring natural spacing of anchors. */
648 #undef TARGET_MIN_ANCHOR_OFFSET
649 #define TARGET_MIN_ANCHOR_OFFSET -4088
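/* Spelled out for clarity (this arithmetic is implicit in the comment
   above): the anchor range runs from -4088 to +4095 inclusive, which is
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the block
   size is divisible by eight.  */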
651 #undef TARGET_SCHED_ISSUE_RATE
652 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
654 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
655 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
656 arm_first_cycle_multipass_dfa_lookahead
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
660 arm_first_cycle_multipass_dfa_lookahead_guard
662 #undef TARGET_MANGLE_TYPE
663 #define TARGET_MANGLE_TYPE arm_mangle_type
665 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
666 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
668 #undef TARGET_BUILD_BUILTIN_VA_LIST
669 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
670 #undef TARGET_EXPAND_BUILTIN_VA_START
671 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
672 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
673 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
675 #ifdef HAVE_AS_TLS
676 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
677 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
678 #endif
680 #undef TARGET_LEGITIMATE_ADDRESS_P
681 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
683 #undef TARGET_PREFERRED_RELOAD_CLASS
684 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
686 #undef TARGET_PROMOTED_TYPE
687 #define TARGET_PROMOTED_TYPE arm_promoted_type
689 #undef TARGET_SCALAR_MODE_SUPPORTED_P
690 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
692 #undef TARGET_COMPUTE_FRAME_LAYOUT
693 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
695 #undef TARGET_FRAME_POINTER_REQUIRED
696 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
698 #undef TARGET_CAN_ELIMINATE
699 #define TARGET_CAN_ELIMINATE arm_can_eliminate
701 #undef TARGET_CONDITIONAL_REGISTER_USAGE
702 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
704 #undef TARGET_CLASS_LIKELY_SPILLED_P
705 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
707 #undef TARGET_VECTORIZE_BUILTINS
708 #define TARGET_VECTORIZE_BUILTINS
710 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
711 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
712 arm_builtin_vectorized_function
714 #undef TARGET_VECTOR_ALIGNMENT
715 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
717 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
718 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
719 arm_vector_alignment_reachable
721 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
722 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
723 arm_builtin_support_vector_misalignment
725 #undef TARGET_PREFERRED_RENAME_CLASS
726 #define TARGET_PREFERRED_RENAME_CLASS \
727 arm_preferred_rename_class
729 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
730 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
731 arm_vectorize_vec_perm_const_ok
733 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
734 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
735 arm_builtin_vectorization_cost
736 #undef TARGET_VECTORIZE_ADD_STMT_COST
737 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
739 #undef TARGET_CANONICALIZE_COMPARISON
740 #define TARGET_CANONICALIZE_COMPARISON \
741 arm_canonicalize_comparison
743 #undef TARGET_ASAN_SHADOW_OFFSET
744 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
746 #undef MAX_INSN_PER_IT_BLOCK
747 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
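/* As a concrete illustration (not from the original source): with
   -mrestrict-it (multi-instruction IT blocks are deprecated on ARMv8),
   an IT block covers a single conditional instruction, e.g.
   "it eq; moveq r0, #1", whereas otherwise one IT instruction may
   predicate up to four following instructions ("itttt eq; ...").  */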
749 #undef TARGET_CAN_USE_DOLOOP_P
750 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
752 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
753 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
755 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
756 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
758 #undef TARGET_SCHED_FUSION_PRIORITY
759 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
761 #undef TARGET_ASM_FUNCTION_SECTION
762 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
764 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
765 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
767 #undef TARGET_SECTION_TYPE_FLAGS
768 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
770 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
771 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
773 #undef TARGET_C_EXCESS_PRECISION
774 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
776 /* Although the architecture reserves bits 0 and 1, only the former is
777 used for ARM/Thumb ISA selection in v7 and earlier versions. */
778 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
779 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
781 #undef TARGET_FIXED_CONDITION_CODE_REGS
782 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
784 #undef TARGET_HARD_REGNO_MODE_OK
785 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
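/* Illustrative sketch only -- arm_hard_regno_mode_ok, the real
   implementation, appears later in this file.  A hook of this shape
   answers whether hard register REGNO can hold a value of mode MODE; a
   hypothetical minimal version might only insist that multi-word values
   start in an even-numbered register:

     static bool
     example_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
     {
       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
         return (regno % 2) == 0;
       return true;
     }
*/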
787 /* Obstack for minipool constant handling. */
788 static struct obstack minipool_obstack;
789 static char * minipool_startobj;
791 /* The maximum number of insns skipped which
792 will be conditionalised if possible. */
793 static int max_insns_skipped = 5;
795 extern FILE * asm_out_file;
797 /* True if we are currently building a constant table. */
798 int making_const_table;
800 /* The processor for which instructions should be scheduled. */
801 enum processor_type arm_tune = TARGET_CPU_arm_none;
803 /* The current tuning set. */
804 const struct tune_params *current_tune;
806 /* Which floating point hardware to schedule for. */
807 int arm_fpu_attr;
809 /* Used for Thumb call_via trampolines. */
810 rtx thumb_call_via_label[14];
811 static int thumb_call_reg_needed;
813 /* The bits in this mask specify which instruction scheduling options should
814 be used. */
815 unsigned int tune_flags = 0;
817 /* The highest ARM architecture version supported by the
818 target. */
819 enum base_architecture arm_base_arch = BASE_ARCH_0;
821 /* Active target architecture and tuning. */
823 struct arm_build_target arm_active_target;
825 /* The following are used in the arm.md file as equivalents to bits
826 in the above two flag variables. */
828 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
829 int arm_arch3m = 0;
831 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
832 int arm_arch4 = 0;
834 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
835 int arm_arch4t = 0;
837 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
838 int arm_arch5 = 0;
840 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
841 int arm_arch5e = 0;
843 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
844 int arm_arch5te = 0;
846 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
847 int arm_arch6 = 0;
849 /* Nonzero if this chip supports the ARM 6K extensions. */
850 int arm_arch6k = 0;
852 /* Nonzero if this chip supports the ARM 6KZ extensions. */
853 int arm_arch6kz = 0;
855 /* Nonzero if instructions present in ARMv6-M can be used. */
856 int arm_arch6m = 0;
858 /* Nonzero if this chip supports the ARM 7 extensions. */
859 int arm_arch7 = 0;
861 /* Nonzero if this chip supports the Large Physical Address Extension. */
862 int arm_arch_lpae = 0;
864 /* Nonzero if instructions not present in the 'M' profile can be used. */
865 int arm_arch_notm = 0;
867 /* Nonzero if instructions present in ARMv7E-M can be used. */
868 int arm_arch7em = 0;
870 /* Nonzero if instructions present in ARMv8 can be used. */
871 int arm_arch8 = 0;
873 /* Nonzero if this chip supports the ARMv8.1 extensions. */
874 int arm_arch8_1 = 0;
876 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
877 int arm_arch8_2 = 0;
879 /* Nonzero if this chip supports the FP16 instructions extension of ARM
880 Architecture 8.2. */
881 int arm_fp16_inst = 0;
883 /* Nonzero if this chip can benefit from load scheduling. */
884 int arm_ld_sched = 0;
886 /* Nonzero if this chip is a StrongARM. */
887 int arm_tune_strongarm = 0;
889 /* Nonzero if this chip supports Intel Wireless MMX technology. */
890 int arm_arch_iwmmxt = 0;
892 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
893 int arm_arch_iwmmxt2 = 0;
895 /* Nonzero if this chip is an XScale. */
896 int arm_arch_xscale = 0;
898 /* Nonzero if tuning for XScale */
899 int arm_tune_xscale = 0;
901 /* Nonzero if we want to tune for stores that access the write-buffer.
902 This typically means an ARM6 or ARM7 with MMU or MPU. */
903 int arm_tune_wbuf = 0;
905 /* Nonzero if tuning for Cortex-A9. */
906 int arm_tune_cortex_a9 = 0;
908 /* Nonzero if we should define __THUMB_INTERWORK__ in the
909 preprocessor.
910 XXX This is a bit of a hack, it's intended to help work around
911 problems in GLD which doesn't understand that armv5t code is
912 interworking clean. */
913 int arm_cpp_interwork = 0;
915 /* Nonzero if chip supports Thumb 1. */
916 int arm_arch_thumb1;
918 /* Nonzero if chip supports Thumb 2. */
919 int arm_arch_thumb2;
921 /* Nonzero if chip supports integer division instruction. */
922 int arm_arch_arm_hwdiv;
923 int arm_arch_thumb_hwdiv;
925 /* Nonzero if chip disallows volatile memory access in IT block. */
926 int arm_arch_no_volatile_ce;
928 /* Nonzero if we should use Neon to handle 64-bits operations rather
929 than core registers. */
930 int prefer_neon_for_64bits = 0;
932 /* Nonzero if we shouldn't use literal pools. */
933 bool arm_disable_literal_pool = false;
935 /* The register number to be used for the PIC offset register. */
936 unsigned arm_pic_register = INVALID_REGNUM;
938 enum arm_pcs arm_pcs_default;
940 /* For an explanation of these variables, see final_prescan_insn below. */
941 int arm_ccfsm_state;
942 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
943 enum arm_cond_code arm_current_cc;
945 rtx arm_target_insn;
946 int arm_target_label;
947 /* The number of conditionally executed insns, including the current insn. */
948 int arm_condexec_count = 0;
949 /* A bitmask specifying the patterns for the IT block.
950 Zero means do not output an IT block before this insn. */
951 int arm_condexec_mask = 0;
952 /* The number of bits used in arm_condexec_mask. */
953 int arm_condexec_masklen = 0;
955 /* Nonzero if chip supports the ARMv8 CRC instructions. */
956 int arm_arch_crc = 0;
958 /* Nonzero if chip supports the ARMv8-M security extensions. */
959 int arm_arch_cmse = 0;
961 /* Nonzero if the core has a very small, high-latency, multiply unit. */
962 int arm_m_profile_small_mul = 0;
964 /* The condition codes of the ARM, and the inverse function. */
965 static const char * const arm_condition_codes[] =
967 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
968 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
971 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
972 int arm_regs_in_sequence[] =
974 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
977 #define ARM_LSL_NAME "lsl"
978 #define streq(string1, string2) (strcmp (string1, string2) == 0)
980 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
981 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
982 | (1 << PIC_OFFSET_TABLE_REGNUM)))
984 /* Initialization code. */
986 struct cpu_tune
988 enum processor_type scheduler;
989 unsigned int tune_flags;
990 const struct tune_params *tune;
993 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
994 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
996 num_slots, \
997 l1_size, \
998 l1_line_size \
1001 /* arm generic vectorizer costs. */
1002 static const
1003 struct cpu_vec_costs arm_default_vec_cost = {
1004 1, /* scalar_stmt_cost. */
1005 1, /* scalar load_cost. */
1006 1, /* scalar_store_cost. */
1007 1, /* vec_stmt_cost. */
1008 1, /* vec_to_scalar_cost. */
1009 1, /* scalar_to_vec_cost. */
1010 1, /* vec_align_load_cost. */
1011 1, /* vec_unalign_load_cost. */
1012 1, /* vec_unalign_store_cost. */
1013 1, /* vec_store_cost. */
1014 3, /* cond_taken_branch_cost. */
1015 1, /* cond_not_taken_branch_cost. */
1018 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1019 #include "aarch-cost-tables.h"
1023 const struct cpu_cost_table cortexa9_extra_costs =
1025 /* ALU */
1027 0, /* arith. */
1028 0, /* logical. */
1029 0, /* shift. */
1030 COSTS_N_INSNS (1), /* shift_reg. */
1031 COSTS_N_INSNS (1), /* arith_shift. */
1032 COSTS_N_INSNS (2), /* arith_shift_reg. */
1033 0, /* log_shift. */
1034 COSTS_N_INSNS (1), /* log_shift_reg. */
1035 COSTS_N_INSNS (1), /* extend. */
1036 COSTS_N_INSNS (2), /* extend_arith. */
1037 COSTS_N_INSNS (1), /* bfi. */
1038 COSTS_N_INSNS (1), /* bfx. */
1039 0, /* clz. */
1040 0, /* rev. */
1041 0, /* non_exec. */
1042 true /* non_exec_costs_exec. */
1045 /* MULT SImode */
1047 COSTS_N_INSNS (3), /* simple. */
1048 COSTS_N_INSNS (3), /* flag_setting. */
1049 COSTS_N_INSNS (2), /* extend. */
1050 COSTS_N_INSNS (3), /* add. */
1051 COSTS_N_INSNS (2), /* extend_add. */
1052 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1054 /* MULT DImode */
1056 0, /* simple (N/A). */
1057 0, /* flag_setting (N/A). */
1058 COSTS_N_INSNS (4), /* extend. */
1059 0, /* add (N/A). */
1060 COSTS_N_INSNS (4), /* extend_add. */
1061 0 /* idiv (N/A). */
1064 /* LD/ST */
1066 COSTS_N_INSNS (2), /* load. */
1067 COSTS_N_INSNS (2), /* load_sign_extend. */
1068 COSTS_N_INSNS (2), /* ldrd. */
1069 COSTS_N_INSNS (2), /* ldm_1st. */
1070 1, /* ldm_regs_per_insn_1st. */
1071 2, /* ldm_regs_per_insn_subsequent. */
1072 COSTS_N_INSNS (5), /* loadf. */
1073 COSTS_N_INSNS (5), /* loadd. */
1074 COSTS_N_INSNS (1), /* load_unaligned. */
1075 COSTS_N_INSNS (2), /* store. */
1076 COSTS_N_INSNS (2), /* strd. */
1077 COSTS_N_INSNS (2), /* stm_1st. */
1078 1, /* stm_regs_per_insn_1st. */
1079 2, /* stm_regs_per_insn_subsequent. */
1080 COSTS_N_INSNS (1), /* storef. */
1081 COSTS_N_INSNS (1), /* stored. */
1082 COSTS_N_INSNS (1), /* store_unaligned. */
1083 COSTS_N_INSNS (1), /* loadv. */
1084 COSTS_N_INSNS (1) /* storev. */
1087 /* FP SFmode */
1089 COSTS_N_INSNS (14), /* div. */
1090 COSTS_N_INSNS (4), /* mult. */
1091 COSTS_N_INSNS (7), /* mult_addsub. */
1092 COSTS_N_INSNS (30), /* fma. */
1093 COSTS_N_INSNS (3), /* addsub. */
1094 COSTS_N_INSNS (1), /* fpconst. */
1095 COSTS_N_INSNS (1), /* neg. */
1096 COSTS_N_INSNS (3), /* compare. */
1097 COSTS_N_INSNS (3), /* widen. */
1098 COSTS_N_INSNS (3), /* narrow. */
1099 COSTS_N_INSNS (3), /* toint. */
1100 COSTS_N_INSNS (3), /* fromint. */
1101 COSTS_N_INSNS (3) /* roundint. */
1103 /* FP DFmode */
1105 COSTS_N_INSNS (24), /* div. */
1106 COSTS_N_INSNS (5), /* mult. */
1107 COSTS_N_INSNS (8), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (3), /* addsub. */
1110 COSTS_N_INSNS (1), /* fpconst. */
1111 COSTS_N_INSNS (1), /* neg. */
1112 COSTS_N_INSNS (3), /* compare. */
1113 COSTS_N_INSNS (3), /* widen. */
1114 COSTS_N_INSNS (3), /* narrow. */
1115 COSTS_N_INSNS (3), /* toint. */
1116 COSTS_N_INSNS (3), /* fromint. */
1117 COSTS_N_INSNS (3) /* roundint. */
1120 /* Vector */
1122 COSTS_N_INSNS (1) /* alu. */
1126 const struct cpu_cost_table cortexa8_extra_costs =
1128 /* ALU */
1130 0, /* arith. */
1131 0, /* logical. */
1132 COSTS_N_INSNS (1), /* shift. */
1133 0, /* shift_reg. */
1134 COSTS_N_INSNS (1), /* arith_shift. */
1135 0, /* arith_shift_reg. */
1136 COSTS_N_INSNS (1), /* log_shift. */
1137 0, /* log_shift_reg. */
1138 0, /* extend. */
1139 0, /* extend_arith. */
1140 0, /* bfi. */
1141 0, /* bfx. */
1142 0, /* clz. */
1143 0, /* rev. */
1144 0, /* non_exec. */
1145 true /* non_exec_costs_exec. */
1148 /* MULT SImode */
1150 COSTS_N_INSNS (1), /* simple. */
1151 COSTS_N_INSNS (1), /* flag_setting. */
1152 COSTS_N_INSNS (1), /* extend. */
1153 COSTS_N_INSNS (1), /* add. */
1154 COSTS_N_INSNS (1), /* extend_add. */
1155 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1157 /* MULT DImode */
1159 0, /* simple (N/A). */
1160 0, /* flag_setting (N/A). */
1161 COSTS_N_INSNS (2), /* extend. */
1162 0, /* add (N/A). */
1163 COSTS_N_INSNS (2), /* extend_add. */
1164 0 /* idiv (N/A). */
1167 /* LD/ST */
1169 COSTS_N_INSNS (1), /* load. */
1170 COSTS_N_INSNS (1), /* load_sign_extend. */
1171 COSTS_N_INSNS (1), /* ldrd. */
1172 COSTS_N_INSNS (1), /* ldm_1st. */
1173 1, /* ldm_regs_per_insn_1st. */
1174 2, /* ldm_regs_per_insn_subsequent. */
1175 COSTS_N_INSNS (1), /* loadf. */
1176 COSTS_N_INSNS (1), /* loadd. */
1177 COSTS_N_INSNS (1), /* load_unaligned. */
1178 COSTS_N_INSNS (1), /* store. */
1179 COSTS_N_INSNS (1), /* strd. */
1180 COSTS_N_INSNS (1), /* stm_1st. */
1181 1, /* stm_regs_per_insn_1st. */
1182 2, /* stm_regs_per_insn_subsequent. */
1183 COSTS_N_INSNS (1), /* storef. */
1184 COSTS_N_INSNS (1), /* stored. */
1185 COSTS_N_INSNS (1), /* store_unaligned. */
1186 COSTS_N_INSNS (1), /* loadv. */
1187 COSTS_N_INSNS (1) /* storev. */
1190 /* FP SFmode */
1192 COSTS_N_INSNS (36), /* div. */
1193 COSTS_N_INSNS (11), /* mult. */
1194 COSTS_N_INSNS (20), /* mult_addsub. */
1195 COSTS_N_INSNS (30), /* fma. */
1196 COSTS_N_INSNS (9), /* addsub. */
1197 COSTS_N_INSNS (3), /* fpconst. */
1198 COSTS_N_INSNS (3), /* neg. */
1199 COSTS_N_INSNS (6), /* compare. */
1200 COSTS_N_INSNS (4), /* widen. */
1201 COSTS_N_INSNS (4), /* narrow. */
1202 COSTS_N_INSNS (8), /* toint. */
1203 COSTS_N_INSNS (8), /* fromint. */
1204 COSTS_N_INSNS (8) /* roundint. */
1206 /* FP DFmode */
1208 COSTS_N_INSNS (64), /* div. */
1209 COSTS_N_INSNS (16), /* mult. */
1210 COSTS_N_INSNS (25), /* mult_addsub. */
1211 COSTS_N_INSNS (30), /* fma. */
1212 COSTS_N_INSNS (9), /* addsub. */
1213 COSTS_N_INSNS (3), /* fpconst. */
1214 COSTS_N_INSNS (3), /* neg. */
1215 COSTS_N_INSNS (6), /* compare. */
1216 COSTS_N_INSNS (6), /* widen. */
1217 COSTS_N_INSNS (6), /* narrow. */
1218 COSTS_N_INSNS (8), /* toint. */
1219 COSTS_N_INSNS (8), /* fromint. */
1220 COSTS_N_INSNS (8) /* roundint. */
1223 /* Vector */
1225 COSTS_N_INSNS (1) /* alu. */
1229 const struct cpu_cost_table cortexa5_extra_costs =
1231 /* ALU */
1233 0, /* arith. */
1234 0, /* logical. */
1235 COSTS_N_INSNS (1), /* shift. */
1236 COSTS_N_INSNS (1), /* shift_reg. */
1237 COSTS_N_INSNS (1), /* arith_shift. */
1238 COSTS_N_INSNS (1), /* arith_shift_reg. */
1239 COSTS_N_INSNS (1), /* log_shift. */
1240 COSTS_N_INSNS (1), /* log_shift_reg. */
1241 COSTS_N_INSNS (1), /* extend. */
1242 COSTS_N_INSNS (1), /* extend_arith. */
1243 COSTS_N_INSNS (1), /* bfi. */
1244 COSTS_N_INSNS (1), /* bfx. */
1245 COSTS_N_INSNS (1), /* clz. */
1246 COSTS_N_INSNS (1), /* rev. */
1247 0, /* non_exec. */
1248 true /* non_exec_costs_exec. */
1252 /* MULT SImode */
1254 0, /* simple. */
1255 COSTS_N_INSNS (1), /* flag_setting. */
1256 COSTS_N_INSNS (1), /* extend. */
1257 COSTS_N_INSNS (1), /* add. */
1258 COSTS_N_INSNS (1), /* extend_add. */
1259 COSTS_N_INSNS (7) /* idiv. */
1261 /* MULT DImode */
1263 0, /* simple (N/A). */
1264 0, /* flag_setting (N/A). */
1265 COSTS_N_INSNS (1), /* extend. */
1266 0, /* add. */
1267 COSTS_N_INSNS (2), /* extend_add. */
1268 0 /* idiv (N/A). */
1271 /* LD/ST */
1273 COSTS_N_INSNS (1), /* load. */
1274 COSTS_N_INSNS (1), /* load_sign_extend. */
1275 COSTS_N_INSNS (6), /* ldrd. */
1276 COSTS_N_INSNS (1), /* ldm_1st. */
1277 1, /* ldm_regs_per_insn_1st. */
1278 2, /* ldm_regs_per_insn_subsequent. */
1279 COSTS_N_INSNS (2), /* loadf. */
1280 COSTS_N_INSNS (4), /* loadd. */
1281 COSTS_N_INSNS (1), /* load_unaligned. */
1282 COSTS_N_INSNS (1), /* store. */
1283 COSTS_N_INSNS (3), /* strd. */
1284 COSTS_N_INSNS (1), /* stm_1st. */
1285 1, /* stm_regs_per_insn_1st. */
1286 2, /* stm_regs_per_insn_subsequent. */
1287 COSTS_N_INSNS (2), /* storef. */
1288 COSTS_N_INSNS (2), /* stored. */
1289 COSTS_N_INSNS (1), /* store_unaligned. */
1290 COSTS_N_INSNS (1), /* loadv. */
1291 COSTS_N_INSNS (1) /* storev. */
1294 /* FP SFmode */
1296 COSTS_N_INSNS (15), /* div. */
1297 COSTS_N_INSNS (3), /* mult. */
1298 COSTS_N_INSNS (7), /* mult_addsub. */
1299 COSTS_N_INSNS (7), /* fma. */
1300 COSTS_N_INSNS (3), /* addsub. */
1301 COSTS_N_INSNS (3), /* fpconst. */
1302 COSTS_N_INSNS (3), /* neg. */
1303 COSTS_N_INSNS (3), /* compare. */
1304 COSTS_N_INSNS (3), /* widen. */
1305 COSTS_N_INSNS (3), /* narrow. */
1306 COSTS_N_INSNS (3), /* toint. */
1307 COSTS_N_INSNS (3), /* fromint. */
1308 COSTS_N_INSNS (3) /* roundint. */
1310 /* FP DFmode */
1312 COSTS_N_INSNS (30), /* div. */
1313 COSTS_N_INSNS (6), /* mult. */
1314 COSTS_N_INSNS (10), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1327 /* Vector */
1329 COSTS_N_INSNS (1) /* alu. */
1334 const struct cpu_cost_table cortexa7_extra_costs =
1336 /* ALU */
1338 0, /* arith. */
1339 0, /* logical. */
1340 COSTS_N_INSNS (1), /* shift. */
1341 COSTS_N_INSNS (1), /* shift_reg. */
1342 COSTS_N_INSNS (1), /* arith_shift. */
1343 COSTS_N_INSNS (1), /* arith_shift_reg. */
1344 COSTS_N_INSNS (1), /* log_shift. */
1345 COSTS_N_INSNS (1), /* log_shift_reg. */
1346 COSTS_N_INSNS (1), /* extend. */
1347 COSTS_N_INSNS (1), /* extend_arith. */
1348 COSTS_N_INSNS (1), /* bfi. */
1349 COSTS_N_INSNS (1), /* bfx. */
1350 COSTS_N_INSNS (1), /* clz. */
1351 COSTS_N_INSNS (1), /* rev. */
1352 0, /* non_exec. */
1353 true /* non_exec_costs_exec. */
1357 /* MULT SImode */
1359 0, /* simple. */
1360 COSTS_N_INSNS (1), /* flag_setting. */
1361 COSTS_N_INSNS (1), /* extend. */
1362 COSTS_N_INSNS (1), /* add. */
1363 COSTS_N_INSNS (1), /* extend_add. */
1364 COSTS_N_INSNS (7) /* idiv. */
1366 /* MULT DImode */
1368 0, /* simple (N/A). */
1369 0, /* flag_setting (N/A). */
1370 COSTS_N_INSNS (1), /* extend. */
1371 0, /* add. */
1372 COSTS_N_INSNS (2), /* extend_add. */
1373 0 /* idiv (N/A). */
1376 /* LD/ST */
1378 COSTS_N_INSNS (1), /* load. */
1379 COSTS_N_INSNS (1), /* load_sign_extend. */
1380 COSTS_N_INSNS (3), /* ldrd. */
1381 COSTS_N_INSNS (1), /* ldm_1st. */
1382 1, /* ldm_regs_per_insn_1st. */
1383 2, /* ldm_regs_per_insn_subsequent. */
1384 COSTS_N_INSNS (2), /* loadf. */
1385 COSTS_N_INSNS (2), /* loadd. */
1386 COSTS_N_INSNS (1), /* load_unaligned. */
1387 COSTS_N_INSNS (1), /* store. */
1388 COSTS_N_INSNS (3), /* strd. */
1389 COSTS_N_INSNS (1), /* stm_1st. */
1390 1, /* stm_regs_per_insn_1st. */
1391 2, /* stm_regs_per_insn_subsequent. */
1392 COSTS_N_INSNS (2), /* storef. */
1393 COSTS_N_INSNS (2), /* stored. */
1394 COSTS_N_INSNS (1), /* store_unaligned. */
1395 COSTS_N_INSNS (1), /* loadv. */
1396 COSTS_N_INSNS (1) /* storev. */
1399 /* FP SFmode */
1401 COSTS_N_INSNS (15), /* div. */
1402 COSTS_N_INSNS (3), /* mult. */
1403 COSTS_N_INSNS (7), /* mult_addsub. */
1404 COSTS_N_INSNS (7), /* fma. */
1405 COSTS_N_INSNS (3), /* addsub. */
1406 COSTS_N_INSNS (3), /* fpconst. */
1407 COSTS_N_INSNS (3), /* neg. */
1408 COSTS_N_INSNS (3), /* compare. */
1409 COSTS_N_INSNS (3), /* widen. */
1410 COSTS_N_INSNS (3), /* narrow. */
1411 COSTS_N_INSNS (3), /* toint. */
1412 COSTS_N_INSNS (3), /* fromint. */
1413 COSTS_N_INSNS (3) /* roundint. */
1415 /* FP DFmode */
1417 COSTS_N_INSNS (30), /* div. */
1418 COSTS_N_INSNS (6), /* mult. */
1419 COSTS_N_INSNS (10), /* mult_addsub. */
1420 COSTS_N_INSNS (7), /* fma. */
1421 COSTS_N_INSNS (3), /* addsub. */
1422 COSTS_N_INSNS (3), /* fpconst. */
1423 COSTS_N_INSNS (3), /* neg. */
1424 COSTS_N_INSNS (3), /* compare. */
1425 COSTS_N_INSNS (3), /* widen. */
1426 COSTS_N_INSNS (3), /* narrow. */
1427 COSTS_N_INSNS (3), /* toint. */
1428 COSTS_N_INSNS (3), /* fromint. */
1429 COSTS_N_INSNS (3) /* roundint. */
1432 /* Vector */
1434 COSTS_N_INSNS (1) /* alu. */
1438 const struct cpu_cost_table cortexa12_extra_costs =
1440 /* ALU */
1442 0, /* arith. */
1443 0, /* logical. */
1444 0, /* shift. */
1445 COSTS_N_INSNS (1), /* shift_reg. */
1446 COSTS_N_INSNS (1), /* arith_shift. */
1447 COSTS_N_INSNS (1), /* arith_shift_reg. */
1448 COSTS_N_INSNS (1), /* log_shift. */
1449 COSTS_N_INSNS (1), /* log_shift_reg. */
1450 0, /* extend. */
1451 COSTS_N_INSNS (1), /* extend_arith. */
1452 0, /* bfi. */
1453 COSTS_N_INSNS (1), /* bfx. */
1454 COSTS_N_INSNS (1), /* clz. */
1455 COSTS_N_INSNS (1), /* rev. */
1456 0, /* non_exec. */
1457 true /* non_exec_costs_exec. */
1459 /* MULT SImode */
1462 COSTS_N_INSNS (2), /* simple. */
1463 COSTS_N_INSNS (3), /* flag_setting. */
1464 COSTS_N_INSNS (2), /* extend. */
1465 COSTS_N_INSNS (3), /* add. */
1466 COSTS_N_INSNS (2), /* extend_add. */
1467 COSTS_N_INSNS (18) /* idiv. */
1469 /* MULT DImode */
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (3), /* extend. */
1474 0, /* add (N/A). */
1475 COSTS_N_INSNS (3), /* extend_add. */
1476 0 /* idiv (N/A). */
1479 /* LD/ST */
1481 COSTS_N_INSNS (3), /* load. */
1482 COSTS_N_INSNS (3), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (3), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (3), /* loadf. */
1488 COSTS_N_INSNS (3), /* loadd. */
1489 0, /* load_unaligned. */
1490 0, /* store. */
1491 0, /* strd. */
1492 0, /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 0, /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1502 /* FP SFmode */
1504 COSTS_N_INSNS (17), /* div. */
1505 COSTS_N_INSNS (4), /* mult. */
1506 COSTS_N_INSNS (8), /* mult_addsub. */
1507 COSTS_N_INSNS (8), /* fma. */
1508 COSTS_N_INSNS (4), /* addsub. */
1509 COSTS_N_INSNS (2), /* fpconst. */
1510 COSTS_N_INSNS (2), /* neg. */
1511 COSTS_N_INSNS (2), /* compare. */
1512 COSTS_N_INSNS (4), /* widen. */
1513 COSTS_N_INSNS (4), /* narrow. */
1514 COSTS_N_INSNS (4), /* toint. */
1515 COSTS_N_INSNS (4), /* fromint. */
1516 COSTS_N_INSNS (4) /* roundint. */
1518 /* FP DFmode */
1520 COSTS_N_INSNS (31), /* div. */
1521 COSTS_N_INSNS (4), /* mult. */
1522 COSTS_N_INSNS (8), /* mult_addsub. */
1523 COSTS_N_INSNS (8), /* fma. */
1524 COSTS_N_INSNS (4), /* addsub. */
1525 COSTS_N_INSNS (2), /* fpconst. */
1526 COSTS_N_INSNS (2), /* neg. */
1527 COSTS_N_INSNS (2), /* compare. */
1528 COSTS_N_INSNS (4), /* widen. */
1529 COSTS_N_INSNS (4), /* narrow. */
1530 COSTS_N_INSNS (4), /* toint. */
1531 COSTS_N_INSNS (4), /* fromint. */
1532 COSTS_N_INSNS (4) /* roundint. */
1535 /* Vector */
1537 COSTS_N_INSNS (1) /* alu. */
1541 const struct cpu_cost_table cortexa15_extra_costs =
1543 /* ALU */
1545 0, /* arith. */
1546 0, /* logical. */
1547 0, /* shift. */
1548 0, /* shift_reg. */
1549 COSTS_N_INSNS (1), /* arith_shift. */
1550 COSTS_N_INSNS (1), /* arith_shift_reg. */
1551 COSTS_N_INSNS (1), /* log_shift. */
1552 COSTS_N_INSNS (1), /* log_shift_reg. */
1553 0, /* extend. */
1554 COSTS_N_INSNS (1), /* extend_arith. */
1555 COSTS_N_INSNS (1), /* bfi. */
1556 0, /* bfx. */
1557 0, /* clz. */
1558 0, /* rev. */
1559 0, /* non_exec. */
1560 true /* non_exec_costs_exec. */
1562 /* MULT SImode */
1565 COSTS_N_INSNS (2), /* simple. */
1566 COSTS_N_INSNS (3), /* flag_setting. */
1567 COSTS_N_INSNS (2), /* extend. */
1568 COSTS_N_INSNS (2), /* add. */
1569 COSTS_N_INSNS (2), /* extend_add. */
1570 COSTS_N_INSNS (18) /* idiv. */
1572 /* MULT DImode */
1574 0, /* simple (N/A). */
1575 0, /* flag_setting (N/A). */
1576 COSTS_N_INSNS (3), /* extend. */
1577 0, /* add (N/A). */
1578 COSTS_N_INSNS (3), /* extend_add. */
1579 0 /* idiv (N/A). */
1582 /* LD/ST */
1584 COSTS_N_INSNS (3), /* load. */
1585 COSTS_N_INSNS (3), /* load_sign_extend. */
1586 COSTS_N_INSNS (3), /* ldrd. */
1587 COSTS_N_INSNS (4), /* ldm_1st. */
1588 1, /* ldm_regs_per_insn_1st. */
1589 2, /* ldm_regs_per_insn_subsequent. */
1590 COSTS_N_INSNS (4), /* loadf. */
1591 COSTS_N_INSNS (4), /* loadd. */
1592 0, /* load_unaligned. */
1593 0, /* store. */
1594 0, /* strd. */
1595 COSTS_N_INSNS (1), /* stm_1st. */
1596 1, /* stm_regs_per_insn_1st. */
1597 2, /* stm_regs_per_insn_subsequent. */
1598 0, /* storef. */
1599 0, /* stored. */
1600 0, /* store_unaligned. */
1601 COSTS_N_INSNS (1), /* loadv. */
1602 COSTS_N_INSNS (1) /* storev. */
1605 /* FP SFmode */
1607 COSTS_N_INSNS (17), /* div. */
1608 COSTS_N_INSNS (4), /* mult. */
1609 COSTS_N_INSNS (8), /* mult_addsub. */
1610 COSTS_N_INSNS (8), /* fma. */
1611 COSTS_N_INSNS (4), /* addsub. */
1612 COSTS_N_INSNS (2), /* fpconst. */
1613 COSTS_N_INSNS (2), /* neg. */
1614 COSTS_N_INSNS (5), /* compare. */
1615 COSTS_N_INSNS (4), /* widen. */
1616 COSTS_N_INSNS (4), /* narrow. */
1617 COSTS_N_INSNS (4), /* toint. */
1618 COSTS_N_INSNS (4), /* fromint. */
1619 COSTS_N_INSNS (4) /* roundint. */
1621 /* FP DFmode */
1623 COSTS_N_INSNS (31), /* div. */
1624 COSTS_N_INSNS (4), /* mult. */
1625 COSTS_N_INSNS (8), /* mult_addsub. */
1626 COSTS_N_INSNS (8), /* fma. */
1627 COSTS_N_INSNS (4), /* addsub. */
1628 COSTS_N_INSNS (2), /* fpconst. */
1629 COSTS_N_INSNS (2), /* neg. */
1630 COSTS_N_INSNS (2), /* compare. */
1631 COSTS_N_INSNS (4), /* widen. */
1632 COSTS_N_INSNS (4), /* narrow. */
1633 COSTS_N_INSNS (4), /* toint. */
1634 COSTS_N_INSNS (4), /* fromint. */
1635 COSTS_N_INSNS (4) /* roundint. */
1638 /* Vector */
1640 COSTS_N_INSNS (1) /* alu. */
1644 const struct cpu_cost_table v7m_extra_costs =
1646 /* ALU */
1648 0, /* arith. */
1649 0, /* logical. */
1650 0, /* shift. */
1651 0, /* shift_reg. */
1652 0, /* arith_shift. */
1653 COSTS_N_INSNS (1), /* arith_shift_reg. */
1654 0, /* log_shift. */
1655 COSTS_N_INSNS (1), /* log_shift_reg. */
1656 0, /* extend. */
1657 COSTS_N_INSNS (1), /* extend_arith. */
1658 0, /* bfi. */
1659 0, /* bfx. */
1660 0, /* clz. */
1661 0, /* rev. */
1662 COSTS_N_INSNS (1), /* non_exec. */
1663 false /* non_exec_costs_exec. */
1666 /* MULT SImode */
1668 COSTS_N_INSNS (1), /* simple. */
1669 COSTS_N_INSNS (1), /* flag_setting. */
1670 COSTS_N_INSNS (2), /* extend. */
1671 COSTS_N_INSNS (1), /* add. */
1672 COSTS_N_INSNS (3), /* extend_add. */
1673 COSTS_N_INSNS (8) /* idiv. */
1675 /* MULT DImode */
1677 0, /* simple (N/A). */
1678 0, /* flag_setting (N/A). */
1679 COSTS_N_INSNS (2), /* extend. */
1680 0, /* add (N/A). */
1681 COSTS_N_INSNS (3), /* extend_add. */
1682 0 /* idiv (N/A). */
1685 /* LD/ST */
1687 COSTS_N_INSNS (2), /* load. */
1688 0, /* load_sign_extend. */
1689 COSTS_N_INSNS (3), /* ldrd. */
1690 COSTS_N_INSNS (2), /* ldm_1st. */
1691 1, /* ldm_regs_per_insn_1st. */
1692 1, /* ldm_regs_per_insn_subsequent. */
1693 COSTS_N_INSNS (2), /* loadf. */
1694 COSTS_N_INSNS (3), /* loadd. */
1695 COSTS_N_INSNS (1), /* load_unaligned. */
1696 COSTS_N_INSNS (2), /* store. */
1697 COSTS_N_INSNS (3), /* strd. */
1698 COSTS_N_INSNS (2), /* stm_1st. */
1699 1, /* stm_regs_per_insn_1st. */
1700 1, /* stm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (2), /* storef. */
1702 COSTS_N_INSNS (3), /* stored. */
1703 COSTS_N_INSNS (1), /* store_unaligned. */
1704 COSTS_N_INSNS (1), /* loadv. */
1705 COSTS_N_INSNS (1) /* storev. */
1708 /* FP SFmode */
1710 COSTS_N_INSNS (7), /* div. */
1711 COSTS_N_INSNS (2), /* mult. */
1712 COSTS_N_INSNS (5), /* mult_addsub. */
1713 COSTS_N_INSNS (3), /* fma. */
1714 COSTS_N_INSNS (1), /* addsub. */
1715 0, /* fpconst. */
1716 0, /* neg. */
1717 0, /* compare. */
1718 0, /* widen. */
1719 0, /* narrow. */
1720 0, /* toint. */
1721 0, /* fromint. */
1722 0 /* roundint. */
1724 /* FP DFmode */
1726 COSTS_N_INSNS (15), /* div. */
1727 COSTS_N_INSNS (5), /* mult. */
1728 COSTS_N_INSNS (7), /* mult_addsub. */
1729 COSTS_N_INSNS (7), /* fma. */
1730 COSTS_N_INSNS (3), /* addsub. */
1731 0, /* fpconst. */
1732 0, /* neg. */
1733 0, /* compare. */
1734 0, /* widen. */
1735 0, /* narrow. */
1736 0, /* toint. */
1737 0, /* fromint. */
1738 0 /* roundint. */
1741 /* Vector */
1743 COSTS_N_INSNS (1) /* alu. */
1747 const struct tune_params arm_slowmul_tune =
1749 &generic_extra_costs, /* Insn extra costs. */
1750 NULL, /* Sched adj cost. */
1751 arm_default_branch_cost,
1752 &arm_default_vec_cost,
1753 3, /* Constant limit. */
1754 5, /* Max cond insns. */
1755 8, /* Memset max inline. */
1756 1, /* Issue rate. */
1757 ARM_PREFETCH_NOT_BENEFICIAL,
1758 tune_params::PREF_CONST_POOL_TRUE,
1759 tune_params::PREF_LDRD_FALSE,
1760 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1761 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1762 tune_params::DISPARAGE_FLAGS_NEITHER,
1763 tune_params::PREF_NEON_64_FALSE,
1764 tune_params::PREF_NEON_STRINGOPS_FALSE,
1765 tune_params::FUSE_NOTHING,
1766 tune_params::SCHED_AUTOPREF_OFF
1769 const struct tune_params arm_fastmul_tune =
1771 &generic_extra_costs, /* Insn extra costs. */
1772 NULL, /* Sched adj cost. */
1773 arm_default_branch_cost,
1774 &arm_default_vec_cost,
1775 1, /* Constant limit. */
1776 5, /* Max cond insns. */
1777 8, /* Memset max inline. */
1778 1, /* Issue rate. */
1779 ARM_PREFETCH_NOT_BENEFICIAL,
1780 tune_params::PREF_CONST_POOL_TRUE,
1781 tune_params::PREF_LDRD_FALSE,
1782 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1783 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1784 tune_params::DISPARAGE_FLAGS_NEITHER,
1785 tune_params::PREF_NEON_64_FALSE,
1786 tune_params::PREF_NEON_STRINGOPS_FALSE,
1787 tune_params::FUSE_NOTHING,
1788 tune_params::SCHED_AUTOPREF_OFF
1791 /* StrongARM has early execution of branches, so a sequence that is worth
1792 skipping is shorter. Set max_insns_skipped to a lower value. */
1794 const struct tune_params arm_strongarm_tune =
1796 &generic_extra_costs, /* Insn extra costs. */
1797 NULL, /* Sched adj cost. */
1798 arm_default_branch_cost,
1799 &arm_default_vec_cost,
1800 1, /* Constant limit. */
1801 3, /* Max cond insns. */
1802 8, /* Memset max inline. */
1803 1, /* Issue rate. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 tune_params::PREF_CONST_POOL_TRUE,
1806 tune_params::PREF_LDRD_FALSE,
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1809 tune_params::DISPARAGE_FLAGS_NEITHER,
1810 tune_params::PREF_NEON_64_FALSE,
1811 tune_params::PREF_NEON_STRINGOPS_FALSE,
1812 tune_params::FUSE_NOTHING,
1813 tune_params::SCHED_AUTOPREF_OFF
1816 const struct tune_params arm_xscale_tune =
1818 &generic_extra_costs, /* Insn extra costs. */
1819 xscale_sched_adjust_cost,
1820 arm_default_branch_cost,
1821 &arm_default_vec_cost,
1822 2, /* Constant limit. */
1823 3, /* Max cond insns. */
1824 8, /* Memset max inline. */
1825 1, /* Issue rate. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 tune_params::PREF_CONST_POOL_TRUE,
1828 tune_params::PREF_LDRD_FALSE,
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1831 tune_params::DISPARAGE_FLAGS_NEITHER,
1832 tune_params::PREF_NEON_64_FALSE,
1833 tune_params::PREF_NEON_STRINGOPS_FALSE,
1834 tune_params::FUSE_NOTHING,
1835 tune_params::SCHED_AUTOPREF_OFF
1838 const struct tune_params arm_9e_tune =
1840 &generic_extra_costs, /* Insn extra costs. */
1841 NULL, /* Sched adj cost. */
1842 arm_default_branch_cost,
1843 &arm_default_vec_cost,
1844 1, /* Constant limit. */
1845 5, /* Max cond insns. */
1846 8, /* Memset max inline. */
1847 1, /* Issue rate. */
1848 ARM_PREFETCH_NOT_BENEFICIAL,
1849 tune_params::PREF_CONST_POOL_TRUE,
1850 tune_params::PREF_LDRD_FALSE,
1851 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1852 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1853 tune_params::DISPARAGE_FLAGS_NEITHER,
1854 tune_params::PREF_NEON_64_FALSE,
1855 tune_params::PREF_NEON_STRINGOPS_FALSE,
1856 tune_params::FUSE_NOTHING,
1857 tune_params::SCHED_AUTOPREF_OFF
1860 const struct tune_params arm_marvell_pj4_tune =
1862 &generic_extra_costs, /* Insn extra costs. */
1863 NULL, /* Sched adj cost. */
1864 arm_default_branch_cost,
1865 &arm_default_vec_cost,
1866 1, /* Constant limit. */
1867 5, /* Max cond insns. */
1868 8, /* Memset max inline. */
1869 2, /* Issue rate. */
1870 ARM_PREFETCH_NOT_BENEFICIAL,
1871 tune_params::PREF_CONST_POOL_TRUE,
1872 tune_params::PREF_LDRD_FALSE,
1873 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1874 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1875 tune_params::DISPARAGE_FLAGS_NEITHER,
1876 tune_params::PREF_NEON_64_FALSE,
1877 tune_params::PREF_NEON_STRINGOPS_FALSE,
1878 tune_params::FUSE_NOTHING,
1879 tune_params::SCHED_AUTOPREF_OFF
1882 const struct tune_params arm_v6t2_tune =
1884 &generic_extra_costs, /* Insn extra costs. */
1885 NULL, /* Sched adj cost. */
1886 arm_default_branch_cost,
1887 &arm_default_vec_cost,
1888 1, /* Constant limit. */
1889 5, /* Max cond insns. */
1890 8, /* Memset max inline. */
1891 1, /* Issue rate. */
1892 ARM_PREFETCH_NOT_BENEFICIAL,
1893 tune_params::PREF_CONST_POOL_FALSE,
1894 tune_params::PREF_LDRD_FALSE,
1895 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1896 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1897 tune_params::DISPARAGE_FLAGS_NEITHER,
1898 tune_params::PREF_NEON_64_FALSE,
1899 tune_params::PREF_NEON_STRINGOPS_FALSE,
1900 tune_params::FUSE_NOTHING,
1901 tune_params::SCHED_AUTOPREF_OFF
1905 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1906 const struct tune_params arm_cortex_tune =
1908 &generic_extra_costs,
1909 NULL, /* Sched adj cost. */
1910 arm_default_branch_cost,
1911 &arm_default_vec_cost,
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 8, /* Memset max inline. */
1915 2, /* Issue rate. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 tune_params::PREF_CONST_POOL_FALSE,
1918 tune_params::PREF_LDRD_FALSE,
1919 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1920 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1921 tune_params::DISPARAGE_FLAGS_NEITHER,
1922 tune_params::PREF_NEON_64_FALSE,
1923 tune_params::PREF_NEON_STRINGOPS_FALSE,
1924 tune_params::FUSE_NOTHING,
1925 tune_params::SCHED_AUTOPREF_OFF
1928 const struct tune_params arm_cortex_a8_tune =
1930 &cortexa8_extra_costs,
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 5, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 2, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_FALSE,
1940 tune_params::PREF_LDRD_FALSE,
1941 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_NEITHER,
1944 tune_params::PREF_NEON_64_FALSE,
1945 tune_params::PREF_NEON_STRINGOPS_TRUE,
1946 tune_params::FUSE_NOTHING,
1947 tune_params::SCHED_AUTOPREF_OFF
1950 const struct tune_params arm_cortex_a7_tune =
1952 &cortexa7_extra_costs,
1953 NULL, /* Sched adj cost. */
1954 arm_default_branch_cost,
1955 &arm_default_vec_cost,
1956 1, /* Constant limit. */
1957 5, /* Max cond insns. */
1958 8, /* Memset max inline. */
1959 2, /* Issue rate. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 tune_params::PREF_CONST_POOL_FALSE,
1962 tune_params::PREF_LDRD_FALSE,
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1964 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1965 tune_params::DISPARAGE_FLAGS_NEITHER,
1966 tune_params::PREF_NEON_64_FALSE,
1967 tune_params::PREF_NEON_STRINGOPS_TRUE,
1968 tune_params::FUSE_NOTHING,
1969 tune_params::SCHED_AUTOPREF_OFF
1972 const struct tune_params arm_cortex_a15_tune =
1974 &cortexa15_extra_costs,
1975 NULL, /* Sched adj cost. */
1976 arm_default_branch_cost,
1977 &arm_default_vec_cost,
1978 1, /* Constant limit. */
1979 2, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 3, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 tune_params::PREF_CONST_POOL_FALSE,
1984 tune_params::PREF_LDRD_TRUE,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_ALL,
1988 tune_params::PREF_NEON_64_FALSE,
1989 tune_params::PREF_NEON_STRINGOPS_TRUE,
1990 tune_params::FUSE_NOTHING,
1991 tune_params::SCHED_AUTOPREF_FULL
1994 const struct tune_params arm_cortex_a35_tune =
1996 &cortexa53_extra_costs,
1997 NULL, /* Sched adj cost. */
1998 arm_default_branch_cost,
1999 &arm_default_vec_cost,
2000 1, /* Constant limit. */
2001 5, /* Max cond insns. */
2002 8, /* Memset max inline. */
2003 1, /* Issue rate. */
2004 ARM_PREFETCH_NOT_BENEFICIAL,
2005 tune_params::PREF_CONST_POOL_FALSE,
2006 tune_params::PREF_LDRD_FALSE,
2007 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2009 tune_params::DISPARAGE_FLAGS_NEITHER,
2010 tune_params::PREF_NEON_64_FALSE,
2011 tune_params::PREF_NEON_STRINGOPS_TRUE,
2012 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2013 tune_params::SCHED_AUTOPREF_OFF
2016 const struct tune_params arm_cortex_a53_tune =
2018 &cortexa53_extra_costs,
2019 NULL, /* Sched adj cost. */
2020 arm_default_branch_cost,
2021 &arm_default_vec_cost,
2022 1, /* Constant limit. */
2023 5, /* Max cond insns. */
2024 8, /* Memset max inline. */
2025 2, /* Issue rate. */
2026 ARM_PREFETCH_NOT_BENEFICIAL,
2027 tune_params::PREF_CONST_POOL_FALSE,
2028 tune_params::PREF_LDRD_FALSE,
2029 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2031 tune_params::DISPARAGE_FLAGS_NEITHER,
2032 tune_params::PREF_NEON_64_FALSE,
2033 tune_params::PREF_NEON_STRINGOPS_TRUE,
2034 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2035 tune_params::SCHED_AUTOPREF_OFF
2038 const struct tune_params arm_cortex_a57_tune =
2040 &cortexa57_extra_costs,
2041 NULL, /* Sched adj cost. */
2042 arm_default_branch_cost,
2043 &arm_default_vec_cost,
2044 1, /* Constant limit. */
2045 2, /* Max cond insns. */
2046 8, /* Memset max inline. */
2047 3, /* Issue rate. */
2048 ARM_PREFETCH_NOT_BENEFICIAL,
2049 tune_params::PREF_CONST_POOL_FALSE,
2050 tune_params::PREF_LDRD_TRUE,
2051 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2052 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2053 tune_params::DISPARAGE_FLAGS_ALL,
2054 tune_params::PREF_NEON_64_FALSE,
2055 tune_params::PREF_NEON_STRINGOPS_TRUE,
2056 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2057 tune_params::SCHED_AUTOPREF_FULL
2060 const struct tune_params arm_exynosm1_tune =
2062 &exynosm1_extra_costs,
2063 NULL, /* Sched adj cost. */
2064 arm_default_branch_cost,
2065 &arm_default_vec_cost,
2066 1, /* Constant limit. */
2067 2, /* Max cond insns. */
2068 8, /* Memset max inline. */
2069 3, /* Issue rate. */
2070 ARM_PREFETCH_NOT_BENEFICIAL,
2071 tune_params::PREF_CONST_POOL_FALSE,
2072 tune_params::PREF_LDRD_TRUE,
2073 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2074 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2075 tune_params::DISPARAGE_FLAGS_ALL,
2076 tune_params::PREF_NEON_64_FALSE,
2077 tune_params::PREF_NEON_STRINGOPS_TRUE,
2078 tune_params::FUSE_NOTHING,
2079 tune_params::SCHED_AUTOPREF_OFF
2082 const struct tune_params arm_xgene1_tune =
2084 &xgene1_extra_costs,
2085 NULL, /* Sched adj cost. */
2086 arm_default_branch_cost,
2087 &arm_default_vec_cost,
2088 1, /* Constant limit. */
2089 2, /* Max cond insns. */
2090 32, /* Memset max inline. */
2091 4, /* Issue rate. */
2092 ARM_PREFETCH_NOT_BENEFICIAL,
2093 tune_params::PREF_CONST_POOL_FALSE,
2094 tune_params::PREF_LDRD_TRUE,
2095 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2096 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2097 tune_params::DISPARAGE_FLAGS_ALL,
2098 tune_params::PREF_NEON_64_FALSE,
2099 tune_params::PREF_NEON_STRINGOPS_FALSE,
2100 tune_params::FUSE_NOTHING,
2101 tune_params::SCHED_AUTOPREF_OFF
2104 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2105 less appealing. Set max_insns_skipped to a low value. */
2107 const struct tune_params arm_cortex_a5_tune =
2109 &cortexa5_extra_costs,
2110 NULL, /* Sched adj cost. */
2111 arm_cortex_a5_branch_cost,
2112 &arm_default_vec_cost,
2113 1, /* Constant limit. */
2114 1, /* Max cond insns. */
2115 8, /* Memset max inline. */
2116 2, /* Issue rate. */
2117 ARM_PREFETCH_NOT_BENEFICIAL,
2118 tune_params::PREF_CONST_POOL_FALSE,
2119 tune_params::PREF_LDRD_FALSE,
2120 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2121 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2122 tune_params::DISPARAGE_FLAGS_NEITHER,
2123 tune_params::PREF_NEON_64_FALSE,
2124 tune_params::PREF_NEON_STRINGOPS_TRUE,
2125 tune_params::FUSE_NOTHING,
2126 tune_params::SCHED_AUTOPREF_OFF
2129 const struct tune_params arm_cortex_a9_tune =
2131 &cortexa9_extra_costs,
2132 cortex_a9_sched_adjust_cost,
2133 arm_default_branch_cost,
2134 &arm_default_vec_cost,
2135 1, /* Constant limit. */
2136 5, /* Max cond insns. */
2137 8, /* Memset max inline. */
2138 2, /* Issue rate. */
2139 ARM_PREFETCH_BENEFICIAL(4,32,32),
2140 tune_params::PREF_CONST_POOL_FALSE,
2141 tune_params::PREF_LDRD_FALSE,
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2143 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2144 tune_params::DISPARAGE_FLAGS_NEITHER,
2145 tune_params::PREF_NEON_64_FALSE,
2146 tune_params::PREF_NEON_STRINGOPS_FALSE,
2147 tune_params::FUSE_NOTHING,
2148 tune_params::SCHED_AUTOPREF_OFF
2151 const struct tune_params arm_cortex_a12_tune =
2153 &cortexa12_extra_costs,
2154 NULL, /* Sched adj cost. */
2155 arm_default_branch_cost,
2156 &arm_default_vec_cost, /* Vectorizer costs. */
2157 1, /* Constant limit. */
2158 2, /* Max cond insns. */
2159 8, /* Memset max inline. */
2160 2, /* Issue rate. */
2161 ARM_PREFETCH_NOT_BENEFICIAL,
2162 tune_params::PREF_CONST_POOL_FALSE,
2163 tune_params::PREF_LDRD_TRUE,
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2166 tune_params::DISPARAGE_FLAGS_ALL,
2167 tune_params::PREF_NEON_64_FALSE,
2168 tune_params::PREF_NEON_STRINGOPS_TRUE,
2169 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2170 tune_params::SCHED_AUTOPREF_OFF
2173 const struct tune_params arm_cortex_a73_tune =
2175 &cortexa57_extra_costs,
2176 NULL, /* Sched adj cost. */
2177 arm_default_branch_cost,
2178 &arm_default_vec_cost, /* Vectorizer costs. */
2179 1, /* Constant limit. */
2180 2, /* Max cond insns. */
2181 8, /* Memset max inline. */
2182 2, /* Issue rate. */
2183 ARM_PREFETCH_NOT_BENEFICIAL,
2184 tune_params::PREF_CONST_POOL_FALSE,
2185 tune_params::PREF_LDRD_TRUE,
2186 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2188 tune_params::DISPARAGE_FLAGS_ALL,
2189 tune_params::PREF_NEON_64_FALSE,
2190 tune_params::PREF_NEON_STRINGOPS_TRUE,
2191 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2192 tune_params::SCHED_AUTOPREF_FULL
2195 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2196 single cycle to execute, so a MOVW/MOVT pair takes two cycles. An LDR from the
2197 constant pool also takes two cycles to execute, but mildly increases pipelining
2198 opportunity (consecutive loads/stores can be pipelined together, saving one
2199 cycle), and may also improve icache utilisation. Hence we prefer the constant
2200 pool for such processors. */
2202 const struct tune_params arm_v7m_tune =
2204 &v7m_extra_costs,
2205 NULL, /* Sched adj cost. */
2206 arm_cortex_m_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 1, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_TRUE,
2214 tune_params::PREF_LDRD_FALSE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_NEITHER,
2218 tune_params::PREF_NEON_64_FALSE,
2219 tune_params::PREF_NEON_STRINGOPS_FALSE,
2220 tune_params::FUSE_NOTHING,
2221 tune_params::SCHED_AUTOPREF_OFF
2224 /* Cortex-M7 tuning. */
2226 const struct tune_params arm_cortex_m7_tune =
2228 &v7m_extra_costs,
2229 NULL, /* Sched adj cost. */
2230 arm_cortex_m7_branch_cost,
2231 &arm_default_vec_cost,
2232 0, /* Constant limit. */
2233 1, /* Max cond insns. */
2234 8, /* Memset max inline. */
2235 2, /* Issue rate. */
2236 ARM_PREFETCH_NOT_BENEFICIAL,
2237 tune_params::PREF_CONST_POOL_TRUE,
2238 tune_params::PREF_LDRD_FALSE,
2239 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2240 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2241 tune_params::DISPARAGE_FLAGS_NEITHER,
2242 tune_params::PREF_NEON_64_FALSE,
2243 tune_params::PREF_NEON_STRINGOPS_FALSE,
2244 tune_params::FUSE_NOTHING,
2245 tune_params::SCHED_AUTOPREF_OFF
2248 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2249 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2250 cortex-m23. */
2251 const struct tune_params arm_v6m_tune =
2253 &generic_extra_costs, /* Insn extra costs. */
2254 NULL, /* Sched adj cost. */
2255 arm_default_branch_cost,
2256 &arm_default_vec_cost, /* Vectorizer costs. */
2257 1, /* Constant limit. */
2258 5, /* Max cond insns. */
2259 8, /* Memset max inline. */
2260 1, /* Issue rate. */
2261 ARM_PREFETCH_NOT_BENEFICIAL,
2262 tune_params::PREF_CONST_POOL_FALSE,
2263 tune_params::PREF_LDRD_FALSE,
2264 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2266 tune_params::DISPARAGE_FLAGS_NEITHER,
2267 tune_params::PREF_NEON_64_FALSE,
2268 tune_params::PREF_NEON_STRINGOPS_FALSE,
2269 tune_params::FUSE_NOTHING,
2270 tune_params::SCHED_AUTOPREF_OFF
2273 const struct tune_params arm_fa726te_tune =
2275 &generic_extra_costs, /* Insn extra costs. */
2276 fa726te_sched_adjust_cost,
2277 arm_default_branch_cost,
2278 &arm_default_vec_cost,
2279 1, /* Constant limit. */
2280 5, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 2, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_TRUE,
2285 tune_params::PREF_LDRD_FALSE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER,
2289 tune_params::PREF_NEON_64_FALSE,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE,
2291 tune_params::FUSE_NOTHING,
2292 tune_params::SCHED_AUTOPREF_OFF
2295 /* Auto-generated CPU, FPU and architecture tables. */
2296 #include "arm-cpu-data.h"
2298 /* The name of the preprocessor macro to define for this architecture. PROFILE
2299 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2300 is thus chosen to be big enough to hold the longest architecture name. */
2302 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2304 /* Supported TLS relocations. */
2306 enum tls_reloc {
2307 TLS_GD32,
2308 TLS_LDM32,
2309 TLS_LDO32,
2310 TLS_IE32,
2311 TLS_LE32,
2312 TLS_DESCSEQ /* GNU scheme */
2315 /* The maximum number of insns to be used when loading a constant. */
2316 inline static int
2317 arm_constant_limit (bool size_p)
2319 return size_p ? 1 : current_tune->constant_limit;
2322 /* Emit an insn that's a simple single-set. Both the operands must be known
2323 to be valid. */
2324 inline static rtx_insn *
2325 emit_set_insn (rtx x, rtx y)
2327 return emit_insn (gen_rtx_SET (x, y));
2330 /* Return the number of bits set in VALUE. */
2331 static unsigned
2332 bit_count (unsigned long value)
2334 unsigned long count = 0;
2336 while (value)
2338 count++;
2339 value &= value - 1; /* Clear the least-significant set bit. */
2342 return count;
2345 /* Return the number of bits set in BMAP. */
2346 static unsigned
2347 bitmap_popcount (const sbitmap bmap)
2349 unsigned int count = 0;
2350 unsigned int n = 0;
2351 sbitmap_iterator sbi;
2353 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2354 count++;
2355 return count;
2358 typedef struct
2360 machine_mode mode;
2361 const char *name;
2362 } arm_fixed_mode_set;
2364 /* A small helper for setting fixed-point library libfuncs. */
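/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the routine name "__gnu_addqq3" for QQmode addition.  */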
2366 static void
2367 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2368 const char *funcname, const char *modename,
2369 int num_suffix)
2371 char buffer[50];
2373 if (num_suffix == 0)
2374 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2375 else
2376 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2378 set_optab_libfunc (optable, mode, buffer);
2381 static void
2382 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2383 machine_mode from, const char *funcname,
2384 const char *toname, const char *fromname)
2386 char buffer[50];
2387 const char *maybe_suffix_2 = "";
2389 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2390 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2391 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2392 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2393 maybe_suffix_2 = "2";
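  /* E.g. a signed QQmode to HQmode conversion is registered as
     "__gnu_fractqqhq2", whereas QQmode to SImode is just "__gnu_fractqqsi".  */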
2395 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2396 maybe_suffix_2);
2398 set_conv_libfunc (optable, to, from, buffer);
2401 /* Set up library functions unique to ARM. */
2403 static void
2404 arm_init_libfuncs (void)
2406 /* For Linux, we have access to kernel support for atomic operations. */
2407 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2408 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2410 /* There are no special library functions unless we are using the
2411 ARM BPABI. */
2412 if (!TARGET_BPABI)
2413 return;
2415 /* The functions below are described in Section 4 of the "Run-Time
2416 ABI for the ARM architecture", Version 1.0. */
2418 /* Double-precision floating-point arithmetic. Table 2. */
2419 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2420 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2421 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2422 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2423 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2425 /* Double-precision comparisons. Table 3. */
2426 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2427 set_optab_libfunc (ne_optab, DFmode, NULL);
2428 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2429 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2430 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2431 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2432 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2434 /* Single-precision floating-point arithmetic. Table 4. */
2435 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2436 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2437 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2438 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2439 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2441 /* Single-precision comparisons. Table 5. */
2442 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2443 set_optab_libfunc (ne_optab, SFmode, NULL);
2444 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2445 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2446 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2447 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2448 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2450 /* Floating-point to integer conversions. Table 6. */
2451 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2452 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2453 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2454 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2455 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2456 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2457 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2458 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2460 /* Conversions between floating types. Table 7. */
2461 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2462 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2464 /* Integer to floating-point conversions. Table 8. */
2465 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2466 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2467 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2468 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2469 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2470 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2471 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2472 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2474 /* Long long. Table 9. */
2475 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2476 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2477 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2478 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2479 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2480 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2481 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2482 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2484 /* Integer (32/32->32) division. \S 4.3.1. */
2485 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2486 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2488 /* The divmod functions are designed so that they can be used for
2489 plain division, even though they return both the quotient and the
2490 remainder. The quotient is returned in the usual location (i.e.,
2491 r0 for SImode, {r0, r1} for DImode), just as would be expected
2492 for an ordinary division routine. Because the AAPCS calling
2493 conventions specify that all of { r0, r1, r2, r3 } are
2494 call-clobbered registers, there is no need to tell the compiler
2495 explicitly that those registers are clobbered by these
2496 routines. */
2497 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2498 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2500 /* For SImode division the ABI provides div-without-mod routines,
2501 which are faster. */
2502 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2503 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2505 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2506 divmod libcalls instead. */
2507 set_optab_libfunc (smod_optab, DImode, NULL);
2508 set_optab_libfunc (umod_optab, DImode, NULL);
2509 set_optab_libfunc (smod_optab, SImode, NULL);
2510 set_optab_libfunc (umod_optab, SImode, NULL);
2512 /* Half-precision float operations. The compiler handles all operations
2513 with NULL libfuncs by converting to SFmode. */
2514 switch (arm_fp16_format)
2516 case ARM_FP16_FORMAT_IEEE:
2517 case ARM_FP16_FORMAT_ALTERNATIVE:
2519 /* Conversions. */
2520 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2521 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2522 ? "__gnu_f2h_ieee"
2523 : "__gnu_f2h_alternative"));
2524 set_conv_libfunc (sext_optab, SFmode, HFmode,
2525 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2526 ? "__gnu_h2f_ieee"
2527 : "__gnu_h2f_alternative"));
2529 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2530 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2531 ? "__gnu_d2h_ieee"
2532 : "__gnu_d2h_alternative"));
2534 /* Arithmetic. */
2535 set_optab_libfunc (add_optab, HFmode, NULL);
2536 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2537 set_optab_libfunc (smul_optab, HFmode, NULL);
2538 set_optab_libfunc (neg_optab, HFmode, NULL);
2539 set_optab_libfunc (sub_optab, HFmode, NULL);
2541 /* Comparisons. */
2542 set_optab_libfunc (eq_optab, HFmode, NULL);
2543 set_optab_libfunc (ne_optab, HFmode, NULL);
2544 set_optab_libfunc (lt_optab, HFmode, NULL);
2545 set_optab_libfunc (le_optab, HFmode, NULL);
2546 set_optab_libfunc (ge_optab, HFmode, NULL);
2547 set_optab_libfunc (gt_optab, HFmode, NULL);
2548 set_optab_libfunc (unord_optab, HFmode, NULL);
2549 break;
2551 default:
2552 break;
2555 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2557 const arm_fixed_mode_set fixed_arith_modes[] =
2559 { E_QQmode, "qq" },
2560 { E_UQQmode, "uqq" },
2561 { E_HQmode, "hq" },
2562 { E_UHQmode, "uhq" },
2563 { E_SQmode, "sq" },
2564 { E_USQmode, "usq" },
2565 { E_DQmode, "dq" },
2566 { E_UDQmode, "udq" },
2567 { E_TQmode, "tq" },
2568 { E_UTQmode, "utq" },
2569 { E_HAmode, "ha" },
2570 { E_UHAmode, "uha" },
2571 { E_SAmode, "sa" },
2572 { E_USAmode, "usa" },
2573 { E_DAmode, "da" },
2574 { E_UDAmode, "uda" },
2575 { E_TAmode, "ta" },
2576 { E_UTAmode, "uta" }
2578 const arm_fixed_mode_set fixed_conv_modes[] =
2580 { E_QQmode, "qq" },
2581 { E_UQQmode, "uqq" },
2582 { E_HQmode, "hq" },
2583 { E_UHQmode, "uhq" },
2584 { E_SQmode, "sq" },
2585 { E_USQmode, "usq" },
2586 { E_DQmode, "dq" },
2587 { E_UDQmode, "udq" },
2588 { E_TQmode, "tq" },
2589 { E_UTQmode, "utq" },
2590 { E_HAmode, "ha" },
2591 { E_UHAmode, "uha" },
2592 { E_SAmode, "sa" },
2593 { E_USAmode, "usa" },
2594 { E_DAmode, "da" },
2595 { E_UDAmode, "uda" },
2596 { E_TAmode, "ta" },
2597 { E_UTAmode, "uta" },
2598 { E_QImode, "qi" },
2599 { E_HImode, "hi" },
2600 { E_SImode, "si" },
2601 { E_DImode, "di" },
2602 { E_TImode, "ti" },
2603 { E_SFmode, "sf" },
2604 { E_DFmode, "df" }
2606 unsigned int i, j;
2608 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2610 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2611 "add", fixed_arith_modes[i].name, 3);
2612 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2613 "ssadd", fixed_arith_modes[i].name, 3);
2614 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2615 "usadd", fixed_arith_modes[i].name, 3);
2616 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2617 "sub", fixed_arith_modes[i].name, 3);
2618 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2619 "sssub", fixed_arith_modes[i].name, 3);
2620 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2621 "ussub", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2623 "mul", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2625 "ssmul", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2627 "usmul", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2629 "div", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2631 "udiv", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2633 "ssdiv", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2635 "usdiv", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2637 "neg", fixed_arith_modes[i].name, 2);
2638 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2639 "ssneg", fixed_arith_modes[i].name, 2);
2640 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2641 "usneg", fixed_arith_modes[i].name, 2);
2642 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2643 "ashl", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2645 "ashr", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2647 "lshr", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2649 "ssashl", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2651 "usashl", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2653 "cmp", fixed_arith_modes[i].name, 2);
2656 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2657 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
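  /* Skip identity conversions, and pairs where neither mode is fixed-point;
     only conversions involving at least one fixed-point mode need the
     __gnu_-prefixed helpers.  */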
2659 if (i == j
2660 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2661 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2662 continue;
2664 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2665 fixed_conv_modes[j].mode, "fract",
2666 fixed_conv_modes[i].name,
2667 fixed_conv_modes[j].name);
2668 arm_set_fixed_conv_libfunc (satfract_optab,
2669 fixed_conv_modes[i].mode,
2670 fixed_conv_modes[j].mode, "satfract",
2671 fixed_conv_modes[i].name,
2672 fixed_conv_modes[j].name);
2673 arm_set_fixed_conv_libfunc (fractuns_optab,
2674 fixed_conv_modes[i].mode,
2675 fixed_conv_modes[j].mode, "fractuns",
2676 fixed_conv_modes[i].name,
2677 fixed_conv_modes[j].name);
2678 arm_set_fixed_conv_libfunc (satfractuns_optab,
2679 fixed_conv_modes[i].mode,
2680 fixed_conv_modes[j].mode, "satfractuns",
2681 fixed_conv_modes[i].name,
2682 fixed_conv_modes[j].name);
2686 if (TARGET_AAPCS_BASED)
2687 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2690 /* On AAPCS systems, this is the "struct __va_list". */
2691 static GTY(()) tree va_list_type;
2693 /* Return the type to use as __builtin_va_list. */
2694 static tree
2695 arm_build_builtin_va_list (void)
2697 tree va_list_name;
2698 tree ap_field;
2700 if (!TARGET_AAPCS_BASED)
2701 return std_build_builtin_va_list ();
2703 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2704 defined as:
2706 struct __va_list
2708 void *__ap;
2711 The C Library ABI further reinforces this definition in \S
2712 4.1.
2714 We must follow this definition exactly. The structure tag
2715 name is visible in C++ mangled names, and thus forms a part
2716 of the ABI. The field name may be used by people who
2717 #include <stdarg.h>. */
2718 /* Create the type. */
2719 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2720 /* Give it the required name. */
2721 va_list_name = build_decl (BUILTINS_LOCATION,
2722 TYPE_DECL,
2723 get_identifier ("__va_list"),
2724 va_list_type);
2725 DECL_ARTIFICIAL (va_list_name) = 1;
2726 TYPE_NAME (va_list_type) = va_list_name;
2727 TYPE_STUB_DECL (va_list_type) = va_list_name;
2728 /* Create the __ap field. */
2729 ap_field = build_decl (BUILTINS_LOCATION,
2730 FIELD_DECL,
2731 get_identifier ("__ap"),
2732 ptr_type_node);
2733 DECL_ARTIFICIAL (ap_field) = 1;
2734 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2735 TYPE_FIELDS (va_list_type) = ap_field;
2736 /* Compute its layout. */
2737 layout_type (va_list_type);
2739 return va_list_type;
2742 /* Return an expression of type "void *" pointing to the next
2743 available argument in a variable-argument list. VALIST is the
2744 user-level va_list object, of type __builtin_va_list. */
2745 static tree
2746 arm_extract_valist_ptr (tree valist)
2748 if (TREE_TYPE (valist) == error_mark_node)
2749 return error_mark_node;
2751 /* On an AAPCS target, the pointer is stored within "struct
2752 va_list". */
2753 if (TARGET_AAPCS_BASED)
2755 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2756 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2757 valist, ap_field, NULL_TREE);
2760 return valist;
2763 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2764 static void
2765 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2767 valist = arm_extract_valist_ptr (valist);
2768 std_expand_builtin_va_start (valist, nextarg);
2771 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2772 static tree
2773 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2774 gimple_seq *post_p)
2776 valist = arm_extract_valist_ptr (valist);
2777 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2780 /* Check any incompatible options that the user has specified. */
2781 static void
2782 arm_option_check_internal (struct gcc_options *opts)
2784 int flags = opts->x_target_flags;
2786 /* iWMMXt and NEON are incompatible. */
2787 if (TARGET_IWMMXT
2788 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2789 error ("iWMMXt and NEON are incompatible");
2791 /* Make sure that the processor choice does not conflict with any of the
2792 other command line choices. */
2793 if (TARGET_ARM_P (flags)
2794 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2795 error ("target CPU does not support ARM mode");
2797 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2798 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2799 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2801 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2802 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2804 /* If this target is normally configured to use APCS frames, warn if they
2805 are turned off and debugging is turned on. */
2806 if (TARGET_ARM_P (flags)
2807 && write_symbols != NO_DEBUG
2808 && !TARGET_APCS_FRAME
2809 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2810 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2812 /* iWMMXt unsupported under Thumb mode. */
2813 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2814 error ("iWMMXt unsupported under Thumb mode");
2816 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2817 error ("can not use -mtp=cp15 with 16-bit Thumb");
2819 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2821 error ("RTP PIC is incompatible with Thumb");
2822 flag_pic = 0;
2825 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2826 with MOVT. */
2827 if ((target_pure_code || target_slow_flash_data)
2828 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2830 const char *flag = (target_pure_code ? "-mpure-code" :
2831 "-mslow-flash-data");
2832 error ("%s only supports non-pic code on M-profile targets with the "
2833 "MOVT instruction", flag);
2838 /* Recompute the global settings depending on target attribute options. */
2840 static void
2841 arm_option_params_internal (void)
2843 /* If we are not using the default (ARM mode) section anchor offset
2844 ranges, then set the correct ranges now. */
2845 if (TARGET_THUMB1)
2847 /* Thumb-1 LDR instructions cannot have negative offsets.
2848 Permissible positive offset ranges are 5-bit (for byte loads),
2849 6-bit (for halfword loads), or 7-bit (for word loads).
2850 Empirical results suggest a 7-bit anchor range gives the best
2851 overall code size. */
2852 targetm.min_anchor_offset = 0;
2853 targetm.max_anchor_offset = 127;
2855 else if (TARGET_THUMB2)
2857 /* The minimum is set such that the total size of the block
2858 for a particular anchor is 248 + 1 + 4095 bytes, which is
2859 divisible by eight, ensuring natural spacing of anchors. */
2860 targetm.min_anchor_offset = -248;
2861 targetm.max_anchor_offset = 4095;
2863 else
2865 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2866 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2869 if (optimize_size)
2871 /* If optimizing for size, bump the number of instructions that we
2872 are prepared to conditionally execute (even on a StrongARM). */
2873 max_insns_skipped = 6;
2875 /* For THUMB2, we limit the conditional sequence to one IT block. */
2876 if (TARGET_THUMB2)
2877 max_insns_skipped = arm_restrict_it ? 1 : 4;
2879 else
2880 /* When -mrestrict-it is in use, tone down the if-conversion. */
2881 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2882 ? 1 : current_tune->max_insns_skipped;
2885 /* True if -mflip-thumb should next add an attribute for the default
2886 mode, false if it should next add an attribute for the opposite mode. */
2887 static GTY(()) bool thumb_flipper;
2889 /* Options after initial target override. */
2890 static GTY(()) tree init_optimize;
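/* Default function alignment: 2 bytes when optimizing Thumb code for size,
   4 bytes otherwise, unless the user gave an explicit -falign-functions
   value.  */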
2892 static void
2893 arm_override_options_after_change_1 (struct gcc_options *opts)
2895 if (opts->x_align_functions <= 0)
2896 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2897 && opts->x_optimize_size ? 2 : 4;
2900 /* Implement targetm.override_options_after_change. */
2902 static void
2903 arm_override_options_after_change (void)
2905 arm_configure_build_target (&arm_active_target,
2906 TREE_TARGET_OPTION (target_option_default_node),
2907 &global_options_set, false);
2909 arm_override_options_after_change_1 (&global_options);
2912 /* Implement TARGET_OPTION_SAVE. */
2913 static void
2914 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2916 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2917 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2918 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2921 /* Implement TARGET_OPTION_RESTORE. */
2922 static void
2923 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2925 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2926 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2927 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2928 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2929 false);
2932 /* Reset options between modes that the user has specified. */
2933 static void
2934 arm_option_override_internal (struct gcc_options *opts,
2935 struct gcc_options *opts_set)
2937 arm_override_options_after_change_1 (opts);
2939 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2941 /* The default is to enable interworking, so this warning message would
2942 be confusing to users who have just compiled with, e.g., -march=armv3. */
2943 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2944 opts->x_target_flags &= ~MASK_INTERWORK;
2947 if (TARGET_THUMB_P (opts->x_target_flags)
2948 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2950 warning (0, "target CPU does not support THUMB instructions");
2951 opts->x_target_flags &= ~MASK_THUMB;
2954 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2956 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2957 opts->x_target_flags &= ~MASK_APCS_FRAME;
2960 /* Callee super interworking implies thumb interworking. Adding
2961 this to the flags here simplifies the logic elsewhere. */
2962 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2963 opts->x_target_flags |= MASK_INTERWORK;
2965 /* Need to remember the initial values so that combinations of options like
2966 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2967 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2969 if (! opts_set->x_arm_restrict_it)
2970 opts->x_arm_restrict_it = arm_arch8;
2972 /* ARM execution state and M profile don't have [restrict] IT. */
2973 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2974 opts->x_arm_restrict_it = 0;
2976 /* Enable -munaligned-access by default for
2977 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2978 i.e. Thumb2 and ARM state only.
2979 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2980 - ARMv8 architecture-based processors.
2982 Disable -munaligned-access by default for
2983 - all pre-ARMv6 architecture-based processors
2984 - ARMv6-M architecture-based processors
2985 - ARMv8-M Baseline processors. */
2987 if (! opts_set->x_unaligned_access)
2989 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2990 && arm_arch6 && (arm_arch_notm || arm_arch7));
2992 else if (opts->x_unaligned_access == 1
2993 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2995 warning (0, "target CPU does not support unaligned accesses");
2996 opts->x_unaligned_access = 0;
2999 /* Don't warn since it's on by default in -O2. */
3000 if (TARGET_THUMB1_P (opts->x_target_flags))
3001 opts->x_flag_schedule_insns = 0;
3002 else
3003 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3005 /* Disable shrink-wrap when optimizing a function for size, since it tends to
3006 generate additional returns. */
3007 if (optimize_function_for_size_p (cfun)
3008 && TARGET_THUMB2_P (opts->x_target_flags))
3009 opts->x_flag_shrink_wrap = false;
3010 else
3011 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3013 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3014 - epilogue_insns - does not accurately model the corresponding insns
3015 emitted in the asm file. In particular, see the comment in thumb_exit
3016 'Find out how many of the (return) argument registers we can corrupt'.
3017 As a consequence, the epilogue may clobber registers without fipa-ra
3018 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3019 TODO: Accurately model clobbers for epilogue_insns and reenable
3020 fipa-ra. */
3021 if (TARGET_THUMB1_P (opts->x_target_flags))
3022 opts->x_flag_ipa_ra = 0;
3023 else
3024 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3026 /* Thumb2 inline assembly code should always use unified syntax.
3027 This will apply to ARM and Thumb1 eventually. */
3028 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3030 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3031 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3032 #endif
3035 static sbitmap isa_all_fpubits;
3036 static sbitmap isa_quirkbits;
3038 /* Configure a build target TARGET from the user-specified options OPTS and
3039 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3040 architecture have been specified, but the two are not identical. */
3041 void
3042 arm_configure_build_target (struct arm_build_target *target,
3043 struct cl_target_option *opts,
3044 struct gcc_options *opts_set,
3045 bool warn_compatible)
3047 const cpu_option *arm_selected_tune = NULL;
3048 const arch_option *arm_selected_arch = NULL;
3049 const cpu_option *arm_selected_cpu = NULL;
3050 const arm_fpu_desc *arm_selected_fpu = NULL;
3051 const char *tune_opts = NULL;
3052 const char *arch_opts = NULL;
3053 const char *cpu_opts = NULL;
3055 bitmap_clear (target->isa);
3056 target->core_name = NULL;
3057 target->arch_name = NULL;
3059 if (opts_set->x_arm_arch_string)
3061 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3062 "-march",
3063 opts->x_arm_arch_string);
3064 arch_opts = strchr (opts->x_arm_arch_string, '+');
3067 if (opts_set->x_arm_cpu_string)
3069 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3070 opts->x_arm_cpu_string);
3071 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3072 arm_selected_tune = arm_selected_cpu;
3073 /* If taking the tuning from -mcpu, we don't need to rescan the
3074 options for tuning. */
3077 if (opts_set->x_arm_tune_string)
3079 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3080 opts->x_arm_tune_string);
3081 tune_opts = strchr (opts->x_arm_tune_string, '+');
3084 if (arm_selected_arch)
3086 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3087 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3088 arch_opts);
3090 if (arm_selected_cpu)
3092 auto_sbitmap cpu_isa (isa_num_bits);
3093 auto_sbitmap isa_delta (isa_num_bits);
3095 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3096 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3097 cpu_opts);
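      /* Check how the CPU's capabilities differ from those implied by the
	 selected architecture; any remaining difference (beyond quirk and FPU
	 bits) means the -mcpu and -march options conflict.  */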
3098 bitmap_xor (isa_delta, cpu_isa, target->isa);
3099 /* Ignore any bits that are quirk bits. */
3100 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3101 /* Ignore (for now) any bits that might be set by -mfpu. */
3102 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3104 if (!bitmap_empty_p (isa_delta))
3106 if (warn_compatible)
3107 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3108 arm_selected_cpu->common.name,
3109 arm_selected_arch->common.name);
3110 /* -march wins for code generation.
3111 -mcpu wins for default tuning. */
3112 if (!arm_selected_tune)
3113 arm_selected_tune = arm_selected_cpu;
3115 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3116 target->arch_name = arm_selected_arch->common.name;
3118 else
3120 /* Architecture and CPU are essentially the same.
3121 Prefer the CPU setting. */
3122 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3123 target->core_name = arm_selected_cpu->common.name;
3124 /* Copy the CPU's capabilities, so that we inherit the
3125 appropriate extensions and quirks. */
3126 bitmap_copy (target->isa, cpu_isa);
3129 else
3131 /* Pick a CPU based on the architecture. */
3132 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3133 target->arch_name = arm_selected_arch->common.name;
3134 /* Note: target->core_name is left unset in this path. */
3137 else if (arm_selected_cpu)
3139 target->core_name = arm_selected_cpu->common.name;
3140 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3141 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3142 cpu_opts);
3143 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3145 /* If the user did not specify a processor or architecture, choose
3146 one for them. */
3147 else
3149 const cpu_option *sel;
3150 auto_sbitmap sought_isa (isa_num_bits);
3151 bitmap_clear (sought_isa);
3152 auto_sbitmap default_isa (isa_num_bits);
3154 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3155 TARGET_CPU_DEFAULT);
3156 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3157 gcc_assert (arm_selected_cpu->common.name);
3159 /* RWE: All of the selection logic below (to the end of this
3160 'if' clause) looks somewhat suspect. It appears to be mostly
3161 there to support forcing thumb support when the default CPU
3162 does not have thumb (somewhat dubious in terms of what the
3163 user might be expecting). I think it should be removed once
3164 support for the pre-thumb era cores is removed. */
3165 sel = arm_selected_cpu;
3166 arm_initialize_isa (default_isa, sel->common.isa_bits);
3167 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3168 cpu_opts);
3170 /* Now check to see if the user has specified any command line
3171 switches that require certain abilities from the cpu. */
3173 if (TARGET_INTERWORK || TARGET_THUMB)
3175 bitmap_set_bit (sought_isa, isa_bit_thumb);
3176 bitmap_set_bit (sought_isa, isa_bit_mode32);
3178 /* There are no ARM processors that support both APCS-26 and
3179 interworking. Therefore we forcibly remove MODE26 from
3180 the isa features here (if it was set), so that the
3181 search below will always be able to find a compatible
3182 processor. */
3183 bitmap_clear_bit (default_isa, isa_bit_mode26);
3186 /* If there are such requirements and the default CPU does not
3187 satisfy them, we need to run over the complete list of
3188 cores looking for one that is satisfactory. */
3189 if (!bitmap_empty_p (sought_isa)
3190 && !bitmap_subset_p (sought_isa, default_isa))
3192 auto_sbitmap candidate_isa (isa_num_bits);
3193 /* We're only interested in a CPU with at least the
3194 capabilities of the default CPU and the required
3195 additional features. */
3196 bitmap_ior (default_isa, default_isa, sought_isa);
3198 /* Try to locate a CPU type that supports all of the abilities
3199 of the default CPU, plus the extra abilities requested by
3200 the user. */
3201 for (sel = all_cores; sel->common.name != NULL; sel++)
3203 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3204 /* An exact match? */
3205 if (bitmap_equal_p (default_isa, candidate_isa))
3206 break;
3209 if (sel->common.name == NULL)
3211 unsigned current_bit_count = isa_num_bits;
3212 const cpu_option *best_fit = NULL;
3214 /* Ideally we would like to issue an error message here
3215 saying that it was not possible to find a CPU compatible
3216 with the default CPU, but which also supports the command
3217 line options specified by the programmer, and so they
3218 ought to use the -mcpu=<name> command line option to
3219 override the default CPU type.
3221 If we cannot find a CPU that has exactly the
3222 characteristics of the default CPU and the given
3223 command line options we scan the array again looking
3224 for a best match. The best match must have at least
3225 the capabilities of the perfect match. */
3226 for (sel = all_cores; sel->common.name != NULL; sel++)
3228 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3230 if (bitmap_subset_p (default_isa, candidate_isa))
3232 unsigned count;
3234 bitmap_and_compl (candidate_isa, candidate_isa,
3235 default_isa);
3236 count = bitmap_popcount (candidate_isa);
3238 if (count < current_bit_count)
3240 best_fit = sel;
3241 current_bit_count = count;
3245 gcc_assert (best_fit);
3246 sel = best_fit;
3249 arm_selected_cpu = sel;
3252 /* Now we know the CPU, we can finally initialize the target
3253 structure. */
3254 target->core_name = arm_selected_cpu->common.name;
3255 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3256 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3257 cpu_opts);
3258 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3261 gcc_assert (arm_selected_cpu);
3262 gcc_assert (arm_selected_arch);
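  /* If an FPU was given explicitly with -mfpu, replace whatever FPU feature
     bits the CPU/architecture implied with the bits of the requested FPU.  */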
3264 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3266 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3267 auto_sbitmap fpu_bits (isa_num_bits);
3269 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3270 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3271 bitmap_ior (target->isa, target->isa, fpu_bits);
3274 if (!arm_selected_tune)
3275 arm_selected_tune = arm_selected_cpu;
3276 else /* Validate the features passed to -mtune. */
3277 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3279 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3281 /* Finish initializing the target structure. */
3282 target->arch_pp_name = arm_selected_arch->arch;
3283 target->base_arch = arm_selected_arch->base_arch;
3284 target->profile = arm_selected_arch->profile;
3286 target->tune_flags = tune_data->tune_flags;
3287 target->tune = tune_data->tune;
3288 target->tune_core = tune_data->scheduler;
3291 /* Fix up any incompatible options that the user has specified. */
3292 static void
3293 arm_option_override (void)
3295 static const enum isa_feature fpu_bitlist[]
3296 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3297 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3298 cl_target_option opts;
3300 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3301 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3303 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3304 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3306 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
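  /* -mfpu was not given on the command line: record the "auto" setting so the
     FPU capabilities are later derived from the CPU/architecture description.  */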
3308 if (!global_options_set.x_arm_fpu_index)
3310 bool ok;
3311 int fpu_index;
3313 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3314 CL_TARGET);
3315 gcc_assert (ok);
3316 arm_fpu_index = (enum fpu_type) fpu_index;
3319 cl_target_option_save (&opts, &global_options);
3320 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3321 true);
3323 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3324 SUBTARGET_OVERRIDE_OPTIONS;
3325 #endif
3327 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3328 arm_base_arch = arm_active_target.base_arch;
3330 arm_tune = arm_active_target.tune_core;
3331 tune_flags = arm_active_target.tune_flags;
3332 current_tune = arm_active_target.tune;
3334 /* TBD: Dwarf info for apcs frame is not handled yet. */
3335 if (TARGET_APCS_FRAME)
3336 flag_shrink_wrap = false;
3338 /* BPABI targets use linker tricks to allow interworking on cores
3339 without thumb support. */
3340 if (TARGET_INTERWORK
3341 && !TARGET_BPABI
3342 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
 3344 	      warning (0, "target CPU does not support interworking");
3345 target_flags &= ~MASK_INTERWORK;
3348 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3350 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3351 target_flags |= MASK_APCS_FRAME;
3354 if (TARGET_POKE_FUNCTION_NAME)
3355 target_flags |= MASK_APCS_FRAME;
3357 if (TARGET_APCS_REENT && flag_pic)
3358 error ("-fpic and -mapcs-reent are incompatible");
3360 if (TARGET_APCS_REENT)
3361 warning (0, "APCS reentrant code not supported. Ignored");
3363 /* Initialize boolean versions of the architectural flags, for use
3364 in the arm.md file. */
3365 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3366 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3367 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3368 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3369 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3370 arm_arch5te = arm_arch5e
3371 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3372 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3373 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3374 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3375 arm_arch6m = arm_arch6 && !arm_arch_notm;
3376 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3377 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3378 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3379 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3380 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3381 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3382 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3383 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3384 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3385 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3386 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3387 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3388 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3389 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3390 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3391 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3392 if (arm_fp16_inst)
3394 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3395 error ("selected fp16 options are incompatible");
3396 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3400 /* Set up some tuning parameters. */
3401 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3402 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3403 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3404 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3405 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3406 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3408 /* And finally, set up some quirks. */
3409 arm_arch_no_volatile_ce
3410 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3411 arm_arch6kz
3412 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3414 /* V5 code we generate is completely interworking capable, so we turn off
3415 TARGET_INTERWORK here to avoid many tests later on. */
3417 /* XXX However, we must pass the right pre-processor defines to CPP
3418 or GLD can get confused. This is a hack. */
3419 if (TARGET_INTERWORK)
3420 arm_cpp_interwork = 1;
3422 if (arm_arch5)
3423 target_flags &= ~MASK_INTERWORK;
3425 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3426 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3428 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3429 error ("iwmmxt abi requires an iwmmxt capable cpu");
3431 /* If soft-float is specified then don't use FPU. */
3432 if (TARGET_SOFT_FLOAT)
3433 arm_fpu_attr = FPU_NONE;
3434 else
3435 arm_fpu_attr = FPU_VFP;
3437 if (TARGET_AAPCS_BASED)
3439 if (TARGET_CALLER_INTERWORKING)
3440 error ("AAPCS does not support -mcaller-super-interworking");
3441 else
3442 if (TARGET_CALLEE_INTERWORKING)
3443 error ("AAPCS does not support -mcallee-super-interworking");
3446 /* __fp16 support currently assumes the core has ldrh. */
3447 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3448 sorry ("__fp16 and no ldrh");
3450 if (TARGET_AAPCS_BASED)
3452 if (arm_abi == ARM_ABI_IWMMXT)
3453 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3454 else if (TARGET_HARD_FLOAT_ABI)
3456 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3457 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3458 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3460 else
3461 arm_pcs_default = ARM_PCS_AAPCS;
3463 else
3465 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3466 sorry ("-mfloat-abi=hard and VFP");
3468 if (arm_abi == ARM_ABI_APCS)
3469 arm_pcs_default = ARM_PCS_APCS;
3470 else
3471 arm_pcs_default = ARM_PCS_ATPCS;
3474 /* For arm2/3 there is no need to do any scheduling if we are doing
3475 software floating-point. */
3476 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3477 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3479 /* Use the cp15 method if it is available. */
3480 if (target_thread_pointer == TP_AUTO)
3482 if (arm_arch6k && !TARGET_THUMB1)
3483 target_thread_pointer = TP_CP15;
3484 else
3485 target_thread_pointer = TP_SOFT;
3488 /* Override the default structure alignment for AAPCS ABI. */
3489 if (!global_options_set.x_arm_structure_size_boundary)
3491 if (TARGET_AAPCS_BASED)
3492 arm_structure_size_boundary = 8;
3494 else
3496 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3498 if (arm_structure_size_boundary != 8
3499 && arm_structure_size_boundary != 32
3500 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3502 if (ARM_DOUBLEWORD_ALIGN)
3503 warning (0,
3504 "structure size boundary can only be set to 8, 32 or 64");
3505 else
3506 warning (0, "structure size boundary can only be set to 8 or 32");
3507 arm_structure_size_boundary
3508 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3512 if (TARGET_VXWORKS_RTP)
3514 if (!global_options_set.x_arm_pic_data_is_text_relative)
3515 arm_pic_data_is_text_relative = 0;
3517 else if (flag_pic
3518 && !arm_pic_data_is_text_relative
3519 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3520 /* When text & data segments don't have a fixed displacement, the
3521 intended use is with a single, read only, pic base register.
3522 Unless the user explicitly requested not to do that, set
3523 it. */
3524 target_flags |= MASK_SINGLE_PIC_BASE;
3526 /* If stack checking is disabled, we can use r10 as the PIC register,
3527 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3528 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3530 if (TARGET_VXWORKS_RTP)
3531 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3532 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3535 if (flag_pic && TARGET_VXWORKS_RTP)
3536 arm_pic_register = 9;
3538 if (arm_pic_register_string != NULL)
3540 int pic_register = decode_reg_name (arm_pic_register_string);
3542 if (!flag_pic)
3543 warning (0, "-mpic-register= is useless without -fpic");
3545 /* Prevent the user from choosing an obviously stupid PIC register. */
3546 else if (pic_register < 0 || call_used_regs[pic_register]
3547 || pic_register == HARD_FRAME_POINTER_REGNUM
3548 || pic_register == STACK_POINTER_REGNUM
3549 || pic_register >= PC_REGNUM
3550 || (TARGET_VXWORKS_RTP
3551 && (unsigned int) pic_register != arm_pic_register))
3552 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3553 else
3554 arm_pic_register = pic_register;
3557 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3558 if (fix_cm3_ldrd == 2)
3560 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3561 fix_cm3_ldrd = 1;
3562 else
3563 fix_cm3_ldrd = 0;
3566 /* Hot/Cold partitioning is not currently supported, since we can't
3567 handle literal pool placement in that case. */
3568 if (flag_reorder_blocks_and_partition)
3570 inform (input_location,
3571 "-freorder-blocks-and-partition not supported on this architecture");
3572 flag_reorder_blocks_and_partition = 0;
3573 flag_reorder_blocks = 1;
3576 if (flag_pic)
3577 /* Hoisting PIC address calculations more aggressively provides a small,
3578 but measurable, size reduction for PIC code. Therefore, we decrease
3579 the bar for unrestricted expression hoisting to the cost of PIC address
3580 calculation, which is 2 instructions. */
3581 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3582 global_options.x_param_values,
3583 global_options_set.x_param_values);
3585 /* ARM EABI defaults to strict volatile bitfields. */
3586 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3587 && abi_version_at_least(2))
3588 flag_strict_volatile_bitfields = 1;
 3590   /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3591 have deemed it beneficial (signified by setting
3592 prefetch.num_slots to 1 or more). */
3593 if (flag_prefetch_loop_arrays < 0
3594 && HAVE_prefetch
3595 && optimize >= 3
3596 && current_tune->prefetch.num_slots > 0)
3597 flag_prefetch_loop_arrays = 1;
3599 /* Set up parameters to be used in prefetching algorithm. Do not
3600 override the defaults unless we are tuning for a core we have
3601 researched values for. */
3602 if (current_tune->prefetch.num_slots > 0)
3603 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3604 current_tune->prefetch.num_slots,
3605 global_options.x_param_values,
3606 global_options_set.x_param_values);
3607 if (current_tune->prefetch.l1_cache_line_size >= 0)
3608 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3609 current_tune->prefetch.l1_cache_line_size,
3610 global_options.x_param_values,
3611 global_options_set.x_param_values);
3612 if (current_tune->prefetch.l1_cache_size >= 0)
3613 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3614 current_tune->prefetch.l1_cache_size,
3615 global_options.x_param_values,
3616 global_options_set.x_param_values);
 3618   /* Use Neon to perform 64-bit operations rather than core
3619 registers. */
3620 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3621 if (use_neon_for_64bits == 1)
3622 prefer_neon_for_64bits = true;
3624 /* Use the alternative scheduling-pressure algorithm by default. */
3625 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3626 global_options.x_param_values,
3627 global_options_set.x_param_values);
3629 /* Look through ready list and all of queue for instructions
3630 relevant for L2 auto-prefetcher. */
3631 int param_sched_autopref_queue_depth;
3633 switch (current_tune->sched_autopref)
3635 case tune_params::SCHED_AUTOPREF_OFF:
3636 param_sched_autopref_queue_depth = -1;
3637 break;
3639 case tune_params::SCHED_AUTOPREF_RANK:
3640 param_sched_autopref_queue_depth = 0;
3641 break;
3643 case tune_params::SCHED_AUTOPREF_FULL:
3644 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3645 break;
3647 default:
3648 gcc_unreachable ();
3651 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3652 param_sched_autopref_queue_depth,
3653 global_options.x_param_values,
3654 global_options_set.x_param_values);
3656 /* Currently, for slow flash data, we just disable literal pools. We also
3657 disable it for pure-code. */
3658 if (target_slow_flash_data || target_pure_code)
3659 arm_disable_literal_pool = true;
3661 if (use_cmse && !arm_arch_cmse)
3662 error ("target CPU does not support ARMv8-M Security Extensions");
 3664   /* Disable scheduling fusion by default if the processor is not ARMv7
3665 or doesn't prefer ldrd/strd. */
3666 if (flag_schedule_fusion == 2
3667 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3668 flag_schedule_fusion = 0;
 3670   /* Need to remember initial options before they are overridden.  */
3671 init_optimize = build_optimization_node (&global_options);
3673 arm_option_override_internal (&global_options, &global_options_set);
3674 arm_option_check_internal (&global_options);
3675 arm_option_params_internal ();
3677 /* Create the default target_options structure. */
3678 target_option_default_node = target_option_current_node
3679 = build_target_option_node (&global_options);
3681 /* Register global variables with the garbage collector. */
3682 arm_add_gc_roots ();
 3684   /* Record the initial mode for testing.  */
3685 thumb_flipper = TARGET_THUMB;
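/* As an illustration of how the information computed above is consumed
   (hypothetical user code, not part of this file): arm_arch_name, built
   with the sprintf earlier in this function, is later emitted as a
   preprocessor built-in by the target's CPP-builtins hook, so source code
   can test macros such as

     #if defined (__ARM_ARCH_7A__)
       ... use ARMv7-A specific sequences ...
     #endif

   together with the other feature macros derived from arm_active_target.  */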
3688 static void
3689 arm_add_gc_roots (void)
3691 gcc_obstack_init(&minipool_obstack);
3692 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3695 /* A table of known ARM exception types.
3696 For use with the interrupt function attribute. */
3698 typedef struct
3700 const char *const arg;
3701 const unsigned long return_value;
3703 isr_attribute_arg;
3705 static const isr_attribute_arg isr_attribute_args [] =
3707 { "IRQ", ARM_FT_ISR },
3708 { "irq", ARM_FT_ISR },
3709 { "FIQ", ARM_FT_FIQ },
3710 { "fiq", ARM_FT_FIQ },
3711 { "ABORT", ARM_FT_ISR },
3712 { "abort", ARM_FT_ISR },
3713 { "ABORT", ARM_FT_ISR },
3714 { "abort", ARM_FT_ISR },
3715 { "UNDEF", ARM_FT_EXCEPTION },
3716 { "undef", ARM_FT_EXCEPTION },
3717 { "SWI", ARM_FT_EXCEPTION },
3718 { "swi", ARM_FT_EXCEPTION },
3719 { NULL, ARM_FT_NORMAL }
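/* For illustration only (hypothetical user code, not part of this file):
   the strings above are the arguments accepted by the "interrupt"/"isr"
   function attribute, for example

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fault_handler (void) __attribute__ ((interrupt ("ABORT")));

   A string that does not appear in the table makes arm_isr_value below
   return ARM_FT_UNKNOWN.  */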
3722 /* Returns the (interrupt) function type of the current
3723 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3725 static unsigned long
3726 arm_isr_value (tree argument)
3728 const isr_attribute_arg * ptr;
3729 const char * arg;
3731 if (!arm_arch_notm)
3732 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3734 /* No argument - default to IRQ. */
3735 if (argument == NULL_TREE)
3736 return ARM_FT_ISR;
3738 /* Get the value of the argument. */
3739 if (TREE_VALUE (argument) == NULL_TREE
3740 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3741 return ARM_FT_UNKNOWN;
3743 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3745 /* Check it against the list of known arguments. */
3746 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3747 if (streq (arg, ptr->arg))
3748 return ptr->return_value;
3750 /* An unrecognized interrupt type. */
3751 return ARM_FT_UNKNOWN;
3754 /* Computes the type of the current function. */
3756 static unsigned long
3757 arm_compute_func_type (void)
3759 unsigned long type = ARM_FT_UNKNOWN;
3760 tree a;
3761 tree attr;
3763 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3765 /* Decide if the current function is volatile. Such functions
3766 never return, and many memory cycles can be saved by not storing
3767 register values that will never be needed again. This optimization
3768 was added to speed up context switching in a kernel application. */
3769 if (optimize > 0
3770 && (TREE_NOTHROW (current_function_decl)
3771 || !(flag_unwind_tables
3772 || (flag_exceptions
3773 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3774 && TREE_THIS_VOLATILE (current_function_decl))
3775 type |= ARM_FT_VOLATILE;
3777 if (cfun->static_chain_decl != NULL)
3778 type |= ARM_FT_NESTED;
3780 attr = DECL_ATTRIBUTES (current_function_decl);
3782 a = lookup_attribute ("naked", attr);
3783 if (a != NULL_TREE)
3784 type |= ARM_FT_NAKED;
3786 a = lookup_attribute ("isr", attr);
3787 if (a == NULL_TREE)
3788 a = lookup_attribute ("interrupt", attr);
3790 if (a == NULL_TREE)
3791 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3792 else
3793 type |= arm_isr_value (TREE_VALUE (a));
3795 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3796 type |= ARM_FT_CMSE_ENTRY;
3798 return type;
3801 /* Returns the type of the current function. */
3803 unsigned long
3804 arm_current_func_type (void)
3806 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3807 cfun->machine->func_type = arm_compute_func_type ();
3809 return cfun->machine->func_type;
3812 bool
3813 arm_allocate_stack_slots_for_args (void)
3815 /* Naked functions should not allocate stack slots for arguments. */
3816 return !IS_NAKED (arm_current_func_type ());
3819 static bool
3820 arm_warn_func_return (tree decl)
3822 /* Naked functions are implemented entirely in assembly, including the
3823 return sequence, so suppress warnings about this. */
3824 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3828 /* Output assembler code for a block containing the constant parts
3829 of a trampoline, leaving space for the variable parts.
3831 On the ARM, (if r8 is the static chain regnum, and remembering that
3832 referencing pc adds an offset of 8) the trampoline looks like:
3833 ldr r8, [pc, #0]
3834 ldr pc, [pc]
3835 .word static chain value
3836 .word function's address
3837 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3839 static void
3840 arm_asm_trampoline_template (FILE *f)
3842 fprintf (f, "\t.syntax unified\n");
3844 if (TARGET_ARM)
3846 fprintf (f, "\t.arm\n");
3847 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3848 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3850 else if (TARGET_THUMB2)
3852 fprintf (f, "\t.thumb\n");
3853 /* The Thumb-2 trampoline is similar to the arm implementation.
3854 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3855 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3856 STATIC_CHAIN_REGNUM, PC_REGNUM);
3857 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3859 else
3861 ASM_OUTPUT_ALIGN (f, 2);
3862 fprintf (f, "\t.code\t16\n");
3863 fprintf (f, ".Ltrampoline_start:\n");
3864 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3865 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3866 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3867 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3868 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3869 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3871 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3872 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
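/* For illustration only (hypothetical user code, not part of this file):
   a trampoline such as the template above is materialized when the
   address of a nested function (a GNU C extension) escapes, because the
   nested function needs its static chain set up before it runs:

     int apply (int (*fn) (int), int x) { return fn (x); }

     int
     outer (int bias)
     {
       int add_bias (int v) { return v + bias; }
       return apply (add_bias, 3);
     }

   Here "add_bias" is reached through a stack trampoline that loads the
   static chain register and then branches to the real code.  */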
3875 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3877 static void
3878 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3880 rtx fnaddr, mem, a_tramp;
3882 emit_block_move (m_tramp, assemble_trampoline_template (),
3883 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3885 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3886 emit_move_insn (mem, chain_value);
3888 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3889 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3890 emit_move_insn (mem, fnaddr);
3892 a_tramp = XEXP (m_tramp, 0);
3893 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3894 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3895 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3898 /* Thumb trampolines should be entered in thumb mode, so set
3899 the bottom bit of the address. */
3901 static rtx
3902 arm_trampoline_adjust_address (rtx addr)
3904 if (TARGET_THUMB)
3905 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3906 NULL, 0, OPTAB_LIB_WIDEN);
3907 return addr;
3910 /* Return 1 if it is possible to return using a single instruction.
3911 If SIBLING is non-null, this is a test for a return before a sibling
3912 call. SIBLING is the call insn, so we can examine its register usage. */
3915 use_return_insn (int iscond, rtx sibling)
3917 int regno;
3918 unsigned int func_type;
3919 unsigned long saved_int_regs;
3920 unsigned HOST_WIDE_INT stack_adjust;
3921 arm_stack_offsets *offsets;
3923 /* Never use a return instruction before reload has run. */
3924 if (!reload_completed)
3925 return 0;
3927 func_type = arm_current_func_type ();
3929 /* Naked, volatile and stack alignment functions need special
3930 consideration. */
3931 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3932 return 0;
3934 /* So do interrupt functions that use the frame pointer and Thumb
3935 interrupt functions. */
3936 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3937 return 0;
3939 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3940 && !optimize_function_for_size_p (cfun))
3941 return 0;
3943 offsets = arm_get_frame_offsets ();
3944 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3946 /* As do variadic functions. */
3947 if (crtl->args.pretend_args_size
3948 || cfun->machine->uses_anonymous_args
3949 /* Or if the function calls __builtin_eh_return () */
3950 || crtl->calls_eh_return
3951 /* Or if the function calls alloca */
3952 || cfun->calls_alloca
3953 /* Or if there is a stack adjustment. However, if the stack pointer
3954 is saved on the stack, we can use a pre-incrementing stack load. */
3955 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3956 && stack_adjust == 4))
3957 /* Or if the static chain register was saved above the frame, under the
3958 assumption that the stack pointer isn't saved on the stack. */
3959 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3960 && arm_compute_static_chain_stack_bytes() != 0))
3961 return 0;
3963 saved_int_regs = offsets->saved_regs_mask;
3965 /* Unfortunately, the insn
3967 ldmib sp, {..., sp, ...}
3969 triggers a bug on most SA-110 based devices, such that the stack
3970 pointer won't be correctly restored if the instruction takes a
3971 page fault. We work around this problem by popping r3 along with
3972 the other registers, since that is never slower than executing
3973 another instruction.
3975 We test for !arm_arch5 here, because code for any architecture
3976 less than this could potentially be run on one of the buggy
3977 chips. */
3978 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3980 /* Validate that r3 is a call-clobbered register (always true in
3981 the default abi) ... */
3982 if (!call_used_regs[3])
3983 return 0;
3985 /* ... that it isn't being used for a return value ... */
3986 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3987 return 0;
3989 /* ... or for a tail-call argument ... */
3990 if (sibling)
3992 gcc_assert (CALL_P (sibling));
3994 if (find_regno_fusage (sibling, USE, 3))
3995 return 0;
3998 /* ... and that there are no call-saved registers in r0-r2
3999 (always true in the default ABI). */
4000 if (saved_int_regs & 0x7)
4001 return 0;
4004 /* Can't be done if interworking with Thumb, and any registers have been
4005 stacked. */
4006 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4007 return 0;
4009 /* On StrongARM, conditional returns are expensive if they aren't
4010 taken and multiple registers have been stacked. */
4011 if (iscond && arm_tune_strongarm)
4013 /* Conditional return when just the LR is stored is a simple
4014 conditional-load instruction, that's not expensive. */
4015 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4016 return 0;
4018 if (flag_pic
4019 && arm_pic_register != INVALID_REGNUM
4020 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4021 return 0;
 4024   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4025 several instructions if anything needs to be popped. */
4026 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4027 return 0;
4029 /* If there are saved registers but the LR isn't saved, then we need
4030 two instructions for the return. */
4031 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4032 return 0;
4034 /* Can't be done if any of the VFP regs are pushed,
4035 since this also requires an insn. */
4036 if (TARGET_HARD_FLOAT)
4037 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4038 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4039 return 0;
4041 if (TARGET_REALLY_IWMMXT)
4042 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4043 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4044 return 0;
4046 return 1;
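/* For illustration only (hypothetical user code, not part of this file):
   a simple leaf function such as

     int add (int a, int b) { return a + b; }

   saves no registers and needs no stack adjustment, so this predicate
   returns nonzero and the epilogue can be a single return instruction
   (e.g. "bx lr").  A function that uses alloca, variadic arguments or
   __builtin_eh_return is rejected by the checks above and needs a
   multi-instruction epilogue.  */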
4049 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4050 shrink-wrapping if possible. This is the case if we need to emit a
4051 prologue, which we can test by looking at the offsets. */
4052 bool
4053 use_simple_return_p (void)
4055 arm_stack_offsets *offsets;
4057 /* Note this function can be called before or after reload. */
4058 if (!reload_completed)
4059 arm_compute_frame_layout ();
4061 offsets = arm_get_frame_offsets ();
4062 return offsets->outgoing_args != 0;
4065 /* Return TRUE if int I is a valid immediate ARM constant. */
4068 const_ok_for_arm (HOST_WIDE_INT i)
4070 int lowbit;
4072 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4073 be all zero, or all one. */
4074 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4075 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4076 != ((~(unsigned HOST_WIDE_INT) 0)
4077 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4078 return FALSE;
4080 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4082 /* Fast return for 0 and small values. We must do this for zero, since
4083 the code below can't handle that one case. */
4084 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4085 return TRUE;
4087 /* Get the number of trailing zeros. */
4088 lowbit = ffs((int) i) - 1;
4090 /* Only even shifts are allowed in ARM mode so round down to the
4091 nearest even number. */
4092 if (TARGET_ARM)
4093 lowbit &= ~1;
4095 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4096 return TRUE;
4098 if (TARGET_ARM)
4100 /* Allow rotated constants in ARM mode. */
4101 if (lowbit <= 4
4102 && ((i & ~0xc000003f) == 0
4103 || (i & ~0xf000000f) == 0
4104 || (i & ~0xfc000003) == 0))
4105 return TRUE;
4107 else if (TARGET_THUMB2)
4109 HOST_WIDE_INT v;
4111 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4112 v = i & 0xff;
4113 v |= v << 16;
4114 if (i == v || i == (v | (v << 8)))
4115 return TRUE;
4117 /* Allow repeated pattern 0xXY00XY00. */
4118 v = i & 0xff00;
4119 v |= v << 16;
4120 if (i == v)
4121 return TRUE;
4123 else if (TARGET_HAVE_MOVT)
4125 /* Thumb-1 Targets with MOVT. */
4126 if (i > 0xffff)
4127 return FALSE;
4128 else
4129 return TRUE;
4132 return FALSE;
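/* A few concrete examples of the rules above (illustrative only):

     0x000000ff   valid: fits in 8 bits
     0x0000ff00   valid: 8 bits shifted left by 8
     0x00ff0000   valid: 8 bits at an even bit position
     0xff00ff00   valid only as a Thumb-2 replicated pattern (0xXY00XY00)
     0x00000101   not a valid data-processing immediate (it spans 9 bits),
                  although movw can load it directly on cores with MOVT;
                  otherwise the splitters below use two instructions.  */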
4135 /* Return true if I is a valid constant for the operation CODE. */
4137 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4139 if (const_ok_for_arm (i))
4140 return 1;
4142 switch (code)
4144 case SET:
4145 /* See if we can use movw. */
4146 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4147 return 1;
4148 else
4149 /* Otherwise, try mvn. */
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4152 case PLUS:
4153 /* See if we can use addw or subw. */
4154 if (TARGET_THUMB2
4155 && ((i & 0xfffff000) == 0
4156 || ((-i) & 0xfffff000) == 0))
4157 return 1;
4158 /* Fall through. */
4159 case COMPARE:
4160 case EQ:
4161 case NE:
4162 case GT:
4163 case LE:
4164 case LT:
4165 case GE:
4166 case GEU:
4167 case LTU:
4168 case GTU:
4169 case LEU:
4170 case UNORDERED:
4171 case ORDERED:
4172 case UNEQ:
4173 case UNGE:
4174 case UNLT:
4175 case UNGT:
4176 case UNLE:
4177 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4179 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4180 case XOR:
4181 return 0;
4183 case IOR:
4184 if (TARGET_THUMB2)
4185 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4186 return 0;
4188 case AND:
4189 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4191 default:
4192 gcc_unreachable ();
4196 /* Return true if I is a valid di mode constant for the operation CODE. */
4198 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4200 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4201 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4202 rtx hi = GEN_INT (hi_val);
4203 rtx lo = GEN_INT (lo_val);
4205 if (TARGET_THUMB1)
4206 return 0;
4208 switch (code)
4210 case AND:
4211 case IOR:
4212 case XOR:
4213 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4214 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4215 case PLUS:
4216 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4218 default:
4219 return 0;
4223 /* Emit a sequence of insns to handle a large constant.
4224 CODE is the code of the operation required, it can be any of SET, PLUS,
4225 IOR, AND, XOR, MINUS;
4226 MODE is the mode in which the operation is being performed;
4227 VAL is the integer to operate on;
4228 SOURCE is the other operand (a register, or a null-pointer for SET);
4229 SUBTARGETS means it is safe to create scratch registers if that will
4230 either produce a simpler sequence, or we will want to cse the values.
4231 Return value is the number of insns emitted. */
4233 /* ??? Tweak this for thumb2. */
4235 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4236 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4238 rtx cond;
4240 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4241 cond = COND_EXEC_TEST (PATTERN (insn));
4242 else
4243 cond = NULL_RTX;
4245 if (subtargets || code == SET
4246 || (REG_P (target) && REG_P (source)
4247 && REGNO (target) != REGNO (source)))
4249 /* After arm_reorg has been called, we can't fix up expensive
4250 constants by pushing them into memory so we must synthesize
4251 them in-line, regardless of the cost. This is only likely to
4252 be more costly on chips that have load delay slots and we are
4253 compiling without running the scheduler (so no splitting
4254 occurred before the final instruction emission).
4256 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4258 if (!cfun->machine->after_arm_reorg
4259 && !cond
4260 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4261 1, 0)
4262 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4263 + (code != SET))))
4265 if (code == SET)
 4267 	  /* Currently SET is the only monadic value for CODE; all
 4268 	     the rest are dyadic.  */
4269 if (TARGET_USE_MOVT)
4270 arm_emit_movpair (target, GEN_INT (val));
4271 else
4272 emit_set_insn (target, GEN_INT (val));
4274 return 1;
4276 else
4278 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4280 if (TARGET_USE_MOVT)
4281 arm_emit_movpair (temp, GEN_INT (val));
4282 else
4283 emit_set_insn (temp, GEN_INT (val));
4285 /* For MINUS, the value is subtracted from, since we never
4286 have subtraction of a constant. */
4287 if (code == MINUS)
4288 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4289 else
4290 emit_set_insn (target,
4291 gen_rtx_fmt_ee (code, mode, source, temp));
4292 return 2;
4297 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4301 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4302 ARM/THUMB2 immediates, and add up to VAL.
 4303    The function return value gives the number of insns required.  */
4304 static int
4305 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4306 struct four_ints *return_sequence)
4308 int best_consecutive_zeros = 0;
4309 int i;
4310 int best_start = 0;
4311 int insns1, insns2;
4312 struct four_ints tmp_sequence;
4314 /* If we aren't targeting ARM, the best place to start is always at
4315 the bottom, otherwise look more closely. */
4316 if (TARGET_ARM)
4318 for (i = 0; i < 32; i += 2)
4320 int consecutive_zeros = 0;
4322 if (!(val & (3 << i)))
4324 while ((i < 32) && !(val & (3 << i)))
4326 consecutive_zeros += 2;
4327 i += 2;
4329 if (consecutive_zeros > best_consecutive_zeros)
4331 best_consecutive_zeros = consecutive_zeros;
4332 best_start = i - consecutive_zeros;
4334 i -= 2;
4339 /* So long as it won't require any more insns to do so, it's
4340 desirable to emit a small constant (in bits 0...9) in the last
4341 insn. This way there is more chance that it can be combined with
4342 a later addressing insn to form a pre-indexed load or store
4343 operation. Consider:
4345 *((volatile int *)0xe0000100) = 1;
4346 *((volatile int *)0xe0000110) = 2;
4348 We want this to wind up as:
4350 mov rA, #0xe0000000
4351 mov rB, #1
4352 str rB, [rA, #0x100]
4353 mov rB, #2
4354 str rB, [rA, #0x110]
4356 rather than having to synthesize both large constants from scratch.
4358 Therefore, we calculate how many insns would be required to emit
4359 the constant starting from `best_start', and also starting from
4360 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4361 yield a shorter sequence, we may as well use zero. */
4362 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4363 if (best_start != 0
4364 && ((HOST_WIDE_INT_1U << best_start) < val))
4366 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4367 if (insns2 <= insns1)
4369 *return_sequence = tmp_sequence;
4370 insns1 = insns2;
4374 return insns1;
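/* A concrete example of the splitting performed here (illustrative only):
   0x0ff00ff0 is not a valid single immediate, but it is the sum of two
   valid ones, so the returned sequence is { 0x00000ff0, 0x0ff00000 } (in
   some order) and the caller can emit something like

     mov     rD, #0x00000ff0
     orr     rD, rD, #0x0ff00000

   (an add or other operation instead of orr, depending on CODE), i.e. two
   instructions rather than a load from the literal pool.  */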
4377 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4378 static int
4379 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4380 struct four_ints *return_sequence, int i)
4382 int remainder = val & 0xffffffff;
4383 int insns = 0;
4385 /* Try and find a way of doing the job in either two or three
4386 instructions.
4388 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4389 location. We start at position I. This may be the MSB, or
 4390      optimal_immediate_sequence may have positioned it at the largest block
4391 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4392 wrapping around to the top of the word when we drop off the bottom.
4393 In the worst case this code should produce no more than four insns.
4395 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4396 constants, shifted to any arbitrary location. We should always start
4397 at the MSB. */
4400 int end;
4401 unsigned int b1, b2, b3, b4;
4402 unsigned HOST_WIDE_INT result;
4403 int loc;
4405 gcc_assert (insns < 4);
4407 if (i <= 0)
4408 i += 32;
4410 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4411 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4413 loc = i;
4414 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4415 /* We can use addw/subw for the last 12 bits. */
4416 result = remainder;
4417 else
4419 /* Use an 8-bit shifted/rotated immediate. */
4420 end = i - 8;
4421 if (end < 0)
4422 end += 32;
4423 result = remainder & ((0x0ff << end)
4424 | ((i < end) ? (0xff >> (32 - end))
4425 : 0));
4426 i -= 8;
4429 else
4431 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4432 arbitrary shifts. */
4433 i -= TARGET_ARM ? 2 : 1;
4434 continue;
4437 /* Next, see if we can do a better job with a thumb2 replicated
4438 constant.
4440 We do it this way around to catch the cases like 0x01F001E0 where
4441 two 8-bit immediates would work, but a replicated constant would
4442 make it worse.
4444 TODO: 16-bit constants that don't clear all the bits, but still win.
4445 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4446 if (TARGET_THUMB2)
4448 b1 = (remainder & 0xff000000) >> 24;
4449 b2 = (remainder & 0x00ff0000) >> 16;
4450 b3 = (remainder & 0x0000ff00) >> 8;
4451 b4 = remainder & 0xff;
4453 if (loc > 24)
4455 /* The 8-bit immediate already found clears b1 (and maybe b2),
4456 but must leave b3 and b4 alone. */
4458 /* First try to find a 32-bit replicated constant that clears
4459 almost everything. We can assume that we can't do it in one,
4460 or else we wouldn't be here. */
4461 unsigned int tmp = b1 & b2 & b3 & b4;
4462 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4463 + (tmp << 24);
4464 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4465 + (tmp == b3) + (tmp == b4);
4466 if (tmp
4467 && (matching_bytes >= 3
4468 || (matching_bytes == 2
4469 && const_ok_for_op (remainder & ~tmp2, code))))
4471 /* At least 3 of the bytes match, and the fourth has at
4472 least as many bits set, or two of the bytes match
4473 and it will only require one more insn to finish. */
4474 result = tmp2;
4475 i = tmp != b1 ? 32
4476 : tmp != b2 ? 24
4477 : tmp != b3 ? 16
4478 : 8;
4481 /* Second, try to find a 16-bit replicated constant that can
4482 leave three of the bytes clear. If b2 or b4 is already
4483 zero, then we can. If the 8-bit from above would not
4484 clear b2 anyway, then we still win. */
4485 else if (b1 == b3 && (!b2 || !b4
4486 || (remainder & 0x00ff0000 & ~result)))
4488 result = remainder & 0xff00ff00;
4489 i = 24;
4492 else if (loc > 16)
4494 /* The 8-bit immediate already found clears b2 (and maybe b3)
 4495 	     and we don't get here unless b1 is already clear, but it will
4496 leave b4 unchanged. */
4498 /* If we can clear b2 and b4 at once, then we win, since the
4499 8-bits couldn't possibly reach that far. */
4500 if (b2 == b4)
4502 result = remainder & 0x00ff00ff;
4503 i = 16;
4508 return_sequence->i[insns++] = result;
4509 remainder &= ~result;
4511 if (code == SET || code == MINUS)
4512 code = PLUS;
4514 while (remainder);
4516 return insns;
4519 /* Emit an instruction with the indicated PATTERN. If COND is
4520 non-NULL, conditionalize the execution of the instruction on COND
4521 being true. */
4523 static void
4524 emit_constant_insn (rtx cond, rtx pattern)
4526 if (cond)
4527 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4528 emit_insn (pattern);
4531 /* As above, but extra parameter GENERATE which, if clear, suppresses
4532 RTL generation. */
4534 static int
4535 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4536 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4537 int subtargets, int generate)
4539 int can_invert = 0;
4540 int can_negate = 0;
4541 int final_invert = 0;
4542 int i;
4543 int set_sign_bit_copies = 0;
4544 int clear_sign_bit_copies = 0;
4545 int clear_zero_bit_copies = 0;
4546 int set_zero_bit_copies = 0;
4547 int insns = 0, neg_insns, inv_insns;
4548 unsigned HOST_WIDE_INT temp1, temp2;
4549 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4550 struct four_ints *immediates;
4551 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4553 /* Find out which operations are safe for a given CODE. Also do a quick
4554 check for degenerate cases; these can occur when DImode operations
4555 are split. */
4556 switch (code)
4558 case SET:
4559 can_invert = 1;
4560 break;
4562 case PLUS:
4563 can_negate = 1;
4564 break;
4566 case IOR:
4567 if (remainder == 0xffffffff)
4569 if (generate)
4570 emit_constant_insn (cond,
4571 gen_rtx_SET (target,
4572 GEN_INT (ARM_SIGN_EXTEND (val))));
4573 return 1;
4576 if (remainder == 0)
4578 if (reload_completed && rtx_equal_p (target, source))
4579 return 0;
4581 if (generate)
4582 emit_constant_insn (cond, gen_rtx_SET (target, source));
4583 return 1;
4585 break;
4587 case AND:
4588 if (remainder == 0)
4590 if (generate)
4591 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4592 return 1;
4594 if (remainder == 0xffffffff)
4596 if (reload_completed && rtx_equal_p (target, source))
4597 return 0;
4598 if (generate)
4599 emit_constant_insn (cond, gen_rtx_SET (target, source));
4600 return 1;
4602 can_invert = 1;
4603 break;
4605 case XOR:
4606 if (remainder == 0)
4608 if (reload_completed && rtx_equal_p (target, source))
4609 return 0;
4610 if (generate)
4611 emit_constant_insn (cond, gen_rtx_SET (target, source));
4612 return 1;
4615 if (remainder == 0xffffffff)
4617 if (generate)
4618 emit_constant_insn (cond,
4619 gen_rtx_SET (target,
4620 gen_rtx_NOT (mode, source)));
4621 return 1;
4623 final_invert = 1;
4624 break;
4626 case MINUS:
4627 /* We treat MINUS as (val - source), since (source - val) is always
4628 passed as (source + (-val)). */
4629 if (remainder == 0)
4631 if (generate)
4632 emit_constant_insn (cond,
4633 gen_rtx_SET (target,
4634 gen_rtx_NEG (mode, source)));
4635 return 1;
4637 if (const_ok_for_arm (val))
4639 if (generate)
4640 emit_constant_insn (cond,
4641 gen_rtx_SET (target,
4642 gen_rtx_MINUS (mode, GEN_INT (val),
4643 source)));
4644 return 1;
4647 break;
4649 default:
4650 gcc_unreachable ();
4653 /* If we can do it in one insn get out quickly. */
4654 if (const_ok_for_op (val, code))
4656 if (generate)
4657 emit_constant_insn (cond,
4658 gen_rtx_SET (target,
4659 (source
4660 ? gen_rtx_fmt_ee (code, mode, source,
4661 GEN_INT (val))
4662 : GEN_INT (val))));
4663 return 1;
4666 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4667 insn. */
4668 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4669 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4671 if (generate)
4673 if (mode == SImode && i == 16)
4674 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4675 smaller insn. */
4676 emit_constant_insn (cond,
4677 gen_zero_extendhisi2
4678 (target, gen_lowpart (HImode, source)));
4679 else
4680 /* Extz only supports SImode, but we can coerce the operands
4681 into that mode. */
4682 emit_constant_insn (cond,
4683 gen_extzv_t2 (gen_lowpart (SImode, target),
4684 gen_lowpart (SImode, source),
4685 GEN_INT (i), const0_rtx));
4688 return 1;
4691 /* Calculate a few attributes that may be useful for specific
4692 optimizations. */
4693 /* Count number of leading zeros. */
4694 for (i = 31; i >= 0; i--)
4696 if ((remainder & (1 << i)) == 0)
4697 clear_sign_bit_copies++;
4698 else
4699 break;
4702 /* Count number of leading 1's. */
4703 for (i = 31; i >= 0; i--)
4705 if ((remainder & (1 << i)) != 0)
4706 set_sign_bit_copies++;
4707 else
4708 break;
 4711   /* Count number of trailing zeros.  */
4712 for (i = 0; i <= 31; i++)
4714 if ((remainder & (1 << i)) == 0)
4715 clear_zero_bit_copies++;
4716 else
4717 break;
4720 /* Count number of trailing 1's. */
4721 for (i = 0; i <= 31; i++)
4723 if ((remainder & (1 << i)) != 0)
4724 set_zero_bit_copies++;
4725 else
4726 break;
4729 switch (code)
4731 case SET:
4732 /* See if we can do this by sign_extending a constant that is known
 4733 	 to be negative.  This is a good way of doing it, since the shift
4734 may well merge into a subsequent insn. */
4735 if (set_sign_bit_copies > 1)
4737 if (const_ok_for_arm
4738 (temp1 = ARM_SIGN_EXTEND (remainder
4739 << (set_sign_bit_copies - 1))))
4741 if (generate)
4743 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4744 emit_constant_insn (cond,
4745 gen_rtx_SET (new_src, GEN_INT (temp1)));
4746 emit_constant_insn (cond,
4747 gen_ashrsi3 (target, new_src,
4748 GEN_INT (set_sign_bit_copies - 1)));
4750 return 2;
4752 /* For an inverted constant, we will need to set the low bits,
4753 these will be shifted out of harm's way. */
4754 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4755 if (const_ok_for_arm (~temp1))
4757 if (generate)
4759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4760 emit_constant_insn (cond,
4761 gen_rtx_SET (new_src, GEN_INT (temp1)));
4762 emit_constant_insn (cond,
4763 gen_ashrsi3 (target, new_src,
4764 GEN_INT (set_sign_bit_copies - 1)));
4766 return 2;
4770 /* See if we can calculate the value as the difference between two
4771 valid immediates. */
4772 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4774 int topshift = clear_sign_bit_copies & ~1;
4776 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4777 & (0xff000000 >> topshift));
4779 /* If temp1 is zero, then that means the 9 most significant
4780 bits of remainder were 1 and we've caused it to overflow.
4781 When topshift is 0 we don't need to do anything since we
4782 can borrow from 'bit 32'. */
4783 if (temp1 == 0 && topshift != 0)
4784 temp1 = 0x80000000 >> (topshift - 1);
4786 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4788 if (const_ok_for_arm (temp2))
4790 if (generate)
4792 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4793 emit_constant_insn (cond,
4794 gen_rtx_SET (new_src, GEN_INT (temp1)));
4795 emit_constant_insn (cond,
4796 gen_addsi3 (target, new_src,
4797 GEN_INT (-temp2)));
4800 return 2;
4804 /* See if we can generate this by setting the bottom (or the top)
4805 16 bits, and then shifting these into the other half of the
4806 word. We only look for the simplest cases, to do more would cost
4807 too much. Be careful, however, not to generate this when the
4808 alternative would take fewer insns. */
4809 if (val & 0xffff0000)
4811 temp1 = remainder & 0xffff0000;
4812 temp2 = remainder & 0x0000ffff;
4814 /* Overlaps outside this range are best done using other methods. */
4815 for (i = 9; i < 24; i++)
4817 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4818 && !const_ok_for_arm (temp2))
4820 rtx new_src = (subtargets
4821 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4822 : target);
4823 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4824 source, subtargets, generate);
4825 source = new_src;
4826 if (generate)
4827 emit_constant_insn
4828 (cond,
4829 gen_rtx_SET
4830 (target,
4831 gen_rtx_IOR (mode,
4832 gen_rtx_ASHIFT (mode, source,
4833 GEN_INT (i)),
4834 source)));
4835 return insns + 1;
4839 /* Don't duplicate cases already considered. */
4840 for (i = 17; i < 24; i++)
4842 if (((temp1 | (temp1 >> i)) == remainder)
4843 && !const_ok_for_arm (temp1))
4845 rtx new_src = (subtargets
4846 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4847 : target);
4848 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4849 source, subtargets, generate);
4850 source = new_src;
4851 if (generate)
4852 emit_constant_insn
4853 (cond,
4854 gen_rtx_SET (target,
4855 gen_rtx_IOR
4856 (mode,
4857 gen_rtx_LSHIFTRT (mode, source,
4858 GEN_INT (i)),
4859 source)));
4860 return insns + 1;
4864 break;
4866 case IOR:
4867 case XOR:
4868 /* If we have IOR or XOR, and the constant can be loaded in a
4869 single instruction, and we can find a temporary to put it in,
4870 then this can be done in two instructions instead of 3-4. */
4871 if (subtargets
4872 /* TARGET can't be NULL if SUBTARGETS is 0 */
4873 || (reload_completed && !reg_mentioned_p (target, source)))
4875 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4877 if (generate)
4879 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4881 emit_constant_insn (cond,
4882 gen_rtx_SET (sub, GEN_INT (val)));
4883 emit_constant_insn (cond,
4884 gen_rtx_SET (target,
4885 gen_rtx_fmt_ee (code, mode,
4886 source, sub)));
4888 return 2;
4892 if (code == XOR)
4893 break;
4895 /* Convert.
 4896 	 x = y | constant (which is composed of set_sign_bit_copies leading 1s
 4897                     and the remaining bits 0, e.g. 0xfff00000)
4898 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4900 This can be done in 2 instructions by using shifts with mov or mvn.
4901 e.g. for
4902 x = x | 0xfff00000;
4903 we generate.
4904 mvn r0, r0, asl #12
4905 mvn r0, r0, lsr #12 */
4906 if (set_sign_bit_copies > 8
4907 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4909 if (generate)
4911 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4912 rtx shift = GEN_INT (set_sign_bit_copies);
4914 emit_constant_insn
4915 (cond,
4916 gen_rtx_SET (sub,
4917 gen_rtx_NOT (mode,
4918 gen_rtx_ASHIFT (mode,
4919 source,
4920 shift))));
4921 emit_constant_insn
4922 (cond,
4923 gen_rtx_SET (target,
4924 gen_rtx_NOT (mode,
4925 gen_rtx_LSHIFTRT (mode, sub,
4926 shift))));
4928 return 2;
4931 /* Convert
4932 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4934 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4936 For eg. r0 = r0 | 0xfff
4937 mvn r0, r0, lsr #12
4938 mvn r0, r0, asl #12
4941 if (set_zero_bit_copies > 8
4942 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4944 if (generate)
4946 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4947 rtx shift = GEN_INT (set_zero_bit_copies);
4949 emit_constant_insn
4950 (cond,
4951 gen_rtx_SET (sub,
4952 gen_rtx_NOT (mode,
4953 gen_rtx_LSHIFTRT (mode,
4954 source,
4955 shift))));
4956 emit_constant_insn
4957 (cond,
4958 gen_rtx_SET (target,
4959 gen_rtx_NOT (mode,
4960 gen_rtx_ASHIFT (mode, sub,
4961 shift))));
4963 return 2;
4966 /* This will never be reached for Thumb2 because orn is a valid
 4967 	 instruction. This is for Thumb1 and the 32-bit ARM cases.
4969 x = y | constant (such that ~constant is a valid constant)
4970 Transform this to
4971 x = ~(~y & ~constant).
4973 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4975 if (generate)
4977 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4978 emit_constant_insn (cond,
4979 gen_rtx_SET (sub,
4980 gen_rtx_NOT (mode, source)));
4981 source = sub;
4982 if (subtargets)
4983 sub = gen_reg_rtx (mode);
4984 emit_constant_insn (cond,
4985 gen_rtx_SET (sub,
4986 gen_rtx_AND (mode, source,
4987 GEN_INT (temp1))));
4988 emit_constant_insn (cond,
4989 gen_rtx_SET (target,
4990 gen_rtx_NOT (mode, sub)));
4992 return 3;
4994 break;
4996 case AND:
4997 /* See if two shifts will do 2 or more insn's worth of work. */
4998 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5000 HOST_WIDE_INT shift_mask = ((0xffffffff
5001 << (32 - clear_sign_bit_copies))
5002 & 0xffffffff);
5004 if ((remainder | shift_mask) != 0xffffffff)
5006 HOST_WIDE_INT new_val
5007 = ARM_SIGN_EXTEND (remainder | shift_mask);
5009 if (generate)
5011 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5012 insns = arm_gen_constant (AND, SImode, cond, new_val,
5013 new_src, source, subtargets, 1);
5014 source = new_src;
5016 else
5018 rtx targ = subtargets ? NULL_RTX : target;
5019 insns = arm_gen_constant (AND, mode, cond, new_val,
5020 targ, source, subtargets, 0);
5024 if (generate)
5026 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5027 rtx shift = GEN_INT (clear_sign_bit_copies);
5029 emit_insn (gen_ashlsi3 (new_src, source, shift));
5030 emit_insn (gen_lshrsi3 (target, new_src, shift));
5033 return insns + 2;
5036 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5038 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5040 if ((remainder | shift_mask) != 0xffffffff)
5042 HOST_WIDE_INT new_val
5043 = ARM_SIGN_EXTEND (remainder | shift_mask);
5044 if (generate)
5046 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5048 insns = arm_gen_constant (AND, mode, cond, new_val,
5049 new_src, source, subtargets, 1);
5050 source = new_src;
5052 else
5054 rtx targ = subtargets ? NULL_RTX : target;
5056 insns = arm_gen_constant (AND, mode, cond, new_val,
5057 targ, source, subtargets, 0);
5061 if (generate)
5063 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5064 rtx shift = GEN_INT (clear_zero_bit_copies);
5066 emit_insn (gen_lshrsi3 (new_src, source, shift));
5067 emit_insn (gen_ashlsi3 (target, new_src, shift));
5070 return insns + 2;
5073 break;
5075 default:
5076 break;
5079 /* Calculate what the instruction sequences would be if we generated it
5080 normally, negated, or inverted. */
5081 if (code == AND)
5082 /* AND cannot be split into multiple insns, so invert and use BIC. */
5083 insns = 99;
5084 else
5085 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5087 if (can_negate)
5088 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5089 &neg_immediates);
5090 else
5091 neg_insns = 99;
5093 if (can_invert || final_invert)
5094 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5095 &inv_immediates);
5096 else
5097 inv_insns = 99;
5099 immediates = &pos_immediates;
5101 /* Is the negated immediate sequence more efficient? */
5102 if (neg_insns < insns && neg_insns <= inv_insns)
5104 insns = neg_insns;
5105 immediates = &neg_immediates;
5107 else
5108 can_negate = 0;
5110 /* Is the inverted immediate sequence more efficient?
5111 We must allow for an extra NOT instruction for XOR operations, although
5112 there is some chance that the final 'mvn' will get optimized later. */
5113 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5115 insns = inv_insns;
5116 immediates = &inv_immediates;
5118 else
5120 can_invert = 0;
5121 final_invert = 0;
5124 /* Now output the chosen sequence as instructions. */
5125 if (generate)
5127 for (i = 0; i < insns; i++)
5129 rtx new_src, temp1_rtx;
5131 temp1 = immediates->i[i];
5133 if (code == SET || code == MINUS)
5134 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5135 else if ((final_invert || i < (insns - 1)) && subtargets)
5136 new_src = gen_reg_rtx (mode);
5137 else
5138 new_src = target;
5140 if (can_invert)
5141 temp1 = ~temp1;
5142 else if (can_negate)
5143 temp1 = -temp1;
5145 temp1 = trunc_int_for_mode (temp1, mode);
5146 temp1_rtx = GEN_INT (temp1);
5148 if (code == SET)
5150 else if (code == MINUS)
5151 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5152 else
5153 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5155 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5156 source = new_src;
5158 if (code == SET)
5160 can_negate = can_invert;
5161 can_invert = 0;
5162 code = PLUS;
5164 else if (code == MINUS)
5165 code = PLUS;
5169 if (final_invert)
5171 if (generate)
5172 emit_constant_insn (cond, gen_rtx_SET (target,
5173 gen_rtx_NOT (mode, source)));
5174 insns++;
5177 return insns;
5180 /* Canonicalize a comparison so that we are more likely to recognize it.
5181 This can be done for a few constant compares, where we can make the
5182 immediate value easier to load. */
5184 static void
5185 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5186 bool op0_preserve_value)
5188 machine_mode mode;
5189 unsigned HOST_WIDE_INT i, maxval;
5191 mode = GET_MODE (*op0);
5192 if (mode == VOIDmode)
5193 mode = GET_MODE (*op1);
5195 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5197 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5198 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5199 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5200 for GTU/LEU in Thumb mode. */
5201 if (mode == DImode)
5204 if (*code == GT || *code == LE
5205 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5207 /* Missing comparison. First try to use an available
5208 comparison. */
5209 if (CONST_INT_P (*op1))
5211 i = INTVAL (*op1);
5212 switch (*code)
5214 case GT:
5215 case LE:
5216 if (i != maxval
5217 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5219 *op1 = GEN_INT (i + 1);
5220 *code = *code == GT ? GE : LT;
5221 return;
5223 break;
5224 case GTU:
5225 case LEU:
5226 if (i != ~((unsigned HOST_WIDE_INT) 0)
5227 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5229 *op1 = GEN_INT (i + 1);
5230 *code = *code == GTU ? GEU : LTU;
5231 return;
5233 break;
5234 default:
5235 gcc_unreachable ();
5239 /* If that did not work, reverse the condition. */
5240 if (!op0_preserve_value)
5242 std::swap (*op0, *op1);
5243 *code = (int)swap_condition ((enum rtx_code)*code);
5246 return;
5249 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5250 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5251 to facilitate possible combining with a cmp into 'ands'. */
5252 if (mode == SImode
5253 && GET_CODE (*op0) == ZERO_EXTEND
5254 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5255 && GET_MODE (XEXP (*op0, 0)) == QImode
5256 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5257 && subreg_lowpart_p (XEXP (*op0, 0))
5258 && *op1 == const0_rtx)
5259 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5260 GEN_INT (255));
5262 /* Comparisons smaller than DImode. Only adjust comparisons against
5263 an out-of-range constant. */
5264 if (!CONST_INT_P (*op1)
5265 || const_ok_for_arm (INTVAL (*op1))
5266 || const_ok_for_arm (- INTVAL (*op1)))
5267 return;
5269 i = INTVAL (*op1);
5271 switch (*code)
5273 case EQ:
5274 case NE:
5275 return;
5277 case GT:
5278 case LE:
5279 if (i != maxval
5280 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5282 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5283 *code = *code == GT ? GE : LT;
5284 return;
5286 break;
5288 case GE:
5289 case LT:
5290 if (i != ~maxval
5291 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5293 *op1 = GEN_INT (i - 1);
5294 *code = *code == GE ? GT : LE;
5295 return;
5297 break;
5299 case GTU:
5300 case LEU:
5301 if (i != ~((unsigned HOST_WIDE_INT) 0)
5302 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5304 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5305 *code = *code == GTU ? GEU : LTU;
5306 return;
5308 break;
5310 case GEU:
5311 case LTU:
5312 if (i != 0
5313 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5315 *op1 = GEN_INT (i - 1);
5316 *code = *code == GEU ? GTU : LEU;
5317 return;
5319 break;
5321 default:
5322 gcc_unreachable ();
5327 /* Define how to find the value returned by a function. */
5329 static rtx
5330 arm_function_value(const_tree type, const_tree func,
5331 bool outgoing ATTRIBUTE_UNUSED)
5333 machine_mode mode;
5334 int unsignedp ATTRIBUTE_UNUSED;
5335 rtx r ATTRIBUTE_UNUSED;
5337 mode = TYPE_MODE (type);
5339 if (TARGET_AAPCS_BASED)
5340 return aapcs_allocate_return_reg (mode, type, func);
5342 /* Promote integer types. */
5343 if (INTEGRAL_TYPE_P (type))
5344 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5346 /* Promotes small structs returned in a register to full-word size
5347 for big-endian AAPCS. */
5348 if (arm_return_in_msb (type))
5350 HOST_WIDE_INT size = int_size_in_bytes (type);
5351 if (size % UNITS_PER_WORD != 0)
5353 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5354 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5358 return arm_libcall_value_1 (mode);
5361 /* libcall hashtable helpers. */
5363 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5365 static inline hashval_t hash (const rtx_def *);
5366 static inline bool equal (const rtx_def *, const rtx_def *);
5367 static inline void remove (rtx_def *);
5370 inline bool
5371 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5373 return rtx_equal_p (p1, p2);
5376 inline hashval_t
5377 libcall_hasher::hash (const rtx_def *p1)
5379 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5382 typedef hash_table<libcall_hasher> libcall_table_type;
5384 static void
5385 add_libcall (libcall_table_type *htab, rtx libcall)
5387 *htab->find_slot (libcall, INSERT) = libcall;
5390 static bool
5391 arm_libcall_uses_aapcs_base (const_rtx libcall)
5393 static bool init_done = false;
5394 static libcall_table_type *libcall_htab = NULL;
5396 if (!init_done)
5398 init_done = true;
5400 libcall_htab = new libcall_table_type (31);
5401 add_libcall (libcall_htab,
5402 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5403 add_libcall (libcall_htab,
5404 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5405 add_libcall (libcall_htab,
5406 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5407 add_libcall (libcall_htab,
5408 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5410 add_libcall (libcall_htab,
5411 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5412 add_libcall (libcall_htab,
5413 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5431 add_libcall (libcall_htab,
5432 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5433 add_libcall (libcall_htab,
5434 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5436 /* Values from double-precision helper functions are returned in core
5437 registers if the selected core only supports single-precision
5438 arithmetic, even if we are using the hard-float ABI. The same is
5439 true for single-precision helpers, but we will never be using the
5440 hard-float ABI on a CPU which doesn't support single-precision
5441 operations in hardware. */
5442 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5443 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5444 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5452 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5453 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5454 SFmode));
5455 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5456 DFmode));
5457 add_libcall (libcall_htab,
5458 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5461 return libcall && libcall_htab->find (libcall) != NULL;
5464 static rtx
5465 arm_libcall_value_1 (machine_mode mode)
5467 if (TARGET_AAPCS_BASED)
5468 return aapcs_libcall_value (mode);
5469 else if (TARGET_IWMMXT_ABI
5470 && arm_vector_mode_supported_p (mode))
5471 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5472 else
5473 return gen_rtx_REG (mode, ARG_REGISTER (1));
5476 /* Define how to find the value returned by a library function
5477 assuming the value has mode MODE. */
5479 static rtx
5480 arm_libcall_value (machine_mode mode, const_rtx libcall)
5482 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5483 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5485 /* The following libcalls return their result in integer registers,
5486 even though they return a floating point value. */
5487 if (arm_libcall_uses_aapcs_base (libcall))
5488 return gen_rtx_REG (mode, ARG_REGISTER(1));
5492 return arm_libcall_value_1 (mode);
5495 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5497 static bool
5498 arm_function_value_regno_p (const unsigned int regno)
5500 if (regno == ARG_REGISTER (1)
5501 || (TARGET_32BIT
5502 && TARGET_AAPCS_BASED
5503 && TARGET_HARD_FLOAT
5504 && regno == FIRST_VFP_REGNUM)
5505 || (TARGET_IWMMXT_ABI
5506 && regno == FIRST_IWMMXT_REGNUM))
5507 return true;
5509 return false;
5512 /* Determine the amount of memory needed to store the possible return
5513 registers of an untyped call. */
5514 int
5515 arm_apply_result_size (void)
5517 int size = 16;
5519 if (TARGET_32BIT)
5521 if (TARGET_HARD_FLOAT_ABI)
5522 size += 32;
5523 if (TARGET_IWMMXT_ABI)
5524 size += 8;
5527 return size;
5530 /* Decide whether TYPE should be returned in memory (true)
5531 or in a register (false). FNTYPE is the type of the function making
5532 the call. */
5533 static bool
5534 arm_return_in_memory (const_tree type, const_tree fntype)
5536 HOST_WIDE_INT size;
5538 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5540 if (TARGET_AAPCS_BASED)
5542 /* Simple, non-aggregate types (i.e. not including vectors and
5543 complex) are always returned in a register (or registers).
5544 We don't care about which register here, so we can short-cut
5545 some of the detail. */
5546 if (!AGGREGATE_TYPE_P (type)
5547 && TREE_CODE (type) != VECTOR_TYPE
5548 && TREE_CODE (type) != COMPLEX_TYPE)
5549 return false;
5551 /* Any return value that is no larger than one word can be
5552 returned in r0. */
5553 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5554 return false;
5556 /* Check any available co-processors to see if they accept the
5557 type as a register candidate (VFP, for example, can return
5558 some aggregates in consecutive registers). These aren't
5559 available if the call is variadic. */
5560 if (aapcs_select_return_coproc (type, fntype) >= 0)
5561 return false;
5563 /* Vector values should be returned using ARM registers, not
5564 memory (unless they're over 16 bytes, which will break since
5565 we only have four call-clobbered registers to play with). */
5566 if (TREE_CODE (type) == VECTOR_TYPE)
5567 return (size < 0 || size > (4 * UNITS_PER_WORD));
5569 /* The rest go in memory. */
5570 return true;
5573 if (TREE_CODE (type) == VECTOR_TYPE)
5574 return (size < 0 || size > (4 * UNITS_PER_WORD));
5576 if (!AGGREGATE_TYPE_P (type) &&
5577 (TREE_CODE (type) != VECTOR_TYPE))
5578 /* All simple types are returned in registers. */
5579 return false;
5581 if (arm_abi != ARM_ABI_APCS)
5583 /* ATPCS and later return aggregate types in memory only if they are
5584 larger than a word (or are variable size). */
5585 return (size < 0 || size > UNITS_PER_WORD);
5588 /* For the arm-wince targets we choose to be compatible with Microsoft's
5589 ARM and Thumb compilers, which always return aggregates in memory. */
5590 #ifndef ARM_WINCE
5591 /* All structures/unions bigger than one word are returned in memory.
5592 Also catch the case where int_size_in_bytes returns -1. In this case
5593 the aggregate is either huge or of variable size, and in either case
5594 we will want to return it via memory and not in a register. */
5595 if (size < 0 || size > UNITS_PER_WORD)
5596 return true;
5598 if (TREE_CODE (type) == RECORD_TYPE)
5600 tree field;
5602 /* For a struct the APCS says that we only return in a register
5603 if the type is 'integer like' and every addressable element
5604 has an offset of zero. For practical purposes this means
5605 that the structure can have at most one non bit-field element
5606 and that this element must be the first one in the structure. */
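/* An illustrative sketch of these rules: struct { int i; } is returned
   in r0, while struct { float f; } and struct { short s; char c; } are
   returned in memory even though each fits in a word -- the first
   because its leading field is a float, the second because it has an
   addressable element after the first field.  */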
5608 /* Find the first field, ignoring non FIELD_DECL things which will
5609 have been created by C++. */
5610 for (field = TYPE_FIELDS (type);
5611 field && TREE_CODE (field) != FIELD_DECL;
5612 field = DECL_CHAIN (field))
5613 continue;
5615 if (field == NULL)
5616 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5618 /* Check that the first field is valid for returning in a register. */
5620 /* ... Floats are not allowed */
5621 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5622 return true;
5624 /* ... Aggregates that are not themselves valid for returning in
5625 a register are not allowed. */
5626 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5627 return true;
5629 /* Now check the remaining fields, if any. Only bitfields are allowed,
5630 since they are not addressable. */
5631 for (field = DECL_CHAIN (field);
5632 field;
5633 field = DECL_CHAIN (field))
5635 if (TREE_CODE (field) != FIELD_DECL)
5636 continue;
5638 if (!DECL_BIT_FIELD_TYPE (field))
5639 return true;
5642 return false;
5645 if (TREE_CODE (type) == UNION_TYPE)
5647 tree field;
5649 /* Unions can be returned in registers if every element is
5650 integral, or can be returned in an integer register. */
5651 for (field = TYPE_FIELDS (type);
5652 field;
5653 field = DECL_CHAIN (field))
5655 if (TREE_CODE (field) != FIELD_DECL)
5656 continue;
5658 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5659 return true;
5661 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5662 return true;
5665 return false;
5667 #endif /* not ARM_WINCE */
5669 /* Return all other types in memory. */
5670 return true;
5673 const struct pcs_attribute_arg
5675 const char *arg;
5676 enum arm_pcs value;
5677 } pcs_attribute_args[] =
5679 {"aapcs", ARM_PCS_AAPCS},
5680 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5681 #if 0
5682 /* We could recognize these, but changes would be needed elsewhere
5683 * to implement them. */
5684 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5685 {"atpcs", ARM_PCS_ATPCS},
5686 {"apcs", ARM_PCS_APCS},
5687 #endif
5688 {NULL, ARM_PCS_UNKNOWN}
5691 static enum arm_pcs
5692 arm_pcs_from_attribute (tree attr)
5694 const struct pcs_attribute_arg *ptr;
5695 const char *arg;
5697 /* Get the value of the argument. */
5698 if (TREE_VALUE (attr) == NULL_TREE
5699 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5700 return ARM_PCS_UNKNOWN;
5702 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5704 /* Check it against the list of known arguments. */
5705 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5706 if (streq (arg, ptr->arg))
5707 return ptr->value;
5709 /* An unrecognized PCS name.  */
5710 return ARM_PCS_UNKNOWN;
5713 /* Get the PCS variant to use for this call. TYPE is the function's type
5714 specification, DECL is the specific declaration.  DECL may be null if
5715 the call could be indirect or if this is a library call. */
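/* A usage sketch (see the GCC manual for the authoritative description
   of the "pcs" attribute):

       double dadd (double, double) __attribute__ ((pcs ("aapcs")));

   forces the base variant for that declaration, so its arguments and
   result stay in core registers even when the default is aapcs-vfp.  */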
5716 static enum arm_pcs
5717 arm_get_pcs_model (const_tree type, const_tree decl)
5719 bool user_convention = false;
5720 enum arm_pcs user_pcs = arm_pcs_default;
5721 tree attr;
5723 gcc_assert (type);
5725 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5726 if (attr)
5728 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5729 user_convention = true;
5732 if (TARGET_AAPCS_BASED)
5734 /* Detect varargs functions. These always use the base rules
5735 (no argument is ever a candidate for a co-processor
5736 register). */
5737 bool base_rules = stdarg_p (type);
5739 if (user_convention)
5741 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5742 sorry ("non-AAPCS derived PCS variant");
5743 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5744 error ("variadic functions must use the base AAPCS variant");
5747 if (base_rules)
5748 return ARM_PCS_AAPCS;
5749 else if (user_convention)
5750 return user_pcs;
5751 else if (decl && flag_unit_at_a_time)
5753 /* Local functions never leak outside this compilation unit,
5754 so we are free to use whatever conventions are
5755 appropriate. */
5756 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5757 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5758 if (i && i->local)
5759 return ARM_PCS_AAPCS_LOCAL;
5762 else if (user_convention && user_pcs != arm_pcs_default)
5763 sorry ("PCS variant");
5765 /* For everything else we use the target's default. */
5766 return arm_pcs_default;
5770 static void
5771 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5772 const_tree fntype ATTRIBUTE_UNUSED,
5773 rtx libcall ATTRIBUTE_UNUSED,
5774 const_tree fndecl ATTRIBUTE_UNUSED)
5776 /* Record the unallocated VFP registers. */
5777 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5778 pcum->aapcs_vfp_reg_alloc = 0;
5781 /* Walk down the type tree of TYPE counting consecutive base elements.
5782 If *MODEP is VOIDmode, then set it to the first valid floating point
5783 type. If a non-floating point type is found, or if a floating point
5784 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5785 otherwise return the count in the sub-tree. */
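/* A worked sketch: for struct { double x, y; } each field contributes 1
   with *MODEP set to DFmode, giving a count of 2 -- a homogeneous
   aggregate of two doubles.  For double[4] the count is 4.  For
   struct { float f; double d; } the base types differ, so the result is
   -1 and the argument is not a VFP candidate.  */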
5786 static int
5787 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5789 machine_mode mode;
5790 HOST_WIDE_INT size;
5792 switch (TREE_CODE (type))
5794 case REAL_TYPE:
5795 mode = TYPE_MODE (type);
5796 if (mode != DFmode && mode != SFmode && mode != HFmode)
5797 return -1;
5799 if (*modep == VOIDmode)
5800 *modep = mode;
5802 if (*modep == mode)
5803 return 1;
5805 break;
5807 case COMPLEX_TYPE:
5808 mode = TYPE_MODE (TREE_TYPE (type));
5809 if (mode != DFmode && mode != SFmode)
5810 return -1;
5812 if (*modep == VOIDmode)
5813 *modep = mode;
5815 if (*modep == mode)
5816 return 2;
5818 break;
5820 case VECTOR_TYPE:
5821 /* Use V2SImode and V4SImode as representatives of all 64-bit
5822 and 128-bit vector types, whether or not those modes are
5823 supported with the present options. */
5824 size = int_size_in_bytes (type);
5825 switch (size)
5827 case 8:
5828 mode = V2SImode;
5829 break;
5830 case 16:
5831 mode = V4SImode;
5832 break;
5833 default:
5834 return -1;
5837 if (*modep == VOIDmode)
5838 *modep = mode;
5840 /* Vector modes are considered to be opaque: two vectors are
5841 equivalent for the purposes of being homogeneous aggregates
5842 if they are the same size. */
5843 if (*modep == mode)
5844 return 1;
5846 break;
5848 case ARRAY_TYPE:
5850 int count;
5851 tree index = TYPE_DOMAIN (type);
5853 /* Can't handle incomplete types nor sizes that are not
5854 fixed. */
5855 if (!COMPLETE_TYPE_P (type)
5856 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5857 return -1;
5859 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5860 if (count == -1
5861 || !index
5862 || !TYPE_MAX_VALUE (index)
5863 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5864 || !TYPE_MIN_VALUE (index)
5865 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5866 || count < 0)
5867 return -1;
5869 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5870 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5872 /* There must be no padding. */
5873 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5874 return -1;
5876 return count;
5879 case RECORD_TYPE:
5881 int count = 0;
5882 int sub_count;
5883 tree field;
5885 /* Can't handle incomplete types nor sizes that are not
5886 fixed. */
5887 if (!COMPLETE_TYPE_P (type)
5888 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5889 return -1;
5891 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5893 if (TREE_CODE (field) != FIELD_DECL)
5894 continue;
5896 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5897 if (sub_count < 0)
5898 return -1;
5899 count += sub_count;
5902 /* There must be no padding. */
5903 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5904 return -1;
5906 return count;
5909 case UNION_TYPE:
5910 case QUAL_UNION_TYPE:
5912 /* These aren't very interesting except in a degenerate case. */
5913 int count = 0;
5914 int sub_count;
5915 tree field;
5917 /* Can't handle incomplete types nor sizes that are not
5918 fixed. */
5919 if (!COMPLETE_TYPE_P (type)
5920 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5921 return -1;
5923 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5925 if (TREE_CODE (field) != FIELD_DECL)
5926 continue;
5928 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5929 if (sub_count < 0)
5930 return -1;
5931 count = count > sub_count ? count : sub_count;
5934 /* There must be no padding. */
5935 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5936 return -1;
5938 return count;
5941 default:
5942 break;
5945 return -1;
5948 /* Return true if PCS_VARIANT should use VFP registers. */
5949 static bool
5950 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5952 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5954 static bool seen_thumb1_vfp = false;
5956 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5958 sorry ("Thumb-1 hard-float VFP ABI");
5959 /* sorry() is not immediately fatal, so only display this once. */
5960 seen_thumb1_vfp = true;
5963 return true;
5966 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5967 return false;
5969 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5970 (TARGET_VFP_DOUBLE || !is_double));
5973 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5974 suitable for passing or returning in VFP registers for the PCS
5975 variant selected. If it is, then *BASE_MODE is updated to contain
5976 a machine mode describing each element of the argument's type and
5977 *COUNT to hold the number of such elements. */
5978 static bool
5979 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5980 machine_mode mode, const_tree type,
5981 machine_mode *base_mode, int *count)
5983 machine_mode new_mode = VOIDmode;
5985 /* If we have the type information, prefer that to working things
5986 out from the mode. */
5987 if (type)
5989 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5991 if (ag_count > 0 && ag_count <= 4)
5992 *count = ag_count;
5993 else
5994 return false;
5996 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5997 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5998 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6000 *count = 1;
6001 new_mode = mode;
6003 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6005 *count = 2;
6006 new_mode = (mode == DCmode ? DFmode : SFmode);
6008 else
6009 return false;
6012 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6013 return false;
6015 *base_mode = new_mode;
6016 return true;
6019 static bool
6020 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6021 machine_mode mode, const_tree type)
6023 int count ATTRIBUTE_UNUSED;
6024 machine_mode ag_mode ATTRIBUTE_UNUSED;
6026 if (!use_vfp_abi (pcs_variant, false))
6027 return false;
6028 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6029 &ag_mode, &count);
6032 static bool
6033 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6034 const_tree type)
6036 if (!use_vfp_abi (pcum->pcs_variant, false))
6037 return false;
6039 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6040 &pcum->aapcs_vfp_rmode,
6041 &pcum->aapcs_vfp_rcount);
6044 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6045 for the behaviour of this function. */
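/* A sketch of the allocation below for a homogeneous aggregate of two
   doubles (aapcs_vfp_rmode == DFmode, aapcs_vfp_rcount == 2): shift is
   2 because one DFmode value covers two SFmode slots, mask is 0xf, and
   the loop scans for four consecutive free single-precision slots
   starting at s0, s2, s4, ..., i.e. on a D-register boundary.  */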
6047 static bool
6048 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6049 const_tree type ATTRIBUTE_UNUSED)
6051 int rmode_size
6052 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6053 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6054 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6055 int regno;
6057 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6058 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6060 pcum->aapcs_vfp_reg_alloc = mask << regno;
6061 if (mode == BLKmode
6062 || (mode == TImode && ! TARGET_NEON)
6063 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6065 int i;
6066 int rcount = pcum->aapcs_vfp_rcount;
6067 int rshift = shift;
6068 machine_mode rmode = pcum->aapcs_vfp_rmode;
6069 rtx par;
6070 if (!TARGET_NEON)
6072 /* Avoid using unsupported vector modes. */
6073 if (rmode == V2SImode)
6074 rmode = DImode;
6075 else if (rmode == V4SImode)
6077 rmode = DImode;
6078 rcount *= 2;
6079 rshift /= 2;
6082 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6083 for (i = 0; i < rcount; i++)
6085 rtx tmp = gen_rtx_REG (rmode,
6086 FIRST_VFP_REGNUM + regno + i * rshift);
6087 tmp = gen_rtx_EXPR_LIST
6088 (VOIDmode, tmp,
6089 GEN_INT (i * GET_MODE_SIZE (rmode)));
6090 XVECEXP (par, 0, i) = tmp;
6093 pcum->aapcs_reg = par;
6095 else
6096 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6097 return true;
6099 return false;
6102 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6103 comment there for the behaviour of this function. */
6105 static rtx
6106 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6107 machine_mode mode,
6108 const_tree type ATTRIBUTE_UNUSED)
6110 if (!use_vfp_abi (pcs_variant, false))
6111 return NULL;
6113 if (mode == BLKmode
6114 || (GET_MODE_CLASS (mode) == MODE_INT
6115 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6116 && !TARGET_NEON))
6118 int count;
6119 machine_mode ag_mode;
6120 int i;
6121 rtx par;
6122 int shift;
6124 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6125 &ag_mode, &count);
6127 if (!TARGET_NEON)
6129 if (ag_mode == V2SImode)
6130 ag_mode = DImode;
6131 else if (ag_mode == V4SImode)
6133 ag_mode = DImode;
6134 count *= 2;
6137 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6138 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6139 for (i = 0; i < count; i++)
6141 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6142 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6143 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6144 XVECEXP (par, 0, i) = tmp;
6147 return par;
6150 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6153 static void
6154 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6155 machine_mode mode ATTRIBUTE_UNUSED,
6156 const_tree type ATTRIBUTE_UNUSED)
6158 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6159 pcum->aapcs_vfp_reg_alloc = 0;
6160 return;
6163 #define AAPCS_CP(X) \
6165 aapcs_ ## X ## _cum_init, \
6166 aapcs_ ## X ## _is_call_candidate, \
6167 aapcs_ ## X ## _allocate, \
6168 aapcs_ ## X ## _is_return_candidate, \
6169 aapcs_ ## X ## _allocate_return_reg, \
6170 aapcs_ ## X ## _advance \
6173 /* Table of co-processors that can be used to pass arguments in
6174 registers.  Ideally no argument should be a candidate for more than
6175 one co-processor table entry, but the table is processed in order
6176 and stops after the first match. If that entry then fails to put
6177 the argument into a co-processor register, the argument will go on
6178 the stack. */
6179 static struct
6181 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6182 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6184 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6185 BLKmode) is a candidate for this co-processor's registers; this
6186 function should ignore any position-dependent state in
6187 CUMULATIVE_ARGS and only use call-type dependent information. */
6188 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6190 /* Return true if the argument does get a co-processor register; it
6191 should set aapcs_reg to an RTX of the register allocated as is
6192 required for a return from FUNCTION_ARG. */
6193 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6195 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6196 be returned in this co-processor's registers. */
6197 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6199 /* Allocate and return an RTX element to hold the return type of a call. This
6200 routine must not fail and will only be called if is_return_candidate
6201 returned true with the same parameters. */
6202 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6204 /* Finish processing this argument and prepare to start processing
6205 the next one. */
6206 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6207 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6209 AAPCS_CP(vfp)
6212 #undef AAPCS_CP
6214 static int
6215 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6216 const_tree type)
6218 int i;
6220 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6221 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6222 return i;
6224 return -1;
6227 static int
6228 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6230 /* We aren't passed a decl, so we can't check that a call is local.
6231 However, it isn't clear that that would be a win anyway, since it
6232 might limit some tail-calling opportunities. */
6233 enum arm_pcs pcs_variant;
6235 if (fntype)
6237 const_tree fndecl = NULL_TREE;
6239 if (TREE_CODE (fntype) == FUNCTION_DECL)
6241 fndecl = fntype;
6242 fntype = TREE_TYPE (fntype);
6245 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6247 else
6248 pcs_variant = arm_pcs_default;
6250 if (pcs_variant != ARM_PCS_AAPCS)
6252 int i;
6254 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6255 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6256 TYPE_MODE (type),
6257 type))
6258 return i;
6260 return -1;
6263 static rtx
6264 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6265 const_tree fntype)
6267 /* We aren't passed a decl, so we can't check that a call is local.
6268 However, it isn't clear that that would be a win anyway, since it
6269 might limit some tail-calling opportunities. */
6270 enum arm_pcs pcs_variant;
6271 int unsignedp ATTRIBUTE_UNUSED;
6273 if (fntype)
6275 const_tree fndecl = NULL_TREE;
6277 if (TREE_CODE (fntype) == FUNCTION_DECL)
6279 fndecl = fntype;
6280 fntype = TREE_TYPE (fntype);
6283 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6285 else
6286 pcs_variant = arm_pcs_default;
6288 /* Promote integer types. */
6289 if (type && INTEGRAL_TYPE_P (type))
6290 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6292 if (pcs_variant != ARM_PCS_AAPCS)
6294 int i;
6296 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6297 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6298 type))
6299 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6300 mode, type);
6303 /* Promotes small structs returned in a register to full-word size
6304 for big-endian AAPCS. */
6305 if (type && arm_return_in_msb (type))
6307 HOST_WIDE_INT size = int_size_in_bytes (type);
6308 if (size % UNITS_PER_WORD != 0)
6310 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6311 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6315 return gen_rtx_REG (mode, R0_REGNUM);
6318 static rtx
6319 aapcs_libcall_value (machine_mode mode)
6321 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6322 && GET_MODE_SIZE (mode) <= 4)
6323 mode = SImode;
6325 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6328 /* Lay out a function argument using the AAPCS rules. The rule
6329 numbers referred to here are those in the AAPCS. */
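/* A worked sketch for f (int a, long long b) under the base AAPCS:
   a occupies r0 (rule C4); b needs doubleword alignment, so C3 rounds
   the NCRN from 1 up to 2 and b occupies r2-r3 (C4).  A further
   doubleword argument would find no core registers left and would be
   placed on the stack (C6-C8).  */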
6330 static void
6331 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6332 const_tree type, bool named)
6334 int nregs, nregs2;
6335 int ncrn;
6337 /* We only need to do this once per argument. */
6338 if (pcum->aapcs_arg_processed)
6339 return;
6341 pcum->aapcs_arg_processed = true;
6343 /* Special case: if named is false then we are handling an incoming
6344 anonymous argument which is on the stack. */
6345 if (!named)
6346 return;
6348 /* Is this a potential co-processor register candidate? */
6349 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6351 int slot = aapcs_select_call_coproc (pcum, mode, type);
6352 pcum->aapcs_cprc_slot = slot;
6354 /* We don't have to apply any of the rules from part B of the
6355 preparation phase; these are handled elsewhere in the
6356 compiler. */
6358 if (slot >= 0)
6360 /* A Co-processor register candidate goes either in its own
6361 class of registers or on the stack. */
6362 if (!pcum->aapcs_cprc_failed[slot])
6364 /* C1.cp - Try to allocate the argument to co-processor
6365 registers. */
6366 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6367 return;
6369 /* C2.cp - Put the argument on the stack and note that we
6370 can't assign any more candidates in this slot. We also
6371 need to note that we have allocated stack space, so that
6372 we won't later try to split a non-cprc candidate between
6373 core registers and the stack. */
6374 pcum->aapcs_cprc_failed[slot] = true;
6375 pcum->can_split = false;
6378 /* We didn't get a register, so this argument goes on the
6379 stack. */
6380 gcc_assert (pcum->can_split == false);
6381 return;
6385 /* C3 - For double-word aligned arguments, round the NCRN up to the
6386 next even number. */
6387 ncrn = pcum->aapcs_ncrn;
6388 if (ncrn & 1)
6390 int res = arm_needs_doubleword_align (mode, type);
6391 /* Only warn during RTL expansion of call stmts, otherwise we would
6392 warn e.g. during gimplification even on functions that will be
6393 always inlined, and we'd warn multiple times. Don't warn when
6394 called in expand_function_start either, as we warn instead in
6395 arm_function_arg_boundary in that case. */
6396 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6397 inform (input_location, "parameter passing for argument of type "
6398 "%qT changed in GCC 7.1", type);
6399 else if (res > 0)
6400 ncrn++;
6403 nregs = ARM_NUM_REGS2(mode, type);
6405 /* Sigh, this test should really assert that nregs > 0, but a GCC
6406 extension allows empty structs and then gives them empty size; it
6407 then allows such a structure to be passed by value. For some of
6408 the code below we have to pretend that such an argument has
6409 non-zero size so that we 'locate' it correctly either in
6410 registers or on the stack. */
6411 gcc_assert (nregs >= 0);
6413 nregs2 = nregs ? nregs : 1;
6415 /* C4 - Argument fits entirely in core registers. */
6416 if (ncrn + nregs2 <= NUM_ARG_REGS)
6418 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6419 pcum->aapcs_next_ncrn = ncrn + nregs;
6420 return;
6423 /* C5 - Some core registers left and there are no arguments already
6424 on the stack: split this argument between the remaining core
6425 registers and the stack. */
6426 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6428 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6429 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6430 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6431 return;
6434 /* C6 - NCRN is set to 4. */
6435 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6437 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6438 return;
6441 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6442 for a call to a function whose data type is FNTYPE.
6443 For a library call, FNTYPE is NULL. */
6444 void
6445 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6446 rtx libname,
6447 tree fndecl ATTRIBUTE_UNUSED)
6449 /* Long call handling. */
6450 if (fntype)
6451 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6452 else
6453 pcum->pcs_variant = arm_pcs_default;
6455 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6457 if (arm_libcall_uses_aapcs_base (libname))
6458 pcum->pcs_variant = ARM_PCS_AAPCS;
6460 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6461 pcum->aapcs_reg = NULL_RTX;
6462 pcum->aapcs_partial = 0;
6463 pcum->aapcs_arg_processed = false;
6464 pcum->aapcs_cprc_slot = -1;
6465 pcum->can_split = true;
6467 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6469 int i;
6471 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6473 pcum->aapcs_cprc_failed[i] = false;
6474 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6477 return;
6480 /* Legacy ABIs */
6482 /* On the ARM, the offset starts at 0. */
6483 pcum->nregs = 0;
6484 pcum->iwmmxt_nregs = 0;
6485 pcum->can_split = true;
6487 /* Varargs vectors are treated the same as long long.
6488 named_count avoids having to change the way arm handles 'named'.  */
6489 pcum->named_count = 0;
6490 pcum->nargs = 0;
6492 if (TARGET_REALLY_IWMMXT && fntype)
6494 tree fn_arg;
6496 for (fn_arg = TYPE_ARG_TYPES (fntype);
6497 fn_arg;
6498 fn_arg = TREE_CHAIN (fn_arg))
6499 pcum->named_count += 1;
6501 if (! pcum->named_count)
6502 pcum->named_count = INT_MAX;
6506 /* Return 1 if double word alignment is required for argument passing.
6507 Return -1 if double word alignment used to be required for argument
6508 passing before PR77728 ABI fix, but is not required anymore.
6509 Return 0 if double word alignment is not required and wasn't required
6510 before either. */
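/* Illustrative cases (a sketch): long long and double have 64-bit
   alignment, so this returns 1 and such arguments start in an
   even-numbered core register; plain int returns 0.  A C++ aggregate
   whose only members with greater-than-PARM_BOUNDARY alignment are not
   FIELD_DECLs (an over-aligned static data member, for instance) used
   to force doubleword alignment before the PR77728 fix and now yields
   -1, so that -Wpsabi can point out the change.  */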
6511 static int
6512 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6514 if (!type)
6515 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6517 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6518 if (!AGGREGATE_TYPE_P (type))
6519 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6521 /* Array types: Use member alignment of element type. */
6522 if (TREE_CODE (type) == ARRAY_TYPE)
6523 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6525 int ret = 0;
6526 /* Record/aggregate types: Use greatest member alignment of any member. */
6527 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6528 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6530 if (TREE_CODE (field) == FIELD_DECL)
6531 return 1;
6532 else
6533 /* Before PR77728 fix, we were incorrectly considering also
6534 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6535 Make sure we can warn about that with -Wpsabi. */
6536 ret = -1;
6539 return ret;
6543 /* Determine where to put an argument to a function.
6544 Value is zero to push the argument on the stack,
6545 or a hard register in which to store the argument.
6547 MODE is the argument's machine mode.
6548 TYPE is the data type of the argument (as a tree).
6549 This is null for libcalls where that information may
6550 not be available.
6551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6552 the preceding args and about the function being called.
6553 NAMED is nonzero if this argument is a named parameter
6554 (otherwise it is an extra parameter matching an ellipsis).
6556 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6557 other arguments are passed on the stack. If (NAMED == 0) (which happens
6558 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6559 defined), say it is passed in the stack (function_prologue will
6560 indeed make it pass in the stack if necessary). */
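/* For example (a sketch): for f (int a, int b, int c, int d, int e),
   a..d are passed in r0-r3; for e no core register is left, so
   NULL_RTX is returned below and e goes on the stack.  */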
6562 static rtx
6563 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6564 const_tree type, bool named)
6566 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6567 int nregs;
6569 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6570 a call insn (op3 of a call_value insn). */
6571 if (mode == VOIDmode)
6572 return const0_rtx;
6574 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6576 aapcs_layout_arg (pcum, mode, type, named);
6577 return pcum->aapcs_reg;
6580 /* Varargs vectors are treated the same as long long.
6581 named_count avoids having to change the way arm handles 'named'.  */
6582 if (TARGET_IWMMXT_ABI
6583 && arm_vector_mode_supported_p (mode)
6584 && pcum->named_count > pcum->nargs + 1)
6586 if (pcum->iwmmxt_nregs <= 9)
6587 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6588 else
6590 pcum->can_split = false;
6591 return NULL_RTX;
6595 /* Put doubleword aligned quantities in even register pairs. */
6596 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6598 int res = arm_needs_doubleword_align (mode, type);
6599 if (res < 0 && warn_psabi)
6600 inform (input_location, "parameter passing for argument of type "
6601 "%qT changed in GCC 7.1", type);
6602 else if (res > 0)
6603 pcum->nregs++;
6606 /* Only allow splitting an arg between regs and memory if all preceding
6607 args were allocated to regs. For args passed by reference we only count
6608 the reference pointer. */
6609 if (pcum->can_split)
6610 nregs = 1;
6611 else
6612 nregs = ARM_NUM_REGS2 (mode, type);
6614 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6615 return NULL_RTX;
6617 return gen_rtx_REG (mode, pcum->nregs);
6620 static unsigned int
6621 arm_function_arg_boundary (machine_mode mode, const_tree type)
6623 if (!ARM_DOUBLEWORD_ALIGN)
6624 return PARM_BOUNDARY;
6626 int res = arm_needs_doubleword_align (mode, type);
6627 if (res < 0 && warn_psabi)
6628 inform (input_location, "parameter passing for argument of type %qT "
6629 "changed in GCC 7.1", type);
6631 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6634 static int
6635 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6636 tree type, bool named)
6638 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6639 int nregs = pcum->nregs;
6641 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6643 aapcs_layout_arg (pcum, mode, type, named);
6644 return pcum->aapcs_partial;
6647 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6648 return 0;
6650 if (NUM_ARG_REGS > nregs
6651 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6652 && pcum->can_split)
6653 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6655 return 0;
6658 /* Update the data in PCUM to advance over an argument
6659 of mode MODE and data type TYPE.
6660 (TYPE is null for libcalls where that information may not be available.) */
6662 static void
6663 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6664 const_tree type, bool named)
6666 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6668 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6670 aapcs_layout_arg (pcum, mode, type, named);
6672 if (pcum->aapcs_cprc_slot >= 0)
6674 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6675 type);
6676 pcum->aapcs_cprc_slot = -1;
6679 /* Generic stuff. */
6680 pcum->aapcs_arg_processed = false;
6681 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6682 pcum->aapcs_reg = NULL_RTX;
6683 pcum->aapcs_partial = 0;
6685 else
6687 pcum->nargs += 1;
6688 if (arm_vector_mode_supported_p (mode)
6689 && pcum->named_count > pcum->nargs
6690 && TARGET_IWMMXT_ABI)
6691 pcum->iwmmxt_nregs += 1;
6692 else
6693 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6697 /* Variable sized types are passed by reference. This is a GCC
6698 extension to the ARM ABI. */
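/* For instance (a GNU C sketch): an object of a local type such as
   struct vla { char buf[n]; } has a non-constant TYPE_SIZE, so handing
   it to a callee passes a pointer to it rather than a copy.  */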
6700 static bool
6701 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6702 machine_mode mode ATTRIBUTE_UNUSED,
6703 const_tree type, bool named ATTRIBUTE_UNUSED)
6705 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6708 /* Encode the current state of the #pragma [no_]long_calls. */
6709 typedef enum
6711 OFF, /* No #pragma [no_]long_calls is in effect. */
6712 LONG, /* #pragma long_calls is in effect. */
6713 SHORT /* #pragma no_long_calls is in effect. */
6714 } arm_pragma_enum;
6716 static arm_pragma_enum arm_pragma_long_calls = OFF;
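/* A usage sketch for these pragmas:

       #pragma long_calls
       void far_away (void);      (declared with the long_call attribute)
       #pragma long_calls_off
       void nearby (void);        (back to the command-line default)
*/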
6718 void
6719 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6721 arm_pragma_long_calls = LONG;
6724 void
6725 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6727 arm_pragma_long_calls = SHORT;
6730 void
6731 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6733 arm_pragma_long_calls = OFF;
6736 /* Handle an attribute requiring a FUNCTION_DECL;
6737 arguments as in struct attribute_spec.handler. */
6738 static tree
6739 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6740 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6742 if (TREE_CODE (*node) != FUNCTION_DECL)
6744 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6745 name);
6746 *no_add_attrs = true;
6749 return NULL_TREE;
6752 /* Handle an "interrupt" or "isr" attribute;
6753 arguments as in struct attribute_spec.handler. */
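/* A usage sketch (the argument names the exception kind, e.g. "IRQ",
   "FIQ", "SWI", "ABORT" or "UNDEF"):

       void dma_done (void) __attribute__ ((interrupt ("IRQ")));

   marks dma_done as an IRQ handler so the appropriate entry and exit
   sequences are generated for it.  */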
6754 static tree
6755 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6756 bool *no_add_attrs)
6758 if (DECL_P (*node))
6760 if (TREE_CODE (*node) != FUNCTION_DECL)
6762 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6763 name);
6764 *no_add_attrs = true;
6766 /* FIXME: the argument if any is checked for type attributes;
6767 should it be checked for decl ones? */
6769 else
6771 if (TREE_CODE (*node) == FUNCTION_TYPE
6772 || TREE_CODE (*node) == METHOD_TYPE)
6774 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6776 warning (OPT_Wattributes, "%qE attribute ignored",
6777 name);
6778 *no_add_attrs = true;
6781 else if (TREE_CODE (*node) == POINTER_TYPE
6782 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6783 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6784 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6786 *node = build_variant_type_copy (*node);
6787 TREE_TYPE (*node) = build_type_attribute_variant
6788 (TREE_TYPE (*node),
6789 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6790 *no_add_attrs = true;
6792 else
6794 /* Possibly pass this attribute on from the type to a decl. */
6795 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6796 | (int) ATTR_FLAG_FUNCTION_NEXT
6797 | (int) ATTR_FLAG_ARRAY_NEXT))
6799 *no_add_attrs = true;
6800 return tree_cons (name, args, NULL_TREE);
6802 else
6804 warning (OPT_Wattributes, "%qE attribute ignored",
6805 name);
6810 return NULL_TREE;
6813 /* Handle a "pcs" attribute; arguments as in struct
6814 attribute_spec.handler. */
6815 static tree
6816 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6817 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6819 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6821 warning (OPT_Wattributes, "%qE attribute ignored", name);
6822 *no_add_attrs = true;
6824 return NULL_TREE;
6827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6828 /* Handle the "notshared" attribute. This attribute is another way of
6829 requesting hidden visibility. ARM's compiler supports
6830 "__declspec(notshared)"; we support the same thing via an
6831 attribute. */
6833 static tree
6834 arm_handle_notshared_attribute (tree *node,
6835 tree name ATTRIBUTE_UNUSED,
6836 tree args ATTRIBUTE_UNUSED,
6837 int flags ATTRIBUTE_UNUSED,
6838 bool *no_add_attrs)
6840 tree decl = TYPE_NAME (*node);
6842 if (decl)
6844 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6845 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6846 *no_add_attrs = false;
6848 return NULL_TREE;
6850 #endif
6852 /* This function returns true if a function with declaration FNDECL and type
6853 FNTYPE uses the stack to pass arguments or to return values, and false
6854 otherwise. This is used for functions with the attributes
6855 'cmse_nonsecure_call' or 'cmse_nonsecure_entry'; diagnostic messages are
6856 issued if the stack is used. NAME is the name of the attribute
6857 used. */
6859 static bool
6860 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6862 function_args_iterator args_iter;
6863 CUMULATIVE_ARGS args_so_far_v;
6864 cumulative_args_t args_so_far;
6865 bool first_param = true;
6866 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6868 /* Error out if any argument is passed on the stack. */
6869 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6870 args_so_far = pack_cumulative_args (&args_so_far_v);
6871 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6873 rtx arg_rtx;
6874 machine_mode arg_mode = TYPE_MODE (arg_type);
6876 prev_arg_type = arg_type;
6877 if (VOID_TYPE_P (arg_type))
6878 continue;
6880 if (!first_param)
6881 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6882 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6883 if (!arg_rtx
6884 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6886 error ("%qE attribute not available to functions with arguments "
6887 "passed on the stack", name);
6888 return true;
6890 first_param = false;
6893 /* Error out for variadic functions since we cannot control how many
6894 arguments will be passed and thus stack could be used. stdarg_p () is not
6895 used for the checking to avoid browsing arguments twice. */
6896 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6898 error ("%qE attribute not available to functions with variable number "
6899 "of arguments", name);
6900 return true;
6903 /* Error out if return value is passed on the stack. */
6904 ret_type = TREE_TYPE (fntype);
6905 if (arm_return_in_memory (ret_type, fntype))
6907 error ("%qE attribute not available to functions that return value on "
6908 "the stack", name);
6909 return true;
6911 return false;
6914 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6915 function will check whether the attribute is allowed here and will add the
6916 attribute to the function declaration tree or otherwise issue a warning. */
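/* A usage sketch, assuming compilation with -mcmse:

       int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   The checks below reject, among other things, entry functions whose
   arguments or return value would have to be passed on the stack.  */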
6918 static tree
6919 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6920 tree /* args */,
6921 int /* flags */,
6922 bool *no_add_attrs)
6924 tree fndecl;
6926 if (!use_cmse)
6928 *no_add_attrs = true;
6929 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6930 name);
6931 return NULL_TREE;
6934 /* Ignore attribute for function types. */
6935 if (TREE_CODE (*node) != FUNCTION_DECL)
6937 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6938 name);
6939 *no_add_attrs = true;
6940 return NULL_TREE;
6943 fndecl = *node;
6945 /* Warn for static linkage functions. */
6946 if (!TREE_PUBLIC (fndecl))
6948 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6949 "with static linkage", name);
6950 *no_add_attrs = true;
6951 return NULL_TREE;
6954 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6955 TREE_TYPE (fndecl));
6956 return NULL_TREE;
6960 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6961 function will check whether the attribute is allowed here and will add the
6962 attribute to the function type tree or otherwise issue a diagnostic. The
6963 reason we check this at declaration time is to only allow the use of the
6964 attribute with declarations of function pointers and not function
6965 declarations. This function checks NODE is of the expected type and issues
6966 diagnostics otherwise using NAME. If it is not of the expected type
6967 *NO_ADD_ATTRS will be set to true. */
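/* A usage sketch, assuming compilation with -mcmse:

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_cb (int);
       ns_cb *callback;

   The attribute is accepted on function-pointer (or function-typedef)
   declarations such as these, not on ordinary function declarations.  */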
6969 static tree
6970 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6971 tree /* args */,
6972 int /* flags */,
6973 bool *no_add_attrs)
6975 tree decl = NULL_TREE, fntype = NULL_TREE;
6976 tree type;
6978 if (!use_cmse)
6980 *no_add_attrs = true;
6981 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6982 name);
6983 return NULL_TREE;
6986 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6988 decl = *node;
6989 fntype = TREE_TYPE (decl);
6992 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6993 fntype = TREE_TYPE (fntype);
6995 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6997 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6998 "function pointer", name);
6999 *no_add_attrs = true;
7000 return NULL_TREE;
7003 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7005 if (*no_add_attrs)
7006 return NULL_TREE;
7008 /* Prevent trees being shared among function types with and without
7009 cmse_nonsecure_call attribute. */
7010 type = TREE_TYPE (decl);
7012 type = build_distinct_type_copy (type);
7013 TREE_TYPE (decl) = type;
7014 fntype = type;
7016 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7018 type = fntype;
7019 fntype = TREE_TYPE (fntype);
7020 fntype = build_distinct_type_copy (fntype);
7021 TREE_TYPE (type) = fntype;
7024 /* Construct a type attribute and add it to the function type. */
7025 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7026 TYPE_ATTRIBUTES (fntype));
7027 TYPE_ATTRIBUTES (fntype) = attrs;
7028 return NULL_TREE;
7031 /* Return 0 if the attributes for two types are incompatible, 1 if they
7032 are compatible, and 2 if they are nearly compatible (which causes a
7033 warning to be generated). */
7034 static int
7035 arm_comp_type_attributes (const_tree type1, const_tree type2)
7037 int l1, l2, s1, s2;
7039 /* Check for mismatch of non-default calling convention. */
7040 if (TREE_CODE (type1) != FUNCTION_TYPE)
7041 return 1;
7043 /* Check for mismatched call attributes. */
7044 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7045 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7046 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7047 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7049 /* Only bother to check if an attribute is defined. */
7050 if (l1 | l2 | s1 | s2)
7052 /* If one type has an attribute, the other must have the same attribute. */
7053 if ((l1 != l2) || (s1 != s2))
7054 return 0;
7056 /* Disallow mixed attributes. */
7057 if ((l1 & s2) || (l2 & s1))
7058 return 0;
7061 /* Check for mismatched ISR attribute. */
7062 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7063 if (! l1)
7064 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7065 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7066 if (! l2)
7067 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7068 if (l1 != l2)
7069 return 0;
7071 l1 = lookup_attribute ("cmse_nonsecure_call",
7072 TYPE_ATTRIBUTES (type1)) != NULL;
7073 l2 = lookup_attribute ("cmse_nonsecure_call",
7074 TYPE_ATTRIBUTES (type2)) != NULL;
7076 if (l1 != l2)
7077 return 0;
7079 return 1;
7082 /* Assigns default attributes to newly defined type. This is used to
7083 set short_call/long_call attributes for function types of
7084 functions defined inside corresponding #pragma scopes. */
7085 static void
7086 arm_set_default_type_attributes (tree type)
7088 /* Add __attribute__ ((long_call)) to all functions, when
7089 inside #pragma long_calls or __attribute__ ((short_call)),
7090 when inside #pragma no_long_calls. */
7091 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7093 tree type_attr_list, attr_name;
7094 type_attr_list = TYPE_ATTRIBUTES (type);
7096 if (arm_pragma_long_calls == LONG)
7097 attr_name = get_identifier ("long_call");
7098 else if (arm_pragma_long_calls == SHORT)
7099 attr_name = get_identifier ("short_call");
7100 else
7101 return;
7103 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7104 TYPE_ATTRIBUTES (type) = type_attr_list;
7108 /* Return true if DECL is known to be linked into section SECTION. */
7110 static bool
7111 arm_function_in_section_p (tree decl, section *section)
7113 /* We can only be certain about the prevailing symbol definition. */
7114 if (!decl_binds_to_current_def_p (decl))
7115 return false;
7117 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7118 if (!DECL_SECTION_NAME (decl))
7120 /* Make sure that we will not create a unique section for DECL. */
7121 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7122 return false;
7125 return function_section (decl) == section;
7128 /* Return nonzero if a 32-bit "long_call" should be generated for
7129 a call from the current function to DECL. We generate a long_call
7130 if the function:
7132 a. has an __attribute__((long_call))
7133 or b. is within the scope of a #pragma long_calls
7134 or c. the -mlong-calls command line switch has been specified
7136 However we do not generate a long call if the function:
7138 d. has an __attribute__ ((short_call))
7139 or e. is inside the scope of a #pragma no_long_calls
7140 or f. is defined in the same section as the current function. */
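/* For example (a sketch): building with -mlong-calls, or declaring

       void far_away (void) __attribute__ ((long_call));

   makes calls to far_away use a full 32-bit address sequence, unless
   one of the exceptions d.-f. above applies.  */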
7142 bool
7143 arm_is_long_call_p (tree decl)
7145 tree attrs;
7147 if (!decl)
7148 return TARGET_LONG_CALLS;
7150 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7151 if (lookup_attribute ("short_call", attrs))
7152 return false;
7154 /* For "f", be conservative, and only cater for cases in which the
7155 whole of the current function is placed in the same section. */
7156 if (!flag_reorder_blocks_and_partition
7157 && TREE_CODE (decl) == FUNCTION_DECL
7158 && arm_function_in_section_p (decl, current_function_section ()))
7159 return false;
7161 if (lookup_attribute ("long_call", attrs))
7162 return true;
7164 return TARGET_LONG_CALLS;
7167 /* Return nonzero if it is ok to make a tail-call to DECL. */
7168 static bool
7169 arm_function_ok_for_sibcall (tree decl, tree exp)
7171 unsigned long func_type;
7173 if (cfun->machine->sibcall_blocked)
7174 return false;
7176 /* Never tailcall something if we are generating code for Thumb-1. */
7177 if (TARGET_THUMB1)
7178 return false;
7180 /* The PIC register is live on entry to VxWorks PLT entries, so we
7181 must make the call before restoring the PIC register. */
7182 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7183 return false;
7185 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7186 may be used both as target of the call and base register for restoring
7187 the VFP registers.  */
7188 if (TARGET_APCS_FRAME && TARGET_ARM
7189 && TARGET_HARD_FLOAT
7190 && decl && arm_is_long_call_p (decl))
7191 return false;
7193 /* If we are interworking and the function is not declared static
7194 then we can't tail-call it unless we know that it exists in this
7195 compilation unit (since it might be a Thumb routine). */
7196 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7197 && !TREE_ASM_WRITTEN (decl))
7198 return false;
7200 func_type = arm_current_func_type ();
7201 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7202 if (IS_INTERRUPT (func_type))
7203 return false;
7205 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7206 generated for entry functions themselves. */
7207 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7208 return false;
7210 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7211 this would complicate matters for later code generation. */
7212 if (TREE_CODE (exp) == CALL_EXPR)
7214 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7215 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7216 return false;
7219 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7221 /* Check that the return value locations are the same. For
7222 example that we aren't returning a value from the sibling in
7223 a VFP register but then need to transfer it to a core
7224 register. */
7225 rtx a, b;
7226 tree decl_or_type = decl;
7228 /* If it is an indirect function pointer, get the function type. */
7229 if (!decl)
7230 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7232 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7233 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7234 cfun->decl, false);
7235 if (!rtx_equal_p (a, b))
7236 return false;
7239 /* Never tailcall if function may be called with a misaligned SP. */
7240 if (IS_STACKALIGN (func_type))
7241 return false;
7243 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7244 references should become a NOP. Don't convert such calls into
7245 sibling calls. */
7246 if (TARGET_AAPCS_BASED
7247 && arm_abi == ARM_ABI_AAPCS
7248 && decl
7249 && DECL_WEAK (decl))
7250 return false;
7252 /* We cannot do a tailcall for an indirect call by descriptor if all the
7253 argument registers are used because the only register left to load the
7254 address is IP and it will already contain the static chain. */
7255 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7257 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7258 CUMULATIVE_ARGS cum;
7259 cumulative_args_t cum_v;
7261 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7262 cum_v = pack_cumulative_args (&cum);
7264 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7266 tree type = TREE_VALUE (t);
7267 if (!VOID_TYPE_P (type))
7268 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7271 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7272 return false;
7275 /* Everything else is ok. */
7276 return true;
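/* One illustrative example (an assumption about a typical use, not an
   exhaustive list): on an AAPCS bare-metal target a call to

     extern void hook (void) __attribute__ ((weak));

   is never turned into a sibling call, because an unresolved weak
   reference must be able to degrade to a NOP (the DECL_WEAK check
   above).  */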
7280 /* Addressing mode support functions. */
7282 /* Return nonzero if X is a legitimate immediate operand when compiling
7283 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7285 legitimate_pic_operand_p (rtx x)
7287 if (GET_CODE (x) == SYMBOL_REF
7288 || (GET_CODE (x) == CONST
7289 && GET_CODE (XEXP (x, 0)) == PLUS
7290 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7291 return 0;
7293 return 1;
7296 /* Record that the current function needs a PIC register. Initialize
7297 cfun->machine->pic_reg if we have not already done so. */
7299 static void
7300 require_pic_register (void)
7302 /* A lot of the logic here is made obscure by the fact that this
7303 routine gets called as part of the rtx cost estimation process.
7304 We don't want those calls to affect any assumptions about the real
7305 function; and further, we can't call entry_of_function() until we
7306 start the real expansion process. */
7307 if (!crtl->uses_pic_offset_table)
7309 gcc_assert (can_create_pseudo_p ());
7310 if (arm_pic_register != INVALID_REGNUM
7311 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7313 if (!cfun->machine->pic_reg)
7314 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7316 /* Play games to avoid marking the function as needing pic
7317 if we are being called as part of the cost-estimation
7318 process. */
7319 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7320 crtl->uses_pic_offset_table = 1;
7322 else
7324 rtx_insn *seq, *insn;
7326 if (!cfun->machine->pic_reg)
7327 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7329 /* Play games to avoid marking the function as needing pic
7330 if we are being called as part of the cost-estimation
7331 process. */
7332 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7334 crtl->uses_pic_offset_table = 1;
7335 start_sequence ();
7337 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7338 && arm_pic_register > LAST_LO_REGNUM)
7339 emit_move_insn (cfun->machine->pic_reg,
7340 gen_rtx_REG (Pmode, arm_pic_register));
7341 else
7342 arm_load_pic_register (0UL);
7344 seq = get_insns ();
7345 end_sequence ();
7347 for (insn = seq; insn; insn = NEXT_INSN (insn))
7348 if (INSN_P (insn))
7349 INSN_LOCATION (insn) = prologue_location;
7351 /* We can be called during expansion of PHI nodes, where
7352 we can't yet emit instructions directly in the final
7353 insn stream. Queue the insns on the entry edge, they will
7354 be committed after everything else is expanded. */
7355 insert_insn_on_edge (seq,
7356 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7363 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7365 if (GET_CODE (orig) == SYMBOL_REF
7366 || GET_CODE (orig) == LABEL_REF)
7368 if (reg == 0)
7370 gcc_assert (can_create_pseudo_p ());
7371 reg = gen_reg_rtx (Pmode);
7374 /* VxWorks does not impose a fixed gap between segments; the run-time
7375 gap can be different from the object-file gap. We therefore can't
7376 use GOTOFF unless we are absolutely sure that the symbol is in the
7377 same segment as the GOT. Unfortunately, the flexibility of linker
7378 scripts means that we can't be sure of that in general, so assume
7379 that GOTOFF is never valid on VxWorks. */
7380 /* References to weak symbols cannot be resolved locally: they
7381 may be overridden by a non-weak definition at link time. */
7382 rtx_insn *insn;
7383 if ((GET_CODE (orig) == LABEL_REF
7384 || (GET_CODE (orig) == SYMBOL_REF
7385 && SYMBOL_REF_LOCAL_P (orig)
7386 && (SYMBOL_REF_DECL (orig)
7387 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7388 && NEED_GOT_RELOC
7389 && arm_pic_data_is_text_relative)
7390 insn = arm_pic_static_addr (orig, reg);
7391 else
7393 rtx pat;
7394 rtx mem;
7396 /* If this function doesn't have a pic register, create one now. */
7397 require_pic_register ();
7399 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7401 /* Make the MEM as close to a constant as possible. */
7402 mem = SET_SRC (pat);
7403 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7404 MEM_READONLY_P (mem) = 1;
7405 MEM_NOTRAP_P (mem) = 1;
7407 insn = emit_insn (pat);
7410 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7411 by the loop optimizer. */
7412 set_unique_reg_note (insn, REG_EQUAL, orig);
7414 return reg;
7416 else if (GET_CODE (orig) == CONST)
7418 rtx base, offset;
7420 if (GET_CODE (XEXP (orig, 0)) == PLUS
7421 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7422 return orig;
7424 /* Handle the case where we have: const (UNSPEC_TLS). */
7425 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7426 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7427 return orig;
7429 /* Handle the case where we have:
7430 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7431 CONST_INT. */
7432 if (GET_CODE (XEXP (orig, 0)) == PLUS
7433 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7434 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7436 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7437 return orig;
7440 if (reg == 0)
7442 gcc_assert (can_create_pseudo_p ());
7443 reg = gen_reg_rtx (Pmode);
7446 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7448 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7449 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7450 base == reg ? 0 : reg);
7452 if (CONST_INT_P (offset))
7454 /* The base register doesn't really matter; we only want to
7455 test the index for the appropriate mode. */
7456 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7458 gcc_assert (can_create_pseudo_p ());
7459 offset = force_reg (Pmode, offset);
7462 if (CONST_INT_P (offset))
7463 return plus_constant (Pmode, base, INTVAL (offset));
7466 if (GET_MODE_SIZE (mode) > 4
7467 && (GET_MODE_CLASS (mode) == MODE_INT
7468 || TARGET_SOFT_FLOAT))
7470 emit_insn (gen_addsi3 (reg, base, offset));
7471 return reg;
7474 return gen_rtx_PLUS (Pmode, base, offset);
7477 return orig;
7481 /* Find a spare register to use during the prolog of a function. */
7483 static int
7484 thumb_find_work_register (unsigned long pushed_regs_mask)
7486 int reg;
7488 /* Check the argument registers first as these are call-used. The
7489 register allocation order means that sometimes r3 might be used
7490 but earlier argument registers might not, so check them all. */
7491 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7492 if (!df_regs_ever_live_p (reg))
7493 return reg;
7495 /* Before going on to check the call-saved registers we can try a couple
7496 more ways of deducing that r3 is available. The first is when we are
7497 pushing anonymous arguments onto the stack and we have fewer than 4
7498 registers' worth of fixed arguments (*). In this case r3 will be part of
7499 the variable argument list and so we can be sure that it will be
7500 pushed right at the start of the function. Hence it will be available
7501 for the rest of the prologue.
7502 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
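/* For example (illustrative only): in a varargs function such as
   int f (int a, ...) only r0 carries a named argument; r1-r3 are
   stored at function entry as pretend arguments, so
   crtl->args.pretend_args_size is 12 and r3 is free to be reused
   here.  */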
7503 if (cfun->machine->uses_anonymous_args
7504 && crtl->args.pretend_args_size > 0)
7505 return LAST_ARG_REGNUM;
7507 /* The other case is when we have fixed arguments but fewer than 4 registers'
7508 worth. In this case r3 might be used in the body of the function, but
7509 it is not being used to convey an argument into the function. In theory
7510 we could just check crtl->args.size to see how many bytes are
7511 being passed in argument registers, but it seems that it is unreliable.
7512 Sometimes it will have the value 0 when in fact arguments are being
7513 passed. (See testcase execute/20021111-1.c for an example). So we also
7514 check the args_info.nregs field as well. The problem with this field is
7515 that it makes no allowances for arguments that are passed to the
7516 function but which are not used. Hence we could miss an opportunity
7517 when a function has an unused argument in r3. But it is better to be
7518 safe than to be sorry. */
7519 if (! cfun->machine->uses_anonymous_args
7520 && crtl->args.size >= 0
7521 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7522 && (TARGET_AAPCS_BASED
7523 ? crtl->args.info.aapcs_ncrn < 4
7524 : crtl->args.info.nregs < 4))
7525 return LAST_ARG_REGNUM;
7527 /* Otherwise look for a call-saved register that is going to be pushed. */
7528 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7529 if (pushed_regs_mask & (1 << reg))
7530 return reg;
7532 if (TARGET_THUMB2)
7534 /* Thumb-2 can use high regs. */
7535 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7536 if (pushed_regs_mask & (1 << reg))
7537 return reg;
7539 /* Something went wrong - thumb_compute_save_reg_mask()
7540 should have arranged for a suitable register to be pushed. */
7541 gcc_unreachable ();
7544 static GTY(()) int pic_labelno;
7546 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7547 low register. */
7549 void
7550 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7552 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7554 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7555 return;
7557 gcc_assert (flag_pic);
7559 pic_reg = cfun->machine->pic_reg;
7560 if (TARGET_VXWORKS_RTP)
7562 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7563 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7564 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7566 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7568 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7569 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7571 else
7573 /* We use an UNSPEC rather than a LABEL_REF because this label
7574 never appears in the code stream. */
7576 labelno = GEN_INT (pic_labelno++);
7577 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7578 l1 = gen_rtx_CONST (VOIDmode, l1);
7580 /* On the ARM the PC register contains 'dot + 8' at the time of the
7581 addition, on the Thumb it is 'dot + 4'. */
7582 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7583 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7584 UNSPEC_GOTSYM_OFF);
7585 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7587 if (TARGET_32BIT)
7589 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7591 else /* TARGET_THUMB1 */
7593 if (arm_pic_register != INVALID_REGNUM
7594 && REGNO (pic_reg) > LAST_LO_REGNUM)
7596 /* We will have pushed the pic register, so we should always be
7597 able to find a work register. */
7598 pic_tmp = gen_rtx_REG (SImode,
7599 thumb_find_work_register (saved_regs));
7600 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7601 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7602 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7604 else if (arm_pic_register != INVALID_REGNUM
7605 && arm_pic_register > LAST_LO_REGNUM
7606 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7609 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7610 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7612 else
7613 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7617 /* Need to emit this whether or not we obey regdecls,
7618 since setjmp/longjmp can cause life info to screw up. */
7619 emit_use (pic_reg);
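/* For the common ELF case the code above amounts to a sequence roughly
   like the following (illustrative only; the exact register, labels and
   literal placement vary):

	ldr	rP, .LCn	@ .LCn: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
   .LPIC0:
	add	rP, pc, rP	@ the 8 becomes 4 when compiling for Thumb

   after which rP holds the GOT base for the rest of the function.  */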
7622 /* Generate code to load the address of a static var when flag_pic is set. */
7623 static rtx_insn *
7624 arm_pic_static_addr (rtx orig, rtx reg)
7626 rtx l1, labelno, offset_rtx;
7628 gcc_assert (flag_pic);
7630 /* We use an UNSPEC rather than a LABEL_REF because this label
7631 never appears in the code stream. */
7632 labelno = GEN_INT (pic_labelno++);
7633 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7634 l1 = gen_rtx_CONST (VOIDmode, l1);
7636 /* On the ARM the PC register contains 'dot + 8' at the time of the
7637 addition, on the Thumb it is 'dot + 4'. */
7638 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7639 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7640 UNSPEC_SYMBOL_OFFSET);
7641 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7643 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7646 /* Return nonzero if X is valid as an ARM state addressing register. */
7647 static int
7648 arm_address_register_rtx_p (rtx x, int strict_p)
7650 int regno;
7652 if (!REG_P (x))
7653 return 0;
7655 regno = REGNO (x);
7657 if (strict_p)
7658 return ARM_REGNO_OK_FOR_BASE_P (regno);
7660 return (regno <= LAST_ARM_REGNUM
7661 || regno >= FIRST_PSEUDO_REGISTER
7662 || regno == FRAME_POINTER_REGNUM
7663 || regno == ARG_POINTER_REGNUM);
7666 /* Return TRUE if this rtx is the difference of a symbol and a label,
7667 and will reduce to a PC-relative relocation in the object file.
7668 Expressions like this can be left alone when generating PIC, rather
7669 than forced through the GOT. */
7670 static int
7671 pcrel_constant_p (rtx x)
7673 if (GET_CODE (x) == MINUS)
7674 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7676 return FALSE;
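/* For instance, an expression such as
     (minus (symbol_ref ("some_var")) (label_ref L))
   satisfies this test and can be emitted as a PC-relative relocation
   instead of being forced through the GOT.  */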
7679 /* Return true if X will surely end up in an index register after next
7680 splitting pass. */
7681 static bool
7682 will_be_in_index_register (const_rtx x)
7684 /* arm.md: calculate_pic_address will split this into a register. */
7685 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7688 /* Return nonzero if X is a valid ARM state address operand. */
7690 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7691 int strict_p)
7693 bool use_ldrd;
7694 enum rtx_code code = GET_CODE (x);
7696 if (arm_address_register_rtx_p (x, strict_p))
7697 return 1;
7699 use_ldrd = (TARGET_LDRD
7700 && (mode == DImode || mode == DFmode));
7702 if (code == POST_INC || code == PRE_DEC
7703 || ((code == PRE_INC || code == POST_DEC)
7704 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7705 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7707 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7708 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7709 && GET_CODE (XEXP (x, 1)) == PLUS
7710 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7712 rtx addend = XEXP (XEXP (x, 1), 1);
7714 /* Don't allow ldrd post increment by register because it's hard
7715 to fixup invalid register choices. */
7716 if (use_ldrd
7717 && GET_CODE (x) == POST_MODIFY
7718 && REG_P (addend))
7719 return 0;
7721 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7722 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7725 /* After reload constants split into minipools will have addresses
7726 from a LABEL_REF. */
7727 else if (reload_completed
7728 && (code == LABEL_REF
7729 || (code == CONST
7730 && GET_CODE (XEXP (x, 0)) == PLUS
7731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7732 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7733 return 1;
7735 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7736 return 0;
7738 else if (code == PLUS)
7740 rtx xop0 = XEXP (x, 0);
7741 rtx xop1 = XEXP (x, 1);
7743 return ((arm_address_register_rtx_p (xop0, strict_p)
7744 && ((CONST_INT_P (xop1)
7745 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7746 || (!strict_p && will_be_in_index_register (xop1))))
7747 || (arm_address_register_rtx_p (xop1, strict_p)
7748 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7751 #if 0
7752 /* Reload currently can't handle MINUS, so disable this for now */
7753 else if (GET_CODE (x) == MINUS)
7755 rtx xop0 = XEXP (x, 0);
7756 rtx xop1 = XEXP (x, 1);
7758 return (arm_address_register_rtx_p (xop0, strict_p)
7759 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7761 #endif
7763 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7764 && code == SYMBOL_REF
7765 && CONSTANT_POOL_ADDRESS_P (x)
7766 && ! (flag_pic
7767 && symbol_mentioned_p (get_pool_constant (x))
7768 && ! pcrel_constant_p (get_pool_constant (x))))
7769 return 1;
7771 return 0;
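/* Roughly, the cases accepted above correspond to ARM addressing modes
   such as (illustrative, ignoring the per-mode size restrictions):
     [rN]                  plain base register
     [rN], #4 / [rN, #4]!  post/pre increment and decrement
     [rN, #imm]            base plus legitimate constant index
     [rN, rM, lsl #2]      base plus (possibly scaled) index register
   together with LABEL_REF-based minipool references after reload and
   references into the function's constant pool.  */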
7774 /* Return true if we can avoid creating a constant pool entry for x. */
7775 static bool
7776 can_avoid_literal_pool_for_label_p (rtx x)
7778 /* Normally we can assign constant values to target registers without
7779 the help of the constant pool. But there are cases where we have to use the
7780 constant pool, for example:
7781 1) assigning a label to a register.
7782 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7784 A constant pool access of the form:
7785 (set (reg r0) (mem (symbol_ref (".LC0"))))
7786 will cause the use of a literal pool (later, in arm_reorg).
7787 So here we mark such a format as invalid; the compiler will then
7788 adjust it into:
7789 (set (reg r0) (symbol_ref (".LC0")))
7790 (set (reg r0) (mem (reg r0))).
7791 No extra register is required, and (mem (reg r0)) won't cause the use
7792 of literal pools. */
7793 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7794 && CONSTANT_POOL_ADDRESS_P (x))
7795 return 1;
7796 return 0;
7800 /* Return nonzero if X is a valid Thumb-2 address operand. */
7801 static int
7802 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7804 bool use_ldrd;
7805 enum rtx_code code = GET_CODE (x);
7807 if (arm_address_register_rtx_p (x, strict_p))
7808 return 1;
7810 use_ldrd = (TARGET_LDRD
7811 && (mode == DImode || mode == DFmode));
7813 if (code == POST_INC || code == PRE_DEC
7814 || ((code == PRE_INC || code == POST_DEC)
7815 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7816 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7818 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7819 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7820 && GET_CODE (XEXP (x, 1)) == PLUS
7821 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7823 /* Thumb-2 only has autoincrement by constant. */
7824 rtx addend = XEXP (XEXP (x, 1), 1);
7825 HOST_WIDE_INT offset;
7827 if (!CONST_INT_P (addend))
7828 return 0;
7830 offset = INTVAL(addend);
7831 if (GET_MODE_SIZE (mode) <= 4)
7832 return (offset > -256 && offset < 256);
7834 return (use_ldrd && offset > -1024 && offset < 1024
7835 && (offset & 3) == 0);
7838 /* After reload constants split into minipools will have addresses
7839 from a LABEL_REF. */
7840 else if (reload_completed
7841 && (code == LABEL_REF
7842 || (code == CONST
7843 && GET_CODE (XEXP (x, 0)) == PLUS
7844 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7845 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7846 return 1;
7848 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7849 return 0;
7851 else if (code == PLUS)
7853 rtx xop0 = XEXP (x, 0);
7854 rtx xop1 = XEXP (x, 1);
7856 return ((arm_address_register_rtx_p (xop0, strict_p)
7857 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7858 || (!strict_p && will_be_in_index_register (xop1))))
7859 || (arm_address_register_rtx_p (xop1, strict_p)
7860 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7863 else if (can_avoid_literal_pool_for_label_p (x))
7864 return 0;
7866 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7867 && code == SYMBOL_REF
7868 && CONSTANT_POOL_ADDRESS_P (x)
7869 && ! (flag_pic
7870 && symbol_mentioned_p (get_pool_constant (x))
7871 && ! pcrel_constant_p (get_pool_constant (x))))
7872 return 1;
7874 return 0;
7877 /* Return nonzero if INDEX is valid for an address index operand in
7878 ARM state. */
7879 static int
7880 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7881 int strict_p)
7883 HOST_WIDE_INT range;
7884 enum rtx_code code = GET_CODE (index);
7886 /* Standard coprocessor addressing modes. */
7887 if (TARGET_HARD_FLOAT
7888 && (mode == SFmode || mode == DFmode))
7889 return (code == CONST_INT && INTVAL (index) < 1024
7890 && INTVAL (index) > -1024
7891 && (INTVAL (index) & 3) == 0);
7893 /* For quad modes, we restrict the constant offset to be slightly less
7894 than what the instruction format permits. We do this because for
7895 quad mode moves, we will actually decompose them into two separate
7896 double-mode reads or writes. INDEX must therefore be a valid
7897 (double-mode) offset and so should INDEX+8. */
7898 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7899 return (code == CONST_INT
7900 && INTVAL (index) < 1016
7901 && INTVAL (index) > -1024
7902 && (INTVAL (index) & 3) == 0);
7904 /* We have no such constraint on double mode offsets, so we permit the
7905 full range of the instruction format. */
7906 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7907 return (code == CONST_INT
7908 && INTVAL (index) < 1024
7909 && INTVAL (index) > -1024
7910 && (INTVAL (index) & 3) == 0);
7912 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7913 return (code == CONST_INT
7914 && INTVAL (index) < 1024
7915 && INTVAL (index) > -1024
7916 && (INTVAL (index) & 3) == 0);
7918 if (arm_address_register_rtx_p (index, strict_p)
7919 && (GET_MODE_SIZE (mode) <= 4))
7920 return 1;
7922 if (mode == DImode || mode == DFmode)
7924 if (code == CONST_INT)
7926 HOST_WIDE_INT val = INTVAL (index);
7928 if (TARGET_LDRD)
7929 return val > -256 && val < 256;
7930 else
7931 return val > -4096 && val < 4092;
7934 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7937 if (GET_MODE_SIZE (mode) <= 4
7938 && ! (arm_arch4
7939 && (mode == HImode
7940 || mode == HFmode
7941 || (mode == QImode && outer == SIGN_EXTEND))))
7943 if (code == MULT)
7945 rtx xiop0 = XEXP (index, 0);
7946 rtx xiop1 = XEXP (index, 1);
7948 return ((arm_address_register_rtx_p (xiop0, strict_p)
7949 && power_of_two_operand (xiop1, SImode))
7950 || (arm_address_register_rtx_p (xiop1, strict_p)
7951 && power_of_two_operand (xiop0, SImode)));
7953 else if (code == LSHIFTRT || code == ASHIFTRT
7954 || code == ASHIFT || code == ROTATERT)
7956 rtx op = XEXP (index, 1);
7958 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7959 && CONST_INT_P (op)
7960 && INTVAL (op) > 0
7961 && INTVAL (op) <= 31);
7965 /* For ARM v4 we may be doing a sign-extend operation during the
7966 load. */
7967 if (arm_arch4)
7969 if (mode == HImode
7970 || mode == HFmode
7971 || (outer == SIGN_EXTEND && mode == QImode))
7972 range = 256;
7973 else
7974 range = 4096;
7976 else
7977 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7979 return (code == CONST_INT
7980 && INTVAL (index) < range
7981 && INTVAL (index) > -range);
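/* To summarize the constant ranges accepted above (exclusive bounds, as
   in the code): VFP SF/DF accesses take word-aligned offsets in
   (-1024, 1024); Neon quad-reg modes take (-1024, 1016) and double-reg
   modes (-1024, 1024), again word-aligned; DImode/DFmode takes
   (-256, 256) with LDRD, otherwise (-4096, 4092); with ARMv4, halfword,
   half-float and sign-extended byte accesses take (-256, 256); other
   word and byte accesses take (-4096, 4096).  */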
7984 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7985 index operand, i.e. 1, 2, 4 or 8. */
7986 static bool
7987 thumb2_index_mul_operand (rtx op)
7989 HOST_WIDE_INT val;
7991 if (!CONST_INT_P (op))
7992 return false;
7994 val = INTVAL(op);
7995 return (val == 1 || val == 2 || val == 4 || val == 8);
7998 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7999 static int
8000 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8002 enum rtx_code code = GET_CODE (index);
8004 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8005 /* Standard coprocessor addressing modes. */
8006 if (TARGET_HARD_FLOAT
8007 && (mode == SFmode || mode == DFmode))
8008 return (code == CONST_INT && INTVAL (index) < 1024
8009 /* Thumb-2 allows only a > -256 index range for its core register
8010 load/stores. Since we allow SF/DF in core registers, we have
8011 to use the intersection between -256~4096 (core) and -1024~1024
8012 (coprocessor). */
8013 && INTVAL (index) > -256
8014 && (INTVAL (index) & 3) == 0);
8016 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8018 /* For DImode assume values will usually live in core regs
8019 and only allow LDRD addressing modes. */
8020 if (!TARGET_LDRD || mode != DImode)
8021 return (code == CONST_INT
8022 && INTVAL (index) < 1024
8023 && INTVAL (index) > -1024
8024 && (INTVAL (index) & 3) == 0);
8027 /* For quad modes, we restrict the constant offset to be slightly less
8028 than what the instruction format permits. We do this because for
8029 quad mode moves, we will actually decompose them into two separate
8030 double-mode reads or writes. INDEX must therefore be a valid
8031 (double-mode) offset and so should INDEX+8. */
8032 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8033 return (code == CONST_INT
8034 && INTVAL (index) < 1016
8035 && INTVAL (index) > -1024
8036 && (INTVAL (index) & 3) == 0);
8038 /* We have no such constraint on double mode offsets, so we permit the
8039 full range of the instruction format. */
8040 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8041 return (code == CONST_INT
8042 && INTVAL (index) < 1024
8043 && INTVAL (index) > -1024
8044 && (INTVAL (index) & 3) == 0);
8046 if (arm_address_register_rtx_p (index, strict_p)
8047 && (GET_MODE_SIZE (mode) <= 4))
8048 return 1;
8050 if (mode == DImode || mode == DFmode)
8052 if (code == CONST_INT)
8054 HOST_WIDE_INT val = INTVAL (index);
8055 /* ??? Can we assume ldrd for thumb2? */
8056 /* Thumb-2 ldrd only has reg+const addressing modes. */
8057 /* ldrd supports offsets of +-1020.
8058 However the ldr fallback does not. */
8059 return val > -256 && val < 256 && (val & 3) == 0;
8061 else
8062 return 0;
8065 if (code == MULT)
8067 rtx xiop0 = XEXP (index, 0);
8068 rtx xiop1 = XEXP (index, 1);
8070 return ((arm_address_register_rtx_p (xiop0, strict_p)
8071 && thumb2_index_mul_operand (xiop1))
8072 || (arm_address_register_rtx_p (xiop1, strict_p)
8073 && thumb2_index_mul_operand (xiop0)));
8075 else if (code == ASHIFT)
8077 rtx op = XEXP (index, 1);
8079 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8080 && CONST_INT_P (op)
8081 && INTVAL (op) > 0
8082 && INTVAL (op) <= 3);
8085 return (code == CONST_INT
8086 && INTVAL (index) < 4096
8087 && INTVAL (index) > -256);
8090 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8091 static int
8092 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8094 int regno;
8096 if (!REG_P (x))
8097 return 0;
8099 regno = REGNO (x);
8101 if (strict_p)
8102 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8104 return (regno <= LAST_LO_REGNUM
8105 || regno > LAST_VIRTUAL_REGISTER
8106 || regno == FRAME_POINTER_REGNUM
8107 || (GET_MODE_SIZE (mode) >= 4
8108 && (regno == STACK_POINTER_REGNUM
8109 || regno >= FIRST_PSEUDO_REGISTER
8110 || x == hard_frame_pointer_rtx
8111 || x == arg_pointer_rtx)));
8114 /* Return nonzero if x is a legitimate index register. This is the case
8115 for any base register that can access a QImode object. */
8116 inline static int
8117 thumb1_index_register_rtx_p (rtx x, int strict_p)
8119 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8122 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8124 The AP may be eliminated to either the SP or the FP, so we use the
8125 least common denominator, e.g. SImode, and offsets from 0 to 64.
8127 ??? Verify whether the above is the right approach.
8129 ??? Also, the FP may be eliminated to the SP, so perhaps that
8130 needs special handling also.
8132 ??? Look at how the mips16 port solves this problem. It probably uses
8133 better ways to solve some of these problems.
8135 Although it is not incorrect, we don't accept QImode and HImode
8136 addresses based on the frame pointer or arg pointer until the
8137 reload pass starts. This is so that eliminating such addresses
8138 into stack based ones won't produce impossible code. */
8140 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8142 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8143 return 0;
8145 /* ??? Not clear if this is right. Experiment. */
8146 if (GET_MODE_SIZE (mode) < 4
8147 && !(reload_in_progress || reload_completed)
8148 && (reg_mentioned_p (frame_pointer_rtx, x)
8149 || reg_mentioned_p (arg_pointer_rtx, x)
8150 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8151 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8152 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8153 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8154 return 0;
8156 /* Accept any base register. SP only in SImode or larger. */
8157 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8158 return 1;
8160 /* This is PC relative data before arm_reorg runs. */
8161 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8162 && GET_CODE (x) == SYMBOL_REF
8163 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8164 return 1;
8166 /* This is PC relative data after arm_reorg runs. */
8167 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8168 && reload_completed
8169 && (GET_CODE (x) == LABEL_REF
8170 || (GET_CODE (x) == CONST
8171 && GET_CODE (XEXP (x, 0)) == PLUS
8172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8173 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8174 return 1;
8176 /* Post-inc indexing only supported for SImode and larger. */
8177 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8178 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8179 return 1;
8181 else if (GET_CODE (x) == PLUS)
8183 /* REG+REG address can be any two index registers. */
8184 /* We disallow FRAME+REG addressing since we know that FRAME
8185 will be replaced with STACK, and SP relative addressing only
8186 permits SP+OFFSET. */
8187 if (GET_MODE_SIZE (mode) <= 4
8188 && XEXP (x, 0) != frame_pointer_rtx
8189 && XEXP (x, 1) != frame_pointer_rtx
8190 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8191 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8192 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8193 return 1;
8195 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
8196 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8197 || XEXP (x, 0) == arg_pointer_rtx)
8198 && CONST_INT_P (XEXP (x, 1))
8199 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8200 return 1;
8202 /* REG+const has a 10-bit offset for SP, but only SImode and
8203 larger are supported. */
8204 /* ??? Should probably check for DI/DFmode overflow here
8205 just like GO_IF_LEGITIMATE_OFFSET does. */
8206 else if (REG_P (XEXP (x, 0))
8207 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8208 && GET_MODE_SIZE (mode) >= 4
8209 && CONST_INT_P (XEXP (x, 1))
8210 && INTVAL (XEXP (x, 1)) >= 0
8211 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8212 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8213 return 1;
8215 else if (REG_P (XEXP (x, 0))
8216 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8217 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8218 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8219 && REGNO (XEXP (x, 0))
8220 <= LAST_VIRTUAL_POINTER_REGISTER))
8221 && GET_MODE_SIZE (mode) >= 4
8222 && CONST_INT_P (XEXP (x, 1))
8223 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8224 return 1;
8227 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8228 && GET_MODE_SIZE (mode) == 4
8229 && GET_CODE (x) == SYMBOL_REF
8230 && CONSTANT_POOL_ADDRESS_P (x)
8231 && ! (flag_pic
8232 && symbol_mentioned_p (get_pool_constant (x))
8233 && ! pcrel_constant_p (get_pool_constant (x))))
8234 return 1;
8236 return 0;
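/* Concretely, the forms accepted above correspond to Thumb-1 addressing
   such as (illustrative only):
     ldr  r0, [r1, r2]       register plus register
     ldrb r0, [r1, #31]      byte, 5-bit immediate
     ldrh r0, [r1, #62]      halfword, even immediate up to 62
     ldr  r0, [r1, #124]     word, word-aligned immediate up to 124
     ldr  r0, [sp, #1020]    SP-relative, word-aligned, SImode or larger
   plus PC-relative literal loads before and after arm_reorg.  */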
8239 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8240 instruction of mode MODE. */
8242 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8244 switch (GET_MODE_SIZE (mode))
8246 case 1:
8247 return val >= 0 && val < 32;
8249 case 2:
8250 return val >= 0 && val < 64 && (val & 1) == 0;
8252 default:
8253 return (val >= 0
8254 && (val + GET_MODE_SIZE (mode)) <= 128
8255 && (val & 3) == 0);
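/* For example: a QImode access may use offsets 0..31, an HImode access
   even offsets 0..62, and an SImode or larger access word-aligned
   offsets satisfying val + GET_MODE_SIZE (mode) <= 128, i.e. 0..124
   for SImode.  */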
8259 bool
8260 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8262 if (TARGET_ARM)
8263 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8264 else if (TARGET_THUMB2)
8265 return thumb2_legitimate_address_p (mode, x, strict_p);
8266 else /* if (TARGET_THUMB1) */
8267 return thumb1_legitimate_address_p (mode, x, strict_p);
8270 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8272 Given an rtx X being reloaded into a reg required to be
8273 in class CLASS, return the class of reg to actually use.
8274 In general this is just CLASS, but for the Thumb core registers and
8275 immediate constants we prefer a LO_REGS class or a subset. */
8277 static reg_class_t
8278 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8280 if (TARGET_32BIT)
8281 return rclass;
8282 else
8284 if (rclass == GENERAL_REGS)
8285 return LO_REGS;
8286 else
8287 return rclass;
8291 /* Build the SYMBOL_REF for __tls_get_addr. */
8293 static GTY(()) rtx tls_get_addr_libfunc;
8295 static rtx
8296 get_tls_get_addr (void)
8298 if (!tls_get_addr_libfunc)
8299 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8300 return tls_get_addr_libfunc;
8304 arm_load_tp (rtx target)
8306 if (!target)
8307 target = gen_reg_rtx (SImode);
8309 if (TARGET_HARD_TP)
8311 /* Can return in any reg. */
8312 emit_insn (gen_load_tp_hard (target));
8314 else
8316 /* Always returned in r0. Immediately copy the result into a pseudo,
8317 otherwise other uses of r0 (e.g. setting up function arguments) may
8318 clobber the value. */
8320 rtx tmp;
8322 emit_insn (gen_load_tp_soft ());
8324 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8325 emit_move_insn (target, tmp);
8327 return target;
8330 static rtx
8331 load_tls_operand (rtx x, rtx reg)
8333 rtx tmp;
8335 if (reg == NULL_RTX)
8336 reg = gen_reg_rtx (SImode);
8338 tmp = gen_rtx_CONST (SImode, x);
8340 emit_move_insn (reg, tmp);
8342 return reg;
8345 static rtx_insn *
8346 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8348 rtx label, labelno, sum;
8350 gcc_assert (reloc != TLS_DESCSEQ);
8351 start_sequence ();
8353 labelno = GEN_INT (pic_labelno++);
8354 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8355 label = gen_rtx_CONST (VOIDmode, label);
8357 sum = gen_rtx_UNSPEC (Pmode,
8358 gen_rtvec (4, x, GEN_INT (reloc), label,
8359 GEN_INT (TARGET_ARM ? 8 : 4)),
8360 UNSPEC_TLS);
8361 reg = load_tls_operand (sum, reg);
8363 if (TARGET_ARM)
8364 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8365 else
8366 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8368 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8369 LCT_PURE, /* LCT_CONST? */
8370 Pmode, reg, Pmode);
8372 rtx_insn *insns = get_insns ();
8373 end_sequence ();
8375 return insns;
8378 static rtx
8379 arm_tls_descseq_addr (rtx x, rtx reg)
8381 rtx labelno = GEN_INT (pic_labelno++);
8382 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8383 rtx sum = gen_rtx_UNSPEC (Pmode,
8384 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8385 gen_rtx_CONST (VOIDmode, label),
8386 GEN_INT (!TARGET_ARM)),
8387 UNSPEC_TLS);
8388 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8390 emit_insn (gen_tlscall (x, labelno));
8391 if (!reg)
8392 reg = gen_reg_rtx (SImode);
8393 else
8394 gcc_assert (REGNO (reg) != R0_REGNUM);
8396 emit_move_insn (reg, reg0);
8398 return reg;
8402 legitimize_tls_address (rtx x, rtx reg)
8404 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8405 rtx_insn *insns;
8406 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8408 switch (model)
8410 case TLS_MODEL_GLOBAL_DYNAMIC:
8411 if (TARGET_GNU2_TLS)
8413 reg = arm_tls_descseq_addr (x, reg);
8415 tp = arm_load_tp (NULL_RTX);
8417 dest = gen_rtx_PLUS (Pmode, tp, reg);
8419 else
8421 /* Original scheme */
8422 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8423 dest = gen_reg_rtx (Pmode);
8424 emit_libcall_block (insns, dest, ret, x);
8426 return dest;
8428 case TLS_MODEL_LOCAL_DYNAMIC:
8429 if (TARGET_GNU2_TLS)
8431 reg = arm_tls_descseq_addr (x, reg);
8433 tp = arm_load_tp (NULL_RTX);
8435 dest = gen_rtx_PLUS (Pmode, tp, reg);
8437 else
8439 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8441 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8442 share the LDM result with other LD model accesses. */
8443 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8444 UNSPEC_TLS);
8445 dest = gen_reg_rtx (Pmode);
8446 emit_libcall_block (insns, dest, ret, eqv);
8448 /* Load the addend. */
8449 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8450 GEN_INT (TLS_LDO32)),
8451 UNSPEC_TLS);
8452 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8453 dest = gen_rtx_PLUS (Pmode, dest, addend);
8455 return dest;
8457 case TLS_MODEL_INITIAL_EXEC:
8458 labelno = GEN_INT (pic_labelno++);
8459 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8460 label = gen_rtx_CONST (VOIDmode, label);
8461 sum = gen_rtx_UNSPEC (Pmode,
8462 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8463 GEN_INT (TARGET_ARM ? 8 : 4)),
8464 UNSPEC_TLS);
8465 reg = load_tls_operand (sum, reg);
8467 if (TARGET_ARM)
8468 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8469 else if (TARGET_THUMB2)
8470 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8471 else
8473 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8474 emit_move_insn (reg, gen_const_mem (SImode, reg));
8477 tp = arm_load_tp (NULL_RTX);
8479 return gen_rtx_PLUS (Pmode, tp, reg);
8481 case TLS_MODEL_LOCAL_EXEC:
8482 tp = arm_load_tp (NULL_RTX);
8484 reg = gen_rtx_UNSPEC (Pmode,
8485 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8486 UNSPEC_TLS);
8487 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8489 return gen_rtx_PLUS (Pmode, tp, reg);
8491 default:
8492 abort ();
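/* In summary: global- and local-dynamic accesses either use the GNU2
   TLS descriptor sequence or call __tls_get_addr through a TLS_GD32 /
   TLS_LDM32 relocation (local-dynamic then adds a TLS_LDO32 offset);
   initial-exec loads the offset via a TLS_IE32 GOT slot and adds the
   thread pointer; local-exec simply adds a TLS_LE32 offset to the
   thread pointer.  */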
8496 /* Try machine-dependent ways of modifying an illegitimate address
8497 to be legitimate. If we find one, return the new, valid address. */
8499 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8501 if (arm_tls_referenced_p (x))
8503 rtx addend = NULL;
8505 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8507 addend = XEXP (XEXP (x, 0), 1);
8508 x = XEXP (XEXP (x, 0), 0);
8511 if (GET_CODE (x) != SYMBOL_REF)
8512 return x;
8514 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8516 x = legitimize_tls_address (x, NULL_RTX);
8518 if (addend)
8520 x = gen_rtx_PLUS (SImode, x, addend);
8521 orig_x = x;
8523 else
8524 return x;
8527 if (!TARGET_ARM)
8529 /* TODO: legitimize_address for Thumb2. */
8530 if (TARGET_THUMB2)
8531 return x;
8532 return thumb_legitimize_address (x, orig_x, mode);
8535 if (GET_CODE (x) == PLUS)
8537 rtx xop0 = XEXP (x, 0);
8538 rtx xop1 = XEXP (x, 1);
8540 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8541 xop0 = force_reg (SImode, xop0);
8543 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8544 && !symbol_mentioned_p (xop1))
8545 xop1 = force_reg (SImode, xop1);
8547 if (ARM_BASE_REGISTER_RTX_P (xop0)
8548 && CONST_INT_P (xop1))
8550 HOST_WIDE_INT n, low_n;
8551 rtx base_reg, val;
8552 n = INTVAL (xop1);
8554 /* VFP addressing modes actually allow greater offsets, but for
8555 now we just stick with the lowest common denominator. */
8556 if (mode == DImode || mode == DFmode)
8558 low_n = n & 0x0f;
8559 n &= ~0x0f;
8560 if (low_n > 4)
8562 n += 16;
8563 low_n -= 16;
8566 else
8568 low_n = ((mode) == TImode ? 0
8569 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8570 n -= low_n;
8573 base_reg = gen_reg_rtx (SImode);
8574 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8575 emit_move_insn (base_reg, val);
8576 x = plus_constant (Pmode, base_reg, low_n);
8578 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8579 x = gen_rtx_PLUS (SImode, xop0, xop1);
8582 /* XXX We don't allow MINUS any more -- see comment in
8583 arm_legitimate_address_outer_p (). */
8584 else if (GET_CODE (x) == MINUS)
8586 rtx xop0 = XEXP (x, 0);
8587 rtx xop1 = XEXP (x, 1);
8589 if (CONSTANT_P (xop0))
8590 xop0 = force_reg (SImode, xop0);
8592 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8593 xop1 = force_reg (SImode, xop1);
8595 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8596 x = gen_rtx_MINUS (SImode, xop0, xop1);
8599 /* Make sure to take full advantage of the pre-indexed addressing mode
8600 with absolute addresses which often allows for the base register to
8601 be factorized for multiple adjacent memory references, and it might
8602 even allow the mini pool to be avoided entirely. */
8603 else if (CONST_INT_P (x) && optimize > 0)
8605 unsigned int bits;
8606 HOST_WIDE_INT mask, base, index;
8607 rtx base_reg;
8609 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8610 use an 8-bit index. So let's use a 12-bit index for SImode only and
8611 hope that arm_gen_constant will enable ldrb to use more bits. */
8612 bits = (mode == SImode) ? 12 : 8;
8613 mask = (1 << bits) - 1;
8614 base = INTVAL (x) & ~mask;
8615 index = INTVAL (x) & mask;
8616 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8618 /* It'll most probably be more efficient to generate the base
8619 with more bits set and use a negative index instead. */
8620 base |= mask;
8621 index -= mask;
8623 base_reg = force_reg (SImode, GEN_INT (base));
8624 x = plus_constant (Pmode, base_reg, index);
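/* Worked example (illustrative): for an SImode access to absolute
   address 0x12345678 we get bits == 12, mask == 0xfff,
   base == 0x12345000 and index == 0x678; the base is materialized in a
   register once and the access becomes a reg+offset address, so nearby
   accesses within the same 4K block can share the base register.  */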
8627 if (flag_pic)
8629 /* We need to find and carefully transform any SYMBOL and LABEL
8630 references; so go back to the original address expression. */
8631 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8633 if (new_x != orig_x)
8634 x = new_x;
8637 return x;
8641 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8642 to be legitimate. If we find one, return the new, valid address. */
8644 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8646 if (GET_CODE (x) == PLUS
8647 && CONST_INT_P (XEXP (x, 1))
8648 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8649 || INTVAL (XEXP (x, 1)) < 0))
8651 rtx xop0 = XEXP (x, 0);
8652 rtx xop1 = XEXP (x, 1);
8653 HOST_WIDE_INT offset = INTVAL (xop1);
8655 /* Try and fold the offset into a biasing of the base register and
8656 then offsetting that. Don't do this when optimizing for space
8657 since it can cause too many CSEs. */
8658 if (optimize_size && offset >= 0
8659 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8661 HOST_WIDE_INT delta;
8663 if (offset >= 256)
8664 delta = offset - (256 - GET_MODE_SIZE (mode));
8665 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8666 delta = 31 * GET_MODE_SIZE (mode);
8667 else
8668 delta = offset & (~31 * GET_MODE_SIZE (mode));
8670 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8671 NULL_RTX);
8672 x = plus_constant (Pmode, xop0, delta);
8674 else if (offset < 0 && offset > -256)
8675 /* Small negative offsets are best done with a subtract before the
8676 dereference, since forcing these into a register normally takes two
8677 instructions. */
8678 x = force_operand (x, NULL_RTX);
8679 else
8681 /* For the remaining cases, force the constant into a register. */
8682 xop1 = force_reg (SImode, xop1);
8683 x = gen_rtx_PLUS (SImode, xop0, xop1);
8686 else if (GET_CODE (x) == PLUS
8687 && s_register_operand (XEXP (x, 1), SImode)
8688 && !s_register_operand (XEXP (x, 0), SImode))
8690 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8692 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8695 if (flag_pic)
8697 /* We need to find and carefully transform any SYMBOL and LABEL
8698 references; so go back to the original address expression. */
8699 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8701 if (new_x != orig_x)
8702 x = new_x;
8705 return x;
8708 /* Return TRUE if X contains any TLS symbol references. */
8710 bool
8711 arm_tls_referenced_p (rtx x)
8713 if (! TARGET_HAVE_TLS)
8714 return false;
8716 subrtx_iterator::array_type array;
8717 FOR_EACH_SUBRTX (iter, array, x, ALL)
8719 const_rtx x = *iter;
8720 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8722 /* ARM currently does not provide relocations to encode TLS variables
8723 into AArch32 instructions, only data, so there is currently no way
8724 to implement these if a literal pool is disabled. */
8725 if (arm_disable_literal_pool)
8726 sorry ("accessing thread-local storage is not currently supported "
8727 "with -mpure-code or -mslow-flash-data");
8729 return true;
8732 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8733 TLS offsets, not real symbol references. */
8734 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8735 iter.skip_subrtxes ();
8737 return false;
8740 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8742 On the ARM, allow any integer (invalid ones are removed later by insn
8743 patterns), nice doubles and symbol_refs which refer to the function's
8744 constant pool XXX.
8746 When generating pic allow anything. */
8748 static bool
8749 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8751 return flag_pic || !label_mentioned_p (x);
8754 static bool
8755 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8757 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8758 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8759 for ARMv8-M Baseline or later the result is valid. */
8760 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8761 x = XEXP (x, 0);
8763 return (CONST_INT_P (x)
8764 || CONST_DOUBLE_P (x)
8765 || CONSTANT_ADDRESS_P (x)
8766 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8767 || flag_pic);
8770 static bool
8771 arm_legitimate_constant_p (machine_mode mode, rtx x)
8773 return (!arm_cannot_force_const_mem (mode, x)
8774 && (TARGET_32BIT
8775 ? arm_legitimate_constant_p_1 (mode, x)
8776 : thumb_legitimate_constant_p (mode, x)));
8779 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8781 static bool
8782 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8784 rtx base, offset;
8786 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8788 split_const (x, &base, &offset);
8789 if (GET_CODE (base) == SYMBOL_REF
8790 && !offset_within_block_p (base, INTVAL (offset)))
8791 return true;
8793 return arm_tls_referenced_p (x);
8796 #define REG_OR_SUBREG_REG(X) \
8797 (REG_P (X) \
8798 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8800 #define REG_OR_SUBREG_RTX(X) \
8801 (REG_P (X) ? (X) : SUBREG_REG (X))
8803 static inline int
8804 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8806 machine_mode mode = GET_MODE (x);
8807 int total, words;
8809 switch (code)
8811 case ASHIFT:
8812 case ASHIFTRT:
8813 case LSHIFTRT:
8814 case ROTATERT:
8815 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8817 case PLUS:
8818 case MINUS:
8819 case COMPARE:
8820 case NEG:
8821 case NOT:
8822 return COSTS_N_INSNS (1);
8824 case MULT:
8825 if (arm_arch6m && arm_m_profile_small_mul)
8826 return COSTS_N_INSNS (32);
8828 if (CONST_INT_P (XEXP (x, 1)))
8830 int cycles = 0;
8831 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8833 while (i)
8835 i >>= 2;
8836 cycles++;
8838 return COSTS_N_INSNS (2) + cycles;
8840 return COSTS_N_INSNS (1) + 16;
8842 case SET:
8843 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8844 the mode. */
8845 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8846 return (COSTS_N_INSNS (words)
8847 + 4 * ((MEM_P (SET_SRC (x)))
8848 + MEM_P (SET_DEST (x))));
8850 case CONST_INT:
8851 if (outer == SET)
8853 if (UINTVAL (x) < 256
8854 /* 16-bit constant. */
8855 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8856 return 0;
8857 if (thumb_shiftable_const (INTVAL (x)))
8858 return COSTS_N_INSNS (2);
8859 return COSTS_N_INSNS (3);
8861 else if ((outer == PLUS || outer == COMPARE)
8862 && INTVAL (x) < 256 && INTVAL (x) > -256)
8863 return 0;
8864 else if ((outer == IOR || outer == XOR || outer == AND)
8865 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8866 return COSTS_N_INSNS (1);
8867 else if (outer == AND)
8869 int i;
8870 /* This duplicates the tests in the andsi3 expander. */
8871 for (i = 9; i <= 31; i++)
8872 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8873 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8874 return COSTS_N_INSNS (2);
8876 else if (outer == ASHIFT || outer == ASHIFTRT
8877 || outer == LSHIFTRT)
8878 return 0;
8879 return COSTS_N_INSNS (2);
8881 case CONST:
8882 case CONST_DOUBLE:
8883 case LABEL_REF:
8884 case SYMBOL_REF:
8885 return COSTS_N_INSNS (3);
8887 case UDIV:
8888 case UMOD:
8889 case DIV:
8890 case MOD:
8891 return 100;
8893 case TRUNCATE:
8894 return 99;
8896 case AND:
8897 case XOR:
8898 case IOR:
8899 /* XXX guess. */
8900 return 8;
8902 case MEM:
8903 /* XXX another guess. */
8904 /* Memory costs quite a lot for the first word, but subsequent words
8905 load at the equivalent of a single insn each. */
8906 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8907 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8908 ? 4 : 0));
8910 case IF_THEN_ELSE:
8911 /* XXX a guess. */
8912 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8913 return 14;
8914 return 2;
8916 case SIGN_EXTEND:
8917 case ZERO_EXTEND:
8918 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8919 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8921 if (mode == SImode)
8922 return total;
8924 if (arm_arch6)
8925 return total + COSTS_N_INSNS (1);
8927 /* Assume a two-shift sequence. Increase the cost slightly so
8928 we prefer actual shifts over an extend operation. */
8929 return total + 1 + COSTS_N_INSNS (2);
8931 default:
8932 return 99;
8936 /* Estimates the size cost of thumb1 instructions.
8937 For now most of the code is copied from thumb1_rtx_costs. We need more
8938 fine-grained tuning when we have more related test cases. */
8939 static inline int
8940 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8942 machine_mode mode = GET_MODE (x);
8943 int words, cost;
8945 switch (code)
8947 case ASHIFT:
8948 case ASHIFTRT:
8949 case LSHIFTRT:
8950 case ROTATERT:
8951 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8953 case PLUS:
8954 case MINUS:
8955 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8956 defined by RTL expansion, especially for the expansion of
8957 multiplication. */
8958 if ((GET_CODE (XEXP (x, 0)) == MULT
8959 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8960 || (GET_CODE (XEXP (x, 1)) == MULT
8961 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8962 return COSTS_N_INSNS (2);
8963 /* Fall through. */
8964 case COMPARE:
8965 case NEG:
8966 case NOT:
8967 return COSTS_N_INSNS (1);
8969 case MULT:
8970 if (CONST_INT_P (XEXP (x, 1)))
8972 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8973 into a register first. */
8974 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8975 /* For targets which have a very small and high-latency multiply
8976 unit, we prefer to synthesize the mult with up to 5 instructions,
8977 giving a good balance between size and performance. */
8978 if (arm_arch6m && arm_m_profile_small_mul)
8979 return COSTS_N_INSNS (5);
8980 else
8981 return COSTS_N_INSNS (1) + const_size;
8983 return COSTS_N_INSNS (1);
8985 case SET:
8986 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8987 the mode. */
8988 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8989 cost = COSTS_N_INSNS (words);
8990 if (satisfies_constraint_J (SET_SRC (x))
8991 || satisfies_constraint_K (SET_SRC (x))
8992 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
8993 || (CONST_INT_P (SET_SRC (x))
8994 && UINTVAL (SET_SRC (x)) >= 256
8995 && TARGET_HAVE_MOVT
8996 && satisfies_constraint_j (SET_SRC (x)))
8997 /* thumb1_movdi_insn. */
8998 || ((words > 1) && MEM_P (SET_SRC (x))))
8999 cost += COSTS_N_INSNS (1);
9000 return cost;
9002 case CONST_INT:
9003 if (outer == SET)
9005 if (UINTVAL (x) < 256)
9006 return COSTS_N_INSNS (1);
9007 /* movw is 4 bytes long. */
9008 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9009 return COSTS_N_INSNS (2);
9010 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9011 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9014 if (thumb_shiftable_const (INTVAL (x)))
9015 return COSTS_N_INSNS (2);
9016 return COSTS_N_INSNS (3);
9018 else if ((outer == PLUS || outer == COMPARE)
9019 && INTVAL (x) < 256 && INTVAL (x) > -256)
9020 return 0;
9021 else if ((outer == IOR || outer == XOR || outer == AND)
9022 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9023 return COSTS_N_INSNS (1);
9024 else if (outer == AND)
9026 int i;
9027 /* This duplicates the tests in the andsi3 expander. */
9028 for (i = 9; i <= 31; i++)
9029 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9030 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9031 return COSTS_N_INSNS (2);
9033 else if (outer == ASHIFT || outer == ASHIFTRT
9034 || outer == LSHIFTRT)
9035 return 0;
9036 return COSTS_N_INSNS (2);
9038 case CONST:
9039 case CONST_DOUBLE:
9040 case LABEL_REF:
9041 case SYMBOL_REF:
9042 return COSTS_N_INSNS (3);
9044 case UDIV:
9045 case UMOD:
9046 case DIV:
9047 case MOD:
9048 return 100;
9050 case TRUNCATE:
9051 return 99;
9053 case AND:
9054 case XOR:
9055 case IOR:
9056 return COSTS_N_INSNS (1);
9058 case MEM:
9059 return (COSTS_N_INSNS (1)
9060 + COSTS_N_INSNS (1)
9061 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9062 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9063 ? COSTS_N_INSNS (1) : 0));
9065 case IF_THEN_ELSE:
9066 /* XXX a guess. */
9067 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9068 return 14;
9069 return 2;
9071 case ZERO_EXTEND:
9072 /* XXX still guessing. */
9073 switch (GET_MODE (XEXP (x, 0)))
9075 case E_QImode:
9076 return (1 + (mode == DImode ? 4 : 0)
9077 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9079 case E_HImode:
9080 return (4 + (mode == DImode ? 4 : 0)
9081 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9083 case E_SImode:
9084 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9086 default:
9087 return 99;
9090 default:
9091 return 99;
9095 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9096 operand, then return the operand that is being shifted. If the shift
9097 is not by a constant, then set SHIFT_REG to point to the operand.
9098 Return NULL if OP is not a shifter operand. */
9099 static rtx
9100 shifter_op_p (rtx op, rtx *shift_reg)
9102 enum rtx_code code = GET_CODE (op);
9104 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9105 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9106 return XEXP (op, 0);
9107 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9108 return XEXP (op, 0);
9109 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9110 || code == ASHIFTRT)
9112 if (!CONST_INT_P (XEXP (op, 1)))
9113 *shift_reg = XEXP (op, 1);
9114 return XEXP (op, 0);
9117 return NULL;
9120 static bool
9121 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9123 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9124 rtx_code code = GET_CODE (x);
9125 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9127 switch (XINT (x, 1))
9129 case UNSPEC_UNALIGNED_LOAD:
9130 /* We can only do unaligned loads into the integer unit, and we can't
9131 use LDM or LDRD. */
9132 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9133 if (speed_p)
9134 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9135 + extra_cost->ldst.load_unaligned);
9137 #ifdef NOT_YET
9138 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9139 ADDR_SPACE_GENERIC, speed_p);
9140 #endif
9141 return true;
9143 case UNSPEC_UNALIGNED_STORE:
9144 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9145 if (speed_p)
9146 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9147 + extra_cost->ldst.store_unaligned);
9149 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9150 #ifdef NOT_YET
9151 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9152 ADDR_SPACE_GENERIC, speed_p);
9153 #endif
9154 return true;
9156 case UNSPEC_VRINTZ:
9157 case UNSPEC_VRINTP:
9158 case UNSPEC_VRINTM:
9159 case UNSPEC_VRINTR:
9160 case UNSPEC_VRINTX:
9161 case UNSPEC_VRINTA:
9162 if (speed_p)
9163 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9165 return true;
9166 default:
9167 *cost = COSTS_N_INSNS (2);
9168 break;
9170 return true;
9173 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9174 call itself (just one insn when optimizing for size) and then one for processing the result.  */
9175 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
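/* For example (illustrative only): a two-operand libcall is costed as
   LIBCALL_COST (2), which expands to COSTS_N_INSNS (2 + 18) when optimizing
   for speed and COSTS_N_INSNS (2 + 2) when optimizing for size.  */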
9177 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9178 do \
9180 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9181 if (shift_op != NULL \
9182 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9184 if (shift_reg) \
9186 if (speed_p) \
9187 *cost += extra_cost->alu.arith_shift_reg; \
9188 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9189 ASHIFT, 1, speed_p); \
9191 else if (speed_p) \
9192 *cost += extra_cost->alu.arith_shift; \
9194 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9195 ASHIFT, 0, speed_p) \
9196 + rtx_cost (XEXP (x, 1 - IDX), \
9197 GET_MODE (shift_op), \
9198 OP, 1, speed_p)); \
9199 return true; \
9202 while (0);
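/* A sketch of what the macro above does (added for clarity): if operand IDX
   of X is a left shift -- by a constant or by a register -- it charges one
   arithmetic-with-shift operation (arith_shift or arith_shift_reg), adds the
   costs of the shifted operand and of the other operand of X, and makes the
   enclosing case return true; otherwise it falls through to the code that
   follows its use.  */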
9204 /* RTX costs. Make an estimate of the cost of executing the operation
9205 X, which is contained within an operation with code OUTER_CODE.
9206 SPEED_P indicates whether the cost desired is the performance cost,
9207 or the size cost. The estimate is stored in COST and the return
9208 value is TRUE if the cost calculation is final, or FALSE if the
9209 caller should recurse through the operands of X to add additional
9210 costs.
9212 We currently make no attempt to model the size savings of Thumb-2
9213 16-bit instructions. At the normal points in compilation where
9214 this code is called we have no measure of whether the condition
9215 flags are live or not, and thus no realistic way to determine what
9216 the size will eventually be. */
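/* Note (added for clarity): on entry the cost is seeded with
   COSTS_N_INSNS (1); the cases below either adjust that value or overwrite
   it completely before deciding whether the result is final.  */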
9217 static bool
9218 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9219 const struct cpu_cost_table *extra_cost,
9220 int *cost, bool speed_p)
9222 machine_mode mode = GET_MODE (x);
9224 *cost = COSTS_N_INSNS (1);
9226 if (TARGET_THUMB1)
9228 if (speed_p)
9229 *cost = thumb1_rtx_costs (x, code, outer_code);
9230 else
9231 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9232 return true;
9235 switch (code)
9237 case SET:
9238 *cost = 0;
9239 /* SET RTXs don't have a mode so we get it from the destination. */
9240 mode = GET_MODE (SET_DEST (x));
9242 if (REG_P (SET_SRC (x))
9243 && REG_P (SET_DEST (x)))
9245 /* Assume that most copies can be done with a single insn,
9246 unless we don't have HW FP, in which case everything
9247 larger than word mode will require two insns. */
9248 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9249 && GET_MODE_SIZE (mode) > 4)
9250 || mode == DImode)
9251 ? 2 : 1);
9252 /* Conditional register moves can be encoded
9253 in 16 bits in Thumb mode. */
9254 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9255 *cost >>= 1;
9257 return true;
9260 if (CONST_INT_P (SET_SRC (x)))
9262 /* Handle CONST_INT here, since the value doesn't have a mode
9263 and we would otherwise be unable to work out the true cost. */
9264 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9265 0, speed_p);
9266 outer_code = SET;
9267 /* Slightly lower the cost of setting a core reg to a constant.
9268 This helps break up chains and allows for better scheduling. */
9269 if (REG_P (SET_DEST (x))
9270 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9271 *cost -= 1;
9272 x = SET_SRC (x);
9273 /* Immediate moves with an immediate in the range [0, 255] can be
9274 encoded in 16 bits in Thumb mode. */
9275 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9276 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9277 *cost >>= 1;
9278 goto const_int_cost;
9281 return false;
9283 case MEM:
9284 /* A memory access costs 1 insn if the mode is small or the address is
9285 a single register; otherwise it costs one insn per word.  */
9286 if (REG_P (XEXP (x, 0)))
9287 *cost = COSTS_N_INSNS (1);
9288 else if (flag_pic
9289 && GET_CODE (XEXP (x, 0)) == PLUS
9290 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9291 /* This will be split into two instructions.
9292 See arm.md:calculate_pic_address. */
9293 *cost = COSTS_N_INSNS (2);
9294 else
9295 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9297 /* For speed optimizations, add the costs of the address and
9298 accessing memory. */
9299 if (speed_p)
9300 #ifdef NOT_YET
9301 *cost += (extra_cost->ldst.load
9302 + arm_address_cost (XEXP (x, 0), mode,
9303 ADDR_SPACE_GENERIC, speed_p));
9304 #else
9305 *cost += extra_cost->ldst.load;
9306 #endif
9307 return true;
9309 case PARALLEL:
9311 /* Calculations of LDM costs are complex. We assume an initial cost
9312 (ldm_1st) which will load the number of registers mentioned in
9313 ldm_regs_per_insn_1st registers; then each additional
9314 ldm_regs_per_insn_subsequent registers cost one more insn. The
9315 formula for N regs is thus:
9317 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9318 + ldm_regs_per_insn_subsequent - 1)
9319 / ldm_regs_per_insn_subsequent).
9321 Additional costs may also be added for addressing. A similar
9322 formula is used for STM. */
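/* Worked example (purely hypothetical tuning numbers, added for clarity):
   with ldm_regs_per_insn_1st == 4 and ldm_regs_per_insn_subsequent == 2,
   loading 7 registers costs
     ldm_1st + COSTS_N_INSNS ((MAX (7 - 4, 0) + 2 - 1) / 2)
     == ldm_1st + COSTS_N_INSNS (2).  */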
9324 bool is_ldm = load_multiple_operation (x, SImode);
9325 bool is_stm = store_multiple_operation (x, SImode);
9327 if (is_ldm || is_stm)
9329 if (speed_p)
9331 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9332 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9333 ? extra_cost->ldst.ldm_regs_per_insn_1st
9334 : extra_cost->ldst.stm_regs_per_insn_1st;
9335 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9336 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9337 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9339 *cost += regs_per_insn_1st
9340 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9341 + regs_per_insn_sub - 1)
9342 / regs_per_insn_sub);
9343 return true;
9347 return false;
9349 case DIV:
9350 case UDIV:
9351 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9352 && (mode == SFmode || !TARGET_VFP_SINGLE))
9353 *cost += COSTS_N_INSNS (speed_p
9354 ? extra_cost->fp[mode != SFmode].div : 0);
9355 else if (mode == SImode && TARGET_IDIV)
9356 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9357 else
9358 *cost = LIBCALL_COST (2);
9360 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9361 are possible, udiv is preferred.  */
9362 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9363 return false; /* All arguments must be in registers. */
9365 case MOD:
9366 /* MOD by a power of 2 can be expanded as:
9367 rsbs r1, r0, #0
9368 and r0, r0, #(n - 1)
9369 and r1, r1, #(n - 1)
9370 rsbpl r0, r1, #0. */
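/* For instance (illustrative): for x % 16 the masks above are #15 and the
   sequence is 4 instructions in total, matching the base cost of
   COSTS_N_INSNS (1) plus the COSTS_N_INSNS (3) added below.  */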
9371 if (CONST_INT_P (XEXP (x, 1))
9372 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9373 && mode == SImode)
9375 *cost += COSTS_N_INSNS (3);
9377 if (speed_p)
9378 *cost += 2 * extra_cost->alu.logical
9379 + extra_cost->alu.arith;
9380 return true;
9383 /* Fall-through. */
9384 case UMOD:
9385 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9386 are possible, udiv is preferred.  */
9387 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9388 return false; /* All arguments must be in registers. */
9390 case ROTATE:
9391 if (mode == SImode && REG_P (XEXP (x, 1)))
9393 *cost += (COSTS_N_INSNS (1)
9394 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9395 if (speed_p)
9396 *cost += extra_cost->alu.shift_reg;
9397 return true;
9399 /* Fall through */
9400 case ROTATERT:
9401 case ASHIFT:
9402 case LSHIFTRT:
9403 case ASHIFTRT:
9404 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9406 *cost += (COSTS_N_INSNS (2)
9407 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9408 if (speed_p)
9409 *cost += 2 * extra_cost->alu.shift;
9410 return true;
9412 else if (mode == SImode)
9414 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9415 /* Slightly disparage register shifts at -Os, but not by much. */
9416 if (!CONST_INT_P (XEXP (x, 1)))
9417 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9418 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9419 return true;
9421 else if (GET_MODE_CLASS (mode) == MODE_INT
9422 && GET_MODE_SIZE (mode) < 4)
9424 if (code == ASHIFT)
9426 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9427 /* Slightly disparage register shifts at -Os, but not by
9428 much. */
9429 if (!CONST_INT_P (XEXP (x, 1)))
9430 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9431 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9433 else if (code == LSHIFTRT || code == ASHIFTRT)
9435 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9437 /* Can use SBFX/UBFX. */
9438 if (speed_p)
9439 *cost += extra_cost->alu.bfx;
9440 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9442 else
9444 *cost += COSTS_N_INSNS (1);
9445 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9446 if (speed_p)
9448 if (CONST_INT_P (XEXP (x, 1)))
9449 *cost += 2 * extra_cost->alu.shift;
9450 else
9451 *cost += (extra_cost->alu.shift
9452 + extra_cost->alu.shift_reg);
9454 else
9455 /* Slightly disparage register shifts. */
9456 *cost += !CONST_INT_P (XEXP (x, 1));
9459 else /* Rotates. */
9461 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9462 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9463 if (speed_p)
9465 if (CONST_INT_P (XEXP (x, 1)))
9466 *cost += (2 * extra_cost->alu.shift
9467 + extra_cost->alu.log_shift);
9468 else
9469 *cost += (extra_cost->alu.shift
9470 + extra_cost->alu.shift_reg
9471 + extra_cost->alu.log_shift_reg);
9474 return true;
9477 *cost = LIBCALL_COST (2);
9478 return false;
9480 case BSWAP:
9481 if (arm_arch6)
9483 if (mode == SImode)
9485 if (speed_p)
9486 *cost += extra_cost->alu.rev;
9488 return false;
9491 else
9493 /* No rev instruction available. Look at arm_legacy_rev
9494 and thumb_legacy_rev for the form of RTL used then. */
9495 if (TARGET_THUMB)
9497 *cost += COSTS_N_INSNS (9);
9499 if (speed_p)
9501 *cost += 6 * extra_cost->alu.shift;
9502 *cost += 3 * extra_cost->alu.logical;
9505 else
9507 *cost += COSTS_N_INSNS (4);
9509 if (speed_p)
9511 *cost += 2 * extra_cost->alu.shift;
9512 *cost += extra_cost->alu.arith_shift;
9513 *cost += 2 * extra_cost->alu.logical;
9516 return true;
9518 return false;
9520 case MINUS:
9521 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9522 && (mode == SFmode || !TARGET_VFP_SINGLE))
9524 if (GET_CODE (XEXP (x, 0)) == MULT
9525 || GET_CODE (XEXP (x, 1)) == MULT)
9527 rtx mul_op0, mul_op1, sub_op;
9529 if (speed_p)
9530 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9532 if (GET_CODE (XEXP (x, 0)) == MULT)
9534 mul_op0 = XEXP (XEXP (x, 0), 0);
9535 mul_op1 = XEXP (XEXP (x, 0), 1);
9536 sub_op = XEXP (x, 1);
9538 else
9540 mul_op0 = XEXP (XEXP (x, 1), 0);
9541 mul_op1 = XEXP (XEXP (x, 1), 1);
9542 sub_op = XEXP (x, 0);
9545 /* The first operand of the multiply may be optionally
9546 negated. */
9547 if (GET_CODE (mul_op0) == NEG)
9548 mul_op0 = XEXP (mul_op0, 0);
9550 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9551 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9552 + rtx_cost (sub_op, mode, code, 0, speed_p));
9554 return true;
9557 if (speed_p)
9558 *cost += extra_cost->fp[mode != SFmode].addsub;
9559 return false;
9562 if (mode == SImode)
9564 rtx shift_by_reg = NULL;
9565 rtx shift_op;
9566 rtx non_shift_op;
9568 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9569 if (shift_op == NULL)
9571 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9572 non_shift_op = XEXP (x, 0);
9574 else
9575 non_shift_op = XEXP (x, 1);
9577 if (shift_op != NULL)
9579 if (shift_by_reg != NULL)
9581 if (speed_p)
9582 *cost += extra_cost->alu.arith_shift_reg;
9583 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9585 else if (speed_p)
9586 *cost += extra_cost->alu.arith_shift;
9588 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9589 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9590 return true;
9593 if (arm_arch_thumb2
9594 && GET_CODE (XEXP (x, 1)) == MULT)
9596 /* MLS. */
9597 if (speed_p)
9598 *cost += extra_cost->mult[0].add;
9599 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9600 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9601 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9602 return true;
9605 if (CONST_INT_P (XEXP (x, 0)))
9607 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9608 INTVAL (XEXP (x, 0)), NULL_RTX,
9609 NULL_RTX, 1, 0);
9610 *cost = COSTS_N_INSNS (insns);
9611 if (speed_p)
9612 *cost += insns * extra_cost->alu.arith;
9613 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9614 return true;
9616 else if (speed_p)
9617 *cost += extra_cost->alu.arith;
9619 return false;
9622 if (GET_MODE_CLASS (mode) == MODE_INT
9623 && GET_MODE_SIZE (mode) < 4)
9625 rtx shift_op, shift_reg;
9626 shift_reg = NULL;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9635 *cost += 1;
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith;
9639 if (CONST_INT_P (XEXP (x, 0)))
9641 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9642 return true;
9645 return false;
9648 if (mode == DImode)
9650 *cost += COSTS_N_INSNS (1);
9652 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9654 rtx op1 = XEXP (x, 1);
9656 if (speed_p)
9657 *cost += 2 * extra_cost->alu.arith;
9659 if (GET_CODE (op1) == ZERO_EXTEND)
9660 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9661 0, speed_p);
9662 else
9663 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9664 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9665 0, speed_p);
9666 return true;
9668 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9670 if (speed_p)
9671 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9672 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9673 0, speed_p)
9674 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9675 return true;
9677 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9678 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9680 if (speed_p)
9681 *cost += (extra_cost->alu.arith
9682 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9683 ? extra_cost->alu.arith
9684 : extra_cost->alu.arith_shift));
9685 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9686 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9687 GET_CODE (XEXP (x, 1)), 0, speed_p));
9688 return true;
9691 if (speed_p)
9692 *cost += 2 * extra_cost->alu.arith;
9693 return false;
9696 /* Vector mode? */
9698 *cost = LIBCALL_COST (2);
9699 return false;
9701 case PLUS:
9702 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9703 && (mode == SFmode || !TARGET_VFP_SINGLE))
9705 if (GET_CODE (XEXP (x, 0)) == MULT)
9707 rtx mul_op0, mul_op1, add_op;
9709 if (speed_p)
9710 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9712 mul_op0 = XEXP (XEXP (x, 0), 0);
9713 mul_op1 = XEXP (XEXP (x, 0), 1);
9714 add_op = XEXP (x, 1);
9716 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9717 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9718 + rtx_cost (add_op, mode, code, 0, speed_p));
9720 return true;
9723 if (speed_p)
9724 *cost += extra_cost->fp[mode != SFmode].addsub;
9725 return false;
9727 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9729 *cost = LIBCALL_COST (2);
9730 return false;
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9736 narrow modes. */
9737 if (GET_MODE_CLASS (mode) == MODE_INT
9738 && GET_MODE_SIZE (mode) < 4)
9740 rtx shift_op, shift_reg;
9741 shift_reg = NULL;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9745 if (CONST_INT_P (XEXP (x, 1)))
9747 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 1)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 /* Slightly penalize a narrow operation as the result may
9754 need widening. */
9755 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9756 return true;
9759 /* Slightly penalize a narrow operation as the result may
9760 need widening. */
9761 *cost += 1;
9762 if (speed_p)
9763 *cost += extra_cost->alu.arith;
9765 return false;
9768 if (mode == SImode)
9770 rtx shift_op, shift_reg;
9772 if (TARGET_INT_SIMD
9773 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9774 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9776 /* UXTA[BH] or SXTA[BH]. */
9777 if (speed_p)
9778 *cost += extra_cost->alu.extend_arith;
9779 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9780 0, speed_p)
9781 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9782 return true;
9785 shift_reg = NULL;
9786 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9787 if (shift_op != NULL)
9789 if (shift_reg)
9791 if (speed_p)
9792 *cost += extra_cost->alu.arith_shift_reg;
9793 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9795 else if (speed_p)
9796 *cost += extra_cost->alu.arith_shift;
9798 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9799 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9800 return true;
9802 if (GET_CODE (XEXP (x, 0)) == MULT)
9804 rtx mul_op = XEXP (x, 0);
9806 if (TARGET_DSP_MULTIPLY
9807 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9808 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9809 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9810 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9811 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9812 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9815 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9816 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9817 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9818 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9819 == 16))))))
9821 /* SMLA[BT][BT]. */
9822 if (speed_p)
9823 *cost += extra_cost->mult[0].extend_add;
9824 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9825 SIGN_EXTEND, 0, speed_p)
9826 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9827 SIGN_EXTEND, 0, speed_p)
9828 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9829 return true;
9832 if (speed_p)
9833 *cost += extra_cost->mult[0].add;
9834 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9835 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9836 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9837 return true;
9839 if (CONST_INT_P (XEXP (x, 1)))
9841 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9842 INTVAL (XEXP (x, 1)), NULL_RTX,
9843 NULL_RTX, 1, 0);
9844 *cost = COSTS_N_INSNS (insns);
9845 if (speed_p)
9846 *cost += insns * extra_cost->alu.arith;
9847 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9848 return true;
9850 else if (speed_p)
9851 *cost += extra_cost->alu.arith;
9853 return false;
9856 if (mode == DImode)
9858 if (arm_arch3m
9859 && GET_CODE (XEXP (x, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9862 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9865 if (speed_p)
9866 *cost += extra_cost->mult[1].extend_add;
9867 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9868 ZERO_EXTEND, 0, speed_p)
9869 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9870 ZERO_EXTEND, 0, speed_p)
9871 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9872 return true;
9875 *cost += COSTS_N_INSNS (1);
9877 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9878 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9880 if (speed_p)
9881 *cost += (extra_cost->alu.arith
9882 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9883 ? extra_cost->alu.arith
9884 : extra_cost->alu.arith_shift));
9886 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9887 0, speed_p)
9888 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9889 return true;
9892 if (speed_p)
9893 *cost += 2 * extra_cost->alu.arith;
9894 return false;
9897 /* Vector mode? */
9898 *cost = LIBCALL_COST (2);
9899 return false;
9900 case IOR:
9901 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9903 if (speed_p)
9904 *cost += extra_cost->alu.rev;
9906 return true;
9908 /* Fall through. */
9909 case AND: case XOR:
9910 if (mode == SImode)
9912 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9913 rtx op0 = XEXP (x, 0);
9914 rtx shift_op, shift_reg;
9916 if (subcode == NOT
9917 && (code == AND
9918 || (code == IOR && TARGET_THUMB2)))
9919 op0 = XEXP (op0, 0);
9921 shift_reg = NULL;
9922 shift_op = shifter_op_p (op0, &shift_reg);
9923 if (shift_op != NULL)
9925 if (shift_reg)
9927 if (speed_p)
9928 *cost += extra_cost->alu.log_shift_reg;
9929 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9931 else if (speed_p)
9932 *cost += extra_cost->alu.log_shift;
9934 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9935 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9936 return true;
9939 if (CONST_INT_P (XEXP (x, 1)))
9941 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9942 INTVAL (XEXP (x, 1)), NULL_RTX,
9943 NULL_RTX, 1, 0);
9945 *cost = COSTS_N_INSNS (insns);
9946 if (speed_p)
9947 *cost += insns * extra_cost->alu.logical;
9948 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9949 return true;
9952 if (speed_p)
9953 *cost += extra_cost->alu.logical;
9954 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9955 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9956 return true;
9959 if (mode == DImode)
9961 rtx op0 = XEXP (x, 0);
9962 enum rtx_code subcode = GET_CODE (op0);
9964 *cost += COSTS_N_INSNS (1);
9966 if (subcode == NOT
9967 && (code == AND
9968 || (code == IOR && TARGET_THUMB2)))
9969 op0 = XEXP (op0, 0);
9971 if (GET_CODE (op0) == ZERO_EXTEND)
9973 if (speed_p)
9974 *cost += 2 * extra_cost->alu.logical;
9976 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9977 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9979 return true;
9981 else if (GET_CODE (op0) == SIGN_EXTEND)
9983 if (speed_p)
9984 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9986 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9987 0, speed_p)
9988 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9989 return true;
9992 if (speed_p)
9993 *cost += 2 * extra_cost->alu.logical;
9995 return true;
9997 /* Vector mode? */
9999 *cost = LIBCALL_COST (2);
10000 return false;
10002 case MULT:
10003 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10004 && (mode == SFmode || !TARGET_VFP_SINGLE))
10006 rtx op0 = XEXP (x, 0);
10008 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10009 op0 = XEXP (op0, 0);
10011 if (speed_p)
10012 *cost += extra_cost->fp[mode != SFmode].mult;
10014 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10016 return true;
10018 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10020 *cost = LIBCALL_COST (2);
10021 return false;
10024 if (mode == SImode)
10026 if (TARGET_DSP_MULTIPLY
10027 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10028 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10029 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10030 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10031 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10032 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10034 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10035 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10038 && (INTVAL (XEXP (XEXP (x, 1), 1))
10039 == 16))))))
10041 /* SMUL[TB][TB]. */
10042 if (speed_p)
10043 *cost += extra_cost->mult[0].extend;
10044 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10045 SIGN_EXTEND, 0, speed_p);
10046 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10047 SIGN_EXTEND, 1, speed_p);
10048 return true;
10050 if (speed_p)
10051 *cost += extra_cost->mult[0].simple;
10052 return false;
10055 if (mode == DImode)
10057 if (arm_arch3m
10058 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10059 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10060 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10061 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10063 if (speed_p)
10064 *cost += extra_cost->mult[1].extend;
10065 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10066 ZERO_EXTEND, 0, speed_p)
10067 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10068 ZERO_EXTEND, 0, speed_p));
10069 return true;
10072 *cost = LIBCALL_COST (2);
10073 return false;
10076 /* Vector mode? */
10077 *cost = LIBCALL_COST (2);
10078 return false;
10080 case NEG:
10081 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10082 && (mode == SFmode || !TARGET_VFP_SINGLE))
10084 if (GET_CODE (XEXP (x, 0)) == MULT)
10086 /* VNMUL. */
10087 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10088 return true;
10091 if (speed_p)
10092 *cost += extra_cost->fp[mode != SFmode].neg;
10094 return false;
10096 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10098 *cost = LIBCALL_COST (1);
10099 return false;
10102 if (mode == SImode)
10104 if (GET_CODE (XEXP (x, 0)) == ABS)
10106 *cost += COSTS_N_INSNS (1);
10107 /* Assume the non-flag-changing variant. */
10108 if (speed_p)
10109 *cost += (extra_cost->alu.log_shift
10110 + extra_cost->alu.arith_shift);
10111 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10112 return true;
10115 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10116 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10118 *cost += COSTS_N_INSNS (1);
10119 /* No extra cost for MOV imm and MVN imm. */
10120 /* If the comparison op is using the flags, there's no further
10121 cost, otherwise we need to add the cost of the comparison. */
10122 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10123 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10124 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10126 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10127 *cost += (COSTS_N_INSNS (1)
10128 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10129 0, speed_p)
10130 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10131 1, speed_p));
10132 if (speed_p)
10133 *cost += extra_cost->alu.arith;
10135 return true;
10138 if (speed_p)
10139 *cost += extra_cost->alu.arith;
10140 return false;
10143 if (GET_MODE_CLASS (mode) == MODE_INT
10144 && GET_MODE_SIZE (mode) < 4)
10146 /* Slightly disparage, as we might need an extend operation. */
10147 *cost += 1;
10148 if (speed_p)
10149 *cost += extra_cost->alu.arith;
10150 return false;
10153 if (mode == DImode)
10155 *cost += COSTS_N_INSNS (1);
10156 if (speed_p)
10157 *cost += 2 * extra_cost->alu.arith;
10158 return false;
10161 /* Vector mode? */
10162 *cost = LIBCALL_COST (1);
10163 return false;
10165 case NOT:
10166 if (mode == SImode)
10168 rtx shift_op;
10169 rtx shift_reg = NULL;
10171 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10173 if (shift_op)
10175 if (shift_reg != NULL)
10177 if (speed_p)
10178 *cost += extra_cost->alu.log_shift_reg;
10179 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10181 else if (speed_p)
10182 *cost += extra_cost->alu.log_shift;
10183 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10184 return true;
10187 if (speed_p)
10188 *cost += extra_cost->alu.logical;
10189 return false;
10191 if (mode == DImode)
10193 *cost += COSTS_N_INSNS (1);
10194 return false;
10197 /* Vector mode? */
10199 *cost += LIBCALL_COST (1);
10200 return false;
10202 case IF_THEN_ELSE:
10204 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10206 *cost += COSTS_N_INSNS (3);
10207 return true;
10209 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10210 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10212 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10213 /* Assume that if one arm of the if_then_else is a register,
10214 it will be tied with the result and eliminate the
10215 conditional insn. */
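/* Illustrative example (added for clarity): for
   (if_then_else (cond) (reg r1) (const_int 0)) the register arm is assumed
   to be tied to the destination, so only the cost of the constant arm is
   added on top of the cost of the condition.  */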
10216 if (REG_P (XEXP (x, 1)))
10217 *cost += op2cost;
10218 else if (REG_P (XEXP (x, 2)))
10219 *cost += op1cost;
10220 else
10222 if (speed_p)
10224 if (extra_cost->alu.non_exec_costs_exec)
10225 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10226 else
10227 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10229 else
10230 *cost += op1cost + op2cost;
10233 return true;
10235 case COMPARE:
10236 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10237 *cost = 0;
10238 else
10240 machine_mode op0mode;
10241 /* We'll mostly assume that the cost of a compare is the cost of the
10242 LHS. However, there are some notable exceptions. */
10244 /* Floating point compares are never done as side-effects. */
10245 op0mode = GET_MODE (XEXP (x, 0));
10246 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10247 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10249 if (speed_p)
10250 *cost += extra_cost->fp[op0mode != SFmode].compare;
10252 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10254 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10255 return true;
10258 return false;
10260 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10262 *cost = LIBCALL_COST (2);
10263 return false;
10266 /* DImode compares normally take two insns. */
10267 if (op0mode == DImode)
10269 *cost += COSTS_N_INSNS (1);
10270 if (speed_p)
10271 *cost += 2 * extra_cost->alu.arith;
10272 return false;
10275 if (op0mode == SImode)
10277 rtx shift_op;
10278 rtx shift_reg;
10280 if (XEXP (x, 1) == const0_rtx
10281 && !(REG_P (XEXP (x, 0))
10282 || (GET_CODE (XEXP (x, 0)) == SUBREG
10283 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10285 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10287 /* Multiply operations that set the flags are often
10288 significantly more expensive. */
10289 if (speed_p
10290 && GET_CODE (XEXP (x, 0)) == MULT
10291 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10292 *cost += extra_cost->mult[0].flag_setting;
10294 if (speed_p
10295 && GET_CODE (XEXP (x, 0)) == PLUS
10296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10297 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10298 0), 1), mode))
10299 *cost += extra_cost->mult[0].flag_setting;
10300 return true;
10303 shift_reg = NULL;
10304 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10305 if (shift_op != NULL)
10307 if (shift_reg != NULL)
10309 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10310 1, speed_p);
10311 if (speed_p)
10312 *cost += extra_cost->alu.arith_shift_reg;
10314 else if (speed_p)
10315 *cost += extra_cost->alu.arith_shift;
10316 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10317 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10318 return true;
10321 if (speed_p)
10322 *cost += extra_cost->alu.arith;
10323 if (CONST_INT_P (XEXP (x, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10326 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10327 return true;
10329 return false;
10332 /* Vector mode? */
10334 *cost = LIBCALL_COST (2);
10335 return false;
10337 return true;
10339 case EQ:
10340 case NE:
10341 case LT:
10342 case LE:
10343 case GT:
10344 case GE:
10345 case LTU:
10346 case LEU:
10347 case GEU:
10348 case GTU:
10349 case ORDERED:
10350 case UNORDERED:
10351 case UNEQ:
10352 case UNLE:
10353 case UNLT:
10354 case UNGE:
10355 case UNGT:
10356 case LTGT:
10357 if (outer_code == SET)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10361 && XEXP (x, 1) == const0_rtx)
10363 /* Thumb also needs an IT insn. */
10364 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10365 return true;
10367 if (XEXP (x, 1) == const0_rtx)
10369 switch (code)
10371 case LT:
10372 /* LSR Rd, Rn, #31. */
10373 if (speed_p)
10374 *cost += extra_cost->alu.shift;
10375 break;
10377 case EQ:
10378 /* RSBS T1, Rn, #0
10379 ADC Rd, Rn, T1. */
10381 case NE:
10382 /* SUBS T1, Rn, #1
10383 SBC Rd, Rn, T1. */
10384 *cost += COSTS_N_INSNS (1);
10385 break;
10387 case LE:
10388 /* RSBS T1, Rn, Rn, LSR #31
10389 ADC Rd, Rn, T1. */
10390 *cost += COSTS_N_INSNS (1);
10391 if (speed_p)
10392 *cost += extra_cost->alu.arith_shift;
10393 break;
10395 case GT:
10396 /* RSB Rd, Rn, Rn, ASR #1
10397 LSR Rd, Rd, #31. */
10398 *cost += COSTS_N_INSNS (1);
10399 if (speed_p)
10400 *cost += (extra_cost->alu.arith_shift
10401 + extra_cost->alu.shift);
10402 break;
10404 case GE:
10405 /* ASR Rd, Rn, #31
10406 ADD Rd, Rn, #1. */
10407 *cost += COSTS_N_INSNS (1);
10408 if (speed_p)
10409 *cost += extra_cost->alu.shift;
10410 break;
10412 default:
10413 /* Remaining cases are either meaningless or would take
10414 three insns anyway. */
10415 *cost = COSTS_N_INSNS (3);
10416 break;
10418 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10419 return true;
10421 else
10423 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10424 if (CONST_INT_P (XEXP (x, 1))
10425 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10427 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10428 return true;
10431 return false;
10434 /* Not directly inside a set. If it involves the condition code
10435 register it must be the condition for a branch, cond_exec or
10436 I_T_E operation. Since the comparison is performed elsewhere
10437 this is just the control part which has no additional
10438 cost. */
10439 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10440 && XEXP (x, 1) == const0_rtx)
10442 *cost = 0;
10443 return true;
10445 return false;
10447 case ABS:
10448 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10449 && (mode == SFmode || !TARGET_VFP_SINGLE))
10451 if (speed_p)
10452 *cost += extra_cost->fp[mode != SFmode].neg;
10454 return false;
10456 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10458 *cost = LIBCALL_COST (1);
10459 return false;
10462 if (mode == SImode)
10464 if (speed_p)
10465 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10466 return false;
10468 /* Vector mode? */
10469 *cost = LIBCALL_COST (1);
10470 return false;
10472 case SIGN_EXTEND:
10473 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10474 && MEM_P (XEXP (x, 0)))
10476 if (mode == DImode)
10477 *cost += COSTS_N_INSNS (1);
10479 if (!speed_p)
10480 return true;
10482 if (GET_MODE (XEXP (x, 0)) == SImode)
10483 *cost += extra_cost->ldst.load;
10484 else
10485 *cost += extra_cost->ldst.load_sign_extend;
10487 if (mode == DImode)
10488 *cost += extra_cost->alu.shift;
10490 return true;
10493 /* Widening from less than 32 bits requires an extend operation. */
10494 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10496 /* We have SXTB/SXTH. */
10497 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10498 if (speed_p)
10499 *cost += extra_cost->alu.extend;
10501 else if (GET_MODE (XEXP (x, 0)) != SImode)
10503 /* Needs two shifts. */
10504 *cost += COSTS_N_INSNS (1);
10505 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10506 if (speed_p)
10507 *cost += 2 * extra_cost->alu.shift;
10510 /* Widening beyond 32 bits requires one more insn. */
10511 if (mode == DImode)
10513 *cost += COSTS_N_INSNS (1);
10514 if (speed_p)
10515 *cost += extra_cost->alu.shift;
10518 return true;
10520 case ZERO_EXTEND:
10521 if ((arm_arch4
10522 || GET_MODE (XEXP (x, 0)) == SImode
10523 || GET_MODE (XEXP (x, 0)) == QImode)
10524 && MEM_P (XEXP (x, 0)))
10526 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10528 if (mode == DImode)
10529 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10531 return true;
10534 /* Widening from less than 32 bits requires an extend operation. */
10535 if (GET_MODE (XEXP (x, 0)) == QImode)
10537 /* UXTB can be a shorter instruction in Thumb2, but it might
10538 be slower than the AND Rd, Rn, #255 alternative. When
10539 optimizing for speed it should never be slower to use
10540 AND, and we don't really model 16-bit vs 32-bit insns
10541 here. */
10542 if (speed_p)
10543 *cost += extra_cost->alu.logical;
10545 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10547 /* We have UXTB/UXTH. */
10548 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10549 if (speed_p)
10550 *cost += extra_cost->alu.extend;
10552 else if (GET_MODE (XEXP (x, 0)) != SImode)
10554 /* Needs two shifts. It's marginally preferable to use
10555 shifts rather than two BIC instructions as the second
10556 shift may merge with a subsequent insn as a shifter
10557 op. */
10558 *cost = COSTS_N_INSNS (2);
10559 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10560 if (speed_p)
10561 *cost += 2 * extra_cost->alu.shift;
10564 /* Widening beyond 32 bits requires one more insn. */
10565 if (mode == DImode)
10567 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10570 return true;
10572 case CONST_INT:
10573 *cost = 0;
10574 /* CONST_INT has no mode, so we cannot tell for sure how many
10575 insns are really going to be needed. The best we can do is
10576 look at the value passed. If it fits in SImode, then assume
10577 that's the mode it will be used for. Otherwise assume it
10578 will be used in DImode. */
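/* For example (illustrative): 0x1234 survives truncation to SImode and is
   costed as a single SImode constant, whereas 0x123456789 does not, so both
   32-bit halves are costed separately below.  */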
10579 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10580 mode = SImode;
10581 else
10582 mode = DImode;
10584 /* Avoid blowing up in arm_gen_constant (). */
10585 if (!(outer_code == PLUS
10586 || outer_code == AND
10587 || outer_code == IOR
10588 || outer_code == XOR
10589 || outer_code == MINUS))
10590 outer_code = SET;
10592 const_int_cost:
10593 if (mode == SImode)
10595 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10596 INTVAL (x), NULL, NULL,
10597 0, 0));
10598 /* Extra costs? */
10600 else
10602 *cost += COSTS_N_INSNS (arm_gen_constant
10603 (outer_code, SImode, NULL,
10604 trunc_int_for_mode (INTVAL (x), SImode),
10605 NULL, NULL, 0, 0)
10606 + arm_gen_constant (outer_code, SImode, NULL,
10607 INTVAL (x) >> 32, NULL,
10608 NULL, 0, 0));
10609 /* Extra costs? */
10612 return true;
10614 case CONST:
10615 case LABEL_REF:
10616 case SYMBOL_REF:
10617 if (speed_p)
10619 if (arm_arch_thumb2 && !flag_pic)
10620 *cost += COSTS_N_INSNS (1);
10621 else
10622 *cost += extra_cost->ldst.load;
10624 else
10625 *cost += COSTS_N_INSNS (1);
10627 if (flag_pic)
10629 *cost += COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->alu.arith;
10634 return true;
10636 case CONST_FIXED:
10637 *cost = COSTS_N_INSNS (4);
10638 /* Fixme. */
10639 return true;
10641 case CONST_DOUBLE:
10642 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10643 && (mode == SFmode || !TARGET_VFP_SINGLE))
10645 if (vfp3_const_double_rtx (x))
10647 if (speed_p)
10648 *cost += extra_cost->fp[mode == DFmode].fpconst;
10649 return true;
10652 if (speed_p)
10654 if (mode == DFmode)
10655 *cost += extra_cost->ldst.loadd;
10656 else
10657 *cost += extra_cost->ldst.loadf;
10659 else
10660 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10662 return true;
10664 *cost = COSTS_N_INSNS (4);
10665 return true;
10667 case CONST_VECTOR:
10668 /* Fixme. */
10669 if (TARGET_NEON
10670 && TARGET_HARD_FLOAT
10671 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10672 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10673 *cost = COSTS_N_INSNS (1);
10674 else
10675 *cost = COSTS_N_INSNS (4);
10676 return true;
10678 case HIGH:
10679 case LO_SUM:
10680 /* When optimizing for size, we prefer constant pool entries to
10681 MOVW/MOVT pairs, so bump the cost of these slightly. */
10682 if (!speed_p)
10683 *cost += 1;
10684 return true;
10686 case CLZ:
10687 if (speed_p)
10688 *cost += extra_cost->alu.clz;
10689 return false;
10691 case SMIN:
10692 if (XEXP (x, 1) == const0_rtx)
10694 if (speed_p)
10695 *cost += extra_cost->alu.log_shift;
10696 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10697 return true;
10699 /* Fall through. */
10700 case SMAX:
10701 case UMIN:
10702 case UMAX:
10703 *cost += COSTS_N_INSNS (1);
10704 return false;
10706 case TRUNCATE:
10707 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10708 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10709 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10710 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10711 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10712 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10713 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10714 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10715 == ZERO_EXTEND))))
10717 if (speed_p)
10718 *cost += extra_cost->mult[1].extend;
10719 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10720 ZERO_EXTEND, 0, speed_p)
10721 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10722 ZERO_EXTEND, 0, speed_p));
10723 return true;
10725 *cost = LIBCALL_COST (1);
10726 return false;
10728 case UNSPEC_VOLATILE:
10729 case UNSPEC:
10730 return arm_unspec_cost (x, outer_code, speed_p, cost);
10732 case PC:
10733 /* Reading the PC is like reading any other register. Writing it
10734 is more expensive, but we take that into account elsewhere. */
10735 *cost = 0;
10736 return true;
10738 case ZERO_EXTRACT:
10739 /* TODO: Simple zero_extract of bottom bits using AND. */
10740 /* Fall through. */
10741 case SIGN_EXTRACT:
10742 if (arm_arch6
10743 && mode == SImode
10744 && CONST_INT_P (XEXP (x, 1))
10745 && CONST_INT_P (XEXP (x, 2)))
10747 if (speed_p)
10748 *cost += extra_cost->alu.bfx;
10749 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10750 return true;
10752 /* Without UBFX/SBFX, need to resort to shift operations. */
10753 *cost += COSTS_N_INSNS (1);
10754 if (speed_p)
10755 *cost += 2 * extra_cost->alu.shift;
10756 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10757 return true;
10759 case FLOAT_EXTEND:
10760 if (TARGET_HARD_FLOAT)
10762 if (speed_p)
10763 *cost += extra_cost->fp[mode == DFmode].widen;
10764 if (!TARGET_VFP5
10765 && GET_MODE (XEXP (x, 0)) == HFmode)
10767 /* Pre v8, widening HF->DF is a two-step process, first
10768 widening to SFmode. */
10769 *cost += COSTS_N_INSNS (1);
10770 if (speed_p)
10771 *cost += extra_cost->fp[0].widen;
10773 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10774 return true;
10777 *cost = LIBCALL_COST (1);
10778 return false;
10780 case FLOAT_TRUNCATE:
10781 if (TARGET_HARD_FLOAT)
10783 if (speed_p)
10784 *cost += extra_cost->fp[mode == DFmode].narrow;
10785 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10786 return true;
10787 /* Vector modes? */
10789 *cost = LIBCALL_COST (1);
10790 return false;
10792 case FMA:
10793 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10795 rtx op0 = XEXP (x, 0);
10796 rtx op1 = XEXP (x, 1);
10797 rtx op2 = XEXP (x, 2);
10800 /* vfms or vfnma. */
10801 if (GET_CODE (op0) == NEG)
10802 op0 = XEXP (op0, 0);
10804 /* vfnms or vfnma. */
10805 if (GET_CODE (op2) == NEG)
10806 op2 = XEXP (op2, 0);
10808 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10809 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10810 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10812 if (speed_p)
10813 *cost += extra_cost->fp[mode == DFmode].fma;
10815 return true;
10818 *cost = LIBCALL_COST (3);
10819 return false;
10821 case FIX:
10822 case UNSIGNED_FIX:
10823 if (TARGET_HARD_FLOAT)
10825 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10826 a vcvt fixed-point conversion. */
10827 if (code == FIX && mode == SImode
10828 && GET_CODE (XEXP (x, 0)) == FIX
10829 && GET_MODE (XEXP (x, 0)) == SFmode
10830 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10831 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10832 > 0)
10834 if (speed_p)
10835 *cost += extra_cost->fp[0].toint;
10837 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10838 code, 0, speed_p);
10839 return true;
10842 if (GET_MODE_CLASS (mode) == MODE_INT)
10844 mode = GET_MODE (XEXP (x, 0));
10845 if (speed_p)
10846 *cost += extra_cost->fp[mode == DFmode].toint;
10847 /* Strip off the 'cost' of rounding towards zero. */
10848 if (GET_CODE (XEXP (x, 0)) == FIX)
10849 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10850 0, speed_p);
10851 else
10852 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10853 /* ??? Increase the cost to deal with transferring from
10854 FP -> CORE registers? */
10855 return true;
10857 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10858 && TARGET_VFP5)
10860 if (speed_p)
10861 *cost += extra_cost->fp[mode == DFmode].roundint;
10862 return false;
10864 /* Vector costs? */
10866 *cost = LIBCALL_COST (1);
10867 return false;
10869 case FLOAT:
10870 case UNSIGNED_FLOAT:
10871 if (TARGET_HARD_FLOAT)
10873 /* ??? Increase the cost to deal with transferring from CORE
10874 -> FP registers? */
10875 if (speed_p)
10876 *cost += extra_cost->fp[mode == DFmode].fromint;
10877 return false;
10879 *cost = LIBCALL_COST (1);
10880 return false;
10882 case CALL:
10883 return true;
10885 case ASM_OPERANDS:
10887 /* Just a guess. Guess number of instructions in the asm
10888 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10889 though (see PR60663). */
10890 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10891 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10893 *cost = COSTS_N_INSNS (asm_length + num_operands);
10894 return true;
10896 default:
10897 if (mode != VOIDmode)
10898 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10899 else
10900 *cost = COSTS_N_INSNS (4); /* Who knows? */
10901 return false;
10905 #undef HANDLE_NARROW_SHIFT_ARITH
10907 /* RTX costs entry point. */
10909 static bool
10910 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10911 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10913 bool result;
10914 int code = GET_CODE (x);
10915 gcc_assert (current_tune->insn_extra_cost);
10917 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10918 (enum rtx_code) outer_code,
10919 current_tune->insn_extra_cost,
10920 total, speed);
10922 if (dump_file && (dump_flags & TDF_DETAILS))
10924 print_rtl_single (dump_file, x);
10925 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10926 *total, result ? "final" : "partial");
10928 return result;
10931 /* All address computations that can be done are free, but rtx cost returns
10932 the same for practically all of them. So we weight the different types
10933 of address here in the order (most preferred first):
10934 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10935 static inline int
10936 arm_arm_address_cost (rtx x)
10938 enum rtx_code c = GET_CODE (x);
10940 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10941 return 0;
10942 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10943 return 10;
10945 if (c == PLUS)
10947 if (CONST_INT_P (XEXP (x, 1)))
10948 return 2;
10950 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10951 return 3;
10953 return 4;
10956 return 6;
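/* Illustrative examples of the weights above (added for clarity):
   (post_inc (reg)) scores 0, (plus (reg) (const_int 8)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a bare (reg) scores 6,
   and a (symbol_ref ...) scores 10.  */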
10959 static inline int
10960 arm_thumb_address_cost (rtx x)
10962 enum rtx_code c = GET_CODE (x);
10964 if (c == REG)
10965 return 1;
10966 if (c == PLUS
10967 && REG_P (XEXP (x, 0))
10968 && CONST_INT_P (XEXP (x, 1)))
10969 return 1;
10971 return 2;
10974 static int
10975 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10976 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10978 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10981 /* Adjust cost hook for XScale. */
10982 static bool
10983 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10984 int * cost)
10986 /* Some true dependencies can have a higher cost depending
10987 on precisely how certain input operands are used. */
10988 if (dep_type == 0
10989 && recog_memoized (insn) >= 0
10990 && recog_memoized (dep) >= 0)
10992 int shift_opnum = get_attr_shift (insn);
10993 enum attr_type attr_type = get_attr_type (dep);
10995 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10996 operand for INSN. If we have a shifted input operand and the
10997 instruction we depend on is another ALU instruction, then we may
10998 have to account for an additional stall. */
10999 if (shift_opnum != 0
11000 && (attr_type == TYPE_ALU_SHIFT_IMM
11001 || attr_type == TYPE_ALUS_SHIFT_IMM
11002 || attr_type == TYPE_LOGIC_SHIFT_IMM
11003 || attr_type == TYPE_LOGICS_SHIFT_IMM
11004 || attr_type == TYPE_ALU_SHIFT_REG
11005 || attr_type == TYPE_ALUS_SHIFT_REG
11006 || attr_type == TYPE_LOGIC_SHIFT_REG
11007 || attr_type == TYPE_LOGICS_SHIFT_REG
11008 || attr_type == TYPE_MOV_SHIFT
11009 || attr_type == TYPE_MVN_SHIFT
11010 || attr_type == TYPE_MOV_SHIFT_REG
11011 || attr_type == TYPE_MVN_SHIFT_REG))
11013 rtx shifted_operand;
11014 int opno;
11016 /* Get the shifted operand. */
11017 extract_insn (insn);
11018 shifted_operand = recog_data.operand[shift_opnum];
11020 /* Iterate over all the operands in DEP. If we write an operand
11021 that overlaps with SHIFTED_OPERAND, then we have to increase the
11022 cost of this dependency. */
11023 extract_insn (dep);
11024 preprocess_constraints (dep);
11025 for (opno = 0; opno < recog_data.n_operands; opno++)
11027 /* We can ignore strict inputs. */
11028 if (recog_data.operand_type[opno] == OP_IN)
11029 continue;
11031 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11032 shifted_operand))
11034 *cost = 2;
11035 return false;
11040 return true;
11043 /* Adjust cost hook for Cortex A9. */
11044 static bool
11045 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11046 int * cost)
11048 switch (dep_type)
11050 case REG_DEP_ANTI:
11051 *cost = 0;
11052 return false;
11054 case REG_DEP_TRUE:
11055 case REG_DEP_OUTPUT:
11056 if (recog_memoized (insn) >= 0
11057 && recog_memoized (dep) >= 0)
11059 if (GET_CODE (PATTERN (insn)) == SET)
11061 if (GET_MODE_CLASS
11062 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11063 || GET_MODE_CLASS
11064 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11066 enum attr_type attr_type_insn = get_attr_type (insn);
11067 enum attr_type attr_type_dep = get_attr_type (dep);
11069 /* By default all dependencies of the form
11070 s0 = s0 <op> s1
11071 s0 = s0 <op> s2
11072 have an extra latency of 1 cycle because
11073 of the input and output dependency in this
11074 case.  However, this gets modeled as a true
11075 dependency and hence all these checks. */
11076 if (REG_P (SET_DEST (PATTERN (insn)))
11077 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11079 /* FMACS is a special case where the dependent
11080 instruction can be issued 3 cycles before
11081 the normal latency in case of an output
11082 dependency. */
11083 if ((attr_type_insn == TYPE_FMACS
11084 || attr_type_insn == TYPE_FMACD)
11085 && (attr_type_dep == TYPE_FMACS
11086 || attr_type_dep == TYPE_FMACD))
11088 if (dep_type == REG_DEP_OUTPUT)
11089 *cost = insn_default_latency (dep) - 3;
11090 else
11091 *cost = insn_default_latency (dep);
11092 return false;
11094 else
11096 if (dep_type == REG_DEP_OUTPUT)
11097 *cost = insn_default_latency (dep) + 1;
11098 else
11099 *cost = insn_default_latency (dep);
11101 return false;
11106 break;
11108 default:
11109 gcc_unreachable ();
11112 return true;
11115 /* Adjust cost hook for FA726TE. */
11116 static bool
11117 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11118 int * cost)
11120 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11121 followed by a predicated one) has a penalty of 3. */
11122 if (dep_type == REG_DEP_TRUE
11123 && recog_memoized (insn) >= 0
11124 && recog_memoized (dep) >= 0
11125 && get_attr_conds (dep) == CONDS_SET)
11127 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11128 if (get_attr_conds (insn) == CONDS_USE
11129 && get_attr_type (insn) != TYPE_BRANCH)
11131 *cost = 3;
11132 return false;
11135 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11136 || get_attr_conds (insn) == CONDS_USE)
11138 *cost = 0;
11139 return false;
11143 return true;
11146 /* Implement TARGET_REGISTER_MOVE_COST.
11148 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11149 such a move is typically more expensive than a single memory access.  We
11150 set the cost to less than that of two memory accesses so that floating-
11151 point to integer conversion does not go through memory.  */
11154 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11155 reg_class_t from, reg_class_t to)
11157 if (TARGET_32BIT)
11159 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11160 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11161 return 15;
11162 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11163 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11164 return 4;
11165 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11166 return 20;
11167 else
11168 return 2;
11170 else
11172 if (from == HI_REGS || to == HI_REGS)
11173 return 4;
11174 else
11175 return 2;
11179 /* Implement TARGET_MEMORY_MOVE_COST. */
11182 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11183 bool in ATTRIBUTE_UNUSED)
11185 if (TARGET_32BIT)
11186 return 10;
11187 else
11189 if (GET_MODE_SIZE (mode) < 4)
11190 return 8;
11191 else
11192 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
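/* In the Thumb-1 branch above, for example (illustrative): an SImode value
   (4 bytes) costs 8 for LO_REGS and 16 for any other class, a DImode value
   (8 bytes) costs 16 and 32 respectively, and sub-word modes all cost 8.  */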
11196 /* Vectorizer cost model implementation. */
11198 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11199 static int
11200 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11201 tree vectype,
11202 int misalign ATTRIBUTE_UNUSED)
11204 unsigned elements;
11206 switch (type_of_cost)
11208 case scalar_stmt:
11209 return current_tune->vec_costs->scalar_stmt_cost;
11211 case scalar_load:
11212 return current_tune->vec_costs->scalar_load_cost;
11214 case scalar_store:
11215 return current_tune->vec_costs->scalar_store_cost;
11217 case vector_stmt:
11218 return current_tune->vec_costs->vec_stmt_cost;
11220 case vector_load:
11221 return current_tune->vec_costs->vec_align_load_cost;
11223 case vector_store:
11224 return current_tune->vec_costs->vec_store_cost;
11226 case vec_to_scalar:
11227 return current_tune->vec_costs->vec_to_scalar_cost;
11229 case scalar_to_vec:
11230 return current_tune->vec_costs->scalar_to_vec_cost;
11232 case unaligned_load:
11233 return current_tune->vec_costs->vec_unalign_load_cost;
11235 case unaligned_store:
11236 return current_tune->vec_costs->vec_unalign_store_cost;
11238 case cond_branch_taken:
11239 return current_tune->vec_costs->cond_taken_branch_cost;
11241 case cond_branch_not_taken:
11242 return current_tune->vec_costs->cond_not_taken_branch_cost;
11244 case vec_perm:
11245 case vec_promote_demote:
11246 return current_tune->vec_costs->vec_stmt_cost;
11248 case vec_construct:
11249 elements = TYPE_VECTOR_SUBPARTS (vectype);
11250 return elements / 2 + 1;
11252 default:
11253 gcc_unreachable ();
11257 /* Implement targetm.vectorize.add_stmt_cost. */
11259 static unsigned
11260 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11261 struct _stmt_vec_info *stmt_info, int misalign,
11262 enum vect_cost_model_location where)
11264 unsigned *cost = (unsigned *) data;
11265 unsigned retval = 0;
11267 if (flag_vect_cost_model)
11269 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11270 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11272 /* Statements in an inner loop relative to the loop being
11273 vectorized are weighted more heavily. The value here is
11274 arbitrary and could potentially be improved with analysis. */
11275 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11276 count *= 50; /* FIXME. */
11278 retval = (unsigned) (count * stmt_cost);
11279 cost[where] += retval;
11282 return retval;
11285 /* Return true if and only if this insn can dual-issue only as older. */
11286 static bool
11287 cortexa7_older_only (rtx_insn *insn)
11289 if (recog_memoized (insn) < 0)
11290 return false;
11292 switch (get_attr_type (insn))
11294 case TYPE_ALU_DSP_REG:
11295 case TYPE_ALU_SREG:
11296 case TYPE_ALUS_SREG:
11297 case TYPE_LOGIC_REG:
11298 case TYPE_LOGICS_REG:
11299 case TYPE_ADC_REG:
11300 case TYPE_ADCS_REG:
11301 case TYPE_ADR:
11302 case TYPE_BFM:
11303 case TYPE_REV:
11304 case TYPE_MVN_REG:
11305 case TYPE_SHIFT_IMM:
11306 case TYPE_SHIFT_REG:
11307 case TYPE_LOAD_BYTE:
11308 case TYPE_LOAD1:
11309 case TYPE_STORE1:
11310 case TYPE_FFARITHS:
11311 case TYPE_FADDS:
11312 case TYPE_FFARITHD:
11313 case TYPE_FADDD:
11314 case TYPE_FMOV:
11315 case TYPE_F_CVT:
11316 case TYPE_FCMPS:
11317 case TYPE_FCMPD:
11318 case TYPE_FCONSTS:
11319 case TYPE_FCONSTD:
11320 case TYPE_FMULS:
11321 case TYPE_FMACS:
11322 case TYPE_FMULD:
11323 case TYPE_FMACD:
11324 case TYPE_FDIVS:
11325 case TYPE_FDIVD:
11326 case TYPE_F_MRC:
11327 case TYPE_F_MRRC:
11328 case TYPE_F_FLAG:
11329 case TYPE_F_LOADS:
11330 case TYPE_F_STORES:
11331 return true;
11332 default:
11333 return false;
11337 /* Return true if and only if this insn can dual-issue as younger. */
11338 static bool
11339 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11341 if (recog_memoized (insn) < 0)
11343 if (verbose > 5)
11344 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11345 return false;
11348 switch (get_attr_type (insn))
11350 case TYPE_ALU_IMM:
11351 case TYPE_ALUS_IMM:
11352 case TYPE_LOGIC_IMM:
11353 case TYPE_LOGICS_IMM:
11354 case TYPE_EXTEND:
11355 case TYPE_MVN_IMM:
11356 case TYPE_MOV_IMM:
11357 case TYPE_MOV_REG:
11358 case TYPE_MOV_SHIFT:
11359 case TYPE_MOV_SHIFT_REG:
11360 case TYPE_BRANCH:
11361 case TYPE_CALL:
11362 return true;
11363 default:
11364 return false;
11369 /* Look for an instruction that can dual issue only as an older
11370 instruction, and move it in front of any instructions that can
11371 dual-issue as younger, while preserving the relative order of all
11372 other instructions in the ready list. This is a heuristic to help
11373 dual-issue in later cycles, by postponing issue of more flexible
11374 instructions. This heuristic may affect dual issue opportunities
11375 in the current cycle. */
11376 static void
11377 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11378 int *n_readyp, int clock)
11380 int i;
11381 int first_older_only = -1, first_younger = -1;
11383 if (verbose > 5)
11384 fprintf (file,
11385 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11386 clock,
11387 *n_readyp);
11389 /* Traverse the ready list from the head (the instruction to issue
11390 first), looking for the first instruction that can issue as
11391 younger and the first instruction that can dual-issue only as
11392 older. */
11393 for (i = *n_readyp - 1; i >= 0; i--)
11395 rtx_insn *insn = ready[i];
11396 if (cortexa7_older_only (insn))
11398 first_older_only = i;
11399 if (verbose > 5)
11400 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11401 break;
11403 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11404 first_younger = i;
11407 /* Nothing to reorder because either no younger insn found or insn
11408 that can dual-issue only as older appears before any insn that
11409 can dual-issue as younger. */
11410 if (first_younger == -1)
11412 if (verbose > 5)
11413 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11414 return;
11417 /* Nothing to reorder because no older-only insn in the ready list. */
11418 if (first_older_only == -1)
11420 if (verbose > 5)
11421 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11422 return;
11425 /* Move first_older_only insn before first_younger. */
11426 if (verbose > 5)
11427 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11428 INSN_UID(ready [first_older_only]),
11429 INSN_UID(ready [first_younger]));
11430 rtx_insn *first_older_only_insn = ready [first_older_only];
11431 for (i = first_older_only; i < first_younger; i++)
11433 ready[i] = ready[i+1];
11436 ready[i] = first_older_only_insn;
11437 return;
11440 /* Implement TARGET_SCHED_REORDER. */
11441 static int
11442 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11443 int clock)
11445 switch (arm_tune)
11447 case TARGET_CPU_cortexa7:
11448 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11449 break;
11450 default:
11451 /* Do nothing for other cores. */
11452 break;
11455 return arm_issue_rate ();
11458 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11459 It corrects the value of COST based on the relationship between
11460 INSN and DEP through the dependence LINK. It returns the new
11461 value. There is a per-core adjust_cost hook to adjust scheduler costs
11462 and the per-core hook can choose to completely override the generic
11463 adjust_cost function. Only put bits of code into arm_adjust_cost that
11464 are common across all cores. */
11465 static int
11466 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11467 unsigned int)
11469 rtx i_pat, d_pat;
11471 /* When generating Thumb-1 code, we want to place flag-setting operations
11472 close to a conditional branch which depends on them, so that we can
11473 omit the comparison. */
11474 if (TARGET_THUMB1
11475 && dep_type == 0
11476 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11477 && recog_memoized (dep) >= 0
11478 && get_attr_conds (dep) == CONDS_SET)
11479 return 0;
11481 if (current_tune->sched_adjust_cost != NULL)
11483 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11484 return cost;
11487 /* XXX Is this strictly true? */
11488 if (dep_type == REG_DEP_ANTI
11489 || dep_type == REG_DEP_OUTPUT)
11490 return 0;
11492 /* Call insns don't incur a stall, even if they follow a load. */
11493 if (dep_type == 0
11494 && CALL_P (insn))
11495 return 1;
11497 if ((i_pat = single_set (insn)) != NULL
11498 && MEM_P (SET_SRC (i_pat))
11499 && (d_pat = single_set (dep)) != NULL
11500 && MEM_P (SET_DEST (d_pat)))
11502 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12503 /* This is a load after a store; there is no conflict if the load reads
11504 from a cached area. Assume that loads from the stack, and from the
11505 constant pool are cached, and that others will miss. This is a
11506 hack. */
11508 if ((GET_CODE (src_mem) == SYMBOL_REF
11509 && CONSTANT_POOL_ADDRESS_P (src_mem))
11510 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11511 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11512 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11513 return 1;
11516 return cost;
11520 arm_max_conditional_execute (void)
11522 return max_insns_skipped;
11525 static int
11526 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11528 if (TARGET_32BIT)
11529 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11530 else
11531 return (optimize > 0) ? 2 : 0;
11534 static int
11535 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11537 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11540 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11541 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11542 sequences of non-executed instructions in IT blocks probably take the same
11543 amount of time as executed instructions (and the IT instruction itself takes
11544 space in icache). This function was experimentally determined to give good
11545 results on a popular embedded benchmark. */
11547 static int
11548 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11550 return (TARGET_32BIT && speed_p) ? 1
11551 : arm_default_branch_cost (speed_p, predictable_p);
11554 static int
11555 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11557 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11560 static bool fp_consts_inited = false;
11562 static REAL_VALUE_TYPE value_fp0;
11564 static void
11565 init_fp_table (void)
11567 REAL_VALUE_TYPE r;
11569 r = REAL_VALUE_ATOF ("0", DFmode);
11570 value_fp0 = r;
11571 fp_consts_inited = true;
11574 /* Return TRUE if rtx X is a valid immediate FP constant. */
11576 arm_const_double_rtx (rtx x)
11578 const REAL_VALUE_TYPE *r;
11580 if (!fp_consts_inited)
11581 init_fp_table ();
11583 r = CONST_DOUBLE_REAL_VALUE (x);
11584 if (REAL_VALUE_MINUS_ZERO (*r))
11585 return 0;
11587 if (real_equal (r, &value_fp0))
11588 return 1;
11590 return 0;
11593 /* VFPv3 has a fairly wide range of representable immediates, formed from
11594 "quarter-precision" floating-point values. These can be evaluated using this
11595 formula (with ^ for exponentiation):
11597 -1^s * n * 2^-r
11599 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11600 16 <= n <= 31 and 0 <= r <= 7.
11602 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11604 - A (most-significant) is the sign bit.
11605 - BCD are the exponent (encoded as r XOR 3).
11606 - EFGH are the mantissa (encoded as n - 16).
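/* Illustrative sketch, not part of the original file: decode an 8-bit
   ABCDEFGH immediate as described above back into its value, assuming the
   formula -1^s * n * 2^-r.  The helper name is invented for exposition.  */
static double ATTRIBUTE_UNUSED
vfp3_decode_fconst_imm_sketch (unsigned char abcdefgh)
{
  int sign = (abcdefgh >> 7) & 1;			/* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;			/* BCD: exponent, stored as r XOR 3.  */
  int n = (abcdefgh & 0xf) + 16;			/* EFGH: mantissa, stored as n - 16.  */
  double magnitude = (double) n / (double) (1 << r);	/* n * 2^-r, with 0 <= r <= 7.  */
  return sign ? -magnitude : magnitude;			/* E.g. 0x70 decodes to 1.0.  */
}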
11609 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11610 fconst[sd] instruction, or -1 if X isn't suitable. */
11611 static int
11612 vfp3_const_double_index (rtx x)
11614 REAL_VALUE_TYPE r, m;
11615 int sign, exponent;
11616 unsigned HOST_WIDE_INT mantissa, mant_hi;
11617 unsigned HOST_WIDE_INT mask;
11618 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11619 bool fail;
11621 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11622 return -1;
11624 r = *CONST_DOUBLE_REAL_VALUE (x);
11626 /* We can't represent these things, so detect them first. */
11627 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11628 return -1;
11630 /* Extract sign, exponent and mantissa. */
11631 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11632 r = real_value_abs (&r);
11633 exponent = REAL_EXP (&r);
11634 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11635 highest (sign) bit, with a fixed binary point at bit point_pos.
11636 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11637 bits for the mantissa, this may fail (low bits would be lost). */
11638 real_ldexp (&m, &r, point_pos - exponent);
11639 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11640 mantissa = w.elt (0);
11641 mant_hi = w.elt (1);
11643 /* If there are bits set in the low part of the mantissa, we can't
11644 represent this value. */
11645 if (mantissa != 0)
11646 return -1;
11648 /* Now make it so that mantissa contains the most-significant bits, and move
11649 the point_pos to indicate that the least-significant bits have been
11650 discarded. */
11651 point_pos -= HOST_BITS_PER_WIDE_INT;
11652 mantissa = mant_hi;
11654 /* We can permit four significant bits of mantissa only, plus a high bit
11655 which is always 1. */
11656 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11657 if ((mantissa & mask) != 0)
11658 return -1;
11660 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11661 mantissa >>= point_pos - 5;
11663 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11664 floating-point immediate zero with Neon using an integer-zero load, but
11665 that case is handled elsewhere.) */
11666 if (mantissa == 0)
11667 return -1;
11669 gcc_assert (mantissa >= 16 && mantissa <= 31);
11671 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11672 normalized significands are in the range [1, 2). (Our mantissa is shifted
11673 left 4 places at this point relative to normalized IEEE754 values). GCC
11674 internally uses [0.5, 1) (see real.c), so the exponent returned from
11675 REAL_EXP must be altered. */
11676 exponent = 5 - exponent;
11678 if (exponent < 0 || exponent > 7)
11679 return -1;
11681 /* Sign, mantissa and exponent are now in the correct form to plug into the
11682 formula described in the comment above. */
11683 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11686 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11688 vfp3_const_double_rtx (rtx x)
11690 if (!TARGET_VFP3)
11691 return 0;
11693 return vfp3_const_double_index (x) != -1;
11696 /* Recognize immediates which can be used in various Neon instructions. Legal
11697 immediates are described by the following table (for VMVN variants, the
11698 bitwise inverse of the constant shown is recognized. In either case, VMOV
11699 is output and the correct instruction to use for a given constant is chosen
11700 by the assembler). The constant shown is replicated across all elements of
11701 the destination vector.
11703 insn elems variant constant (binary)
11704 ---- ----- ------- -----------------
11705 vmov i32 0 00000000 00000000 00000000 abcdefgh
11706 vmov i32 1 00000000 00000000 abcdefgh 00000000
11707 vmov i32 2 00000000 abcdefgh 00000000 00000000
11708 vmov i32 3 abcdefgh 00000000 00000000 00000000
11709 vmov i16 4 00000000 abcdefgh
11710 vmov i16 5 abcdefgh 00000000
11711 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11712 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11713 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11714 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11715 vmvn i16 10 00000000 abcdefgh
11716 vmvn i16 11 abcdefgh 00000000
11717 vmov i32 12 00000000 00000000 abcdefgh 11111111
11718 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11719 vmov i32 14 00000000 abcdefgh 11111111 11111111
11720 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11721 vmov i8 16 abcdefgh
11722 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11723 eeeeeeee ffffffff gggggggg hhhhhhhh
11724 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11725 vmov f32 19 00000000 00000000 00000000 00000000
11727 For case 18, B = !b. Representable values are exactly those accepted by
11728 vfp3_const_double_index, but are output as floating-point numbers rather
11729 than indices.
11731 For case 19, we will change it to vmov.i32 when assembling.
11733 Variants 0-5 (inclusive) may also be used as immediates for the second
11734 operand of VORR/VBIC instructions.
11736 The INVERSE argument causes the bitwise inverse of the given operand to be
11737 recognized instead (used for recognizing legal immediates for the VAND/VORN
11738 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11739 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11740 output, rather than the real insns vbic/vorr).
11742 INVERSE makes no difference to the recognition of float vectors.
11744 The return value is the variant of immediate as shown in the above table, or
11745 -1 if the given value doesn't match any of the listed patterns.
11747 static int
11748 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11749 rtx *modconst, int *elementwidth)
11751 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11752 matches = 1; \
11753 for (i = 0; i < idx; i += (STRIDE)) \
11754 if (!(TEST)) \
11755 matches = 0; \
11756 if (matches) \
11758 immtype = (CLASS); \
11759 elsize = (ELSIZE); \
11760 break; \
11763 unsigned int i, elsize = 0, idx = 0, n_elts;
11764 unsigned int innersize;
11765 unsigned char bytes[16];
11766 int immtype = -1, matches;
11767 unsigned int invmask = inverse ? 0xff : 0;
11768 bool vector = GET_CODE (op) == CONST_VECTOR;
11770 if (vector)
11771 n_elts = CONST_VECTOR_NUNITS (op);
11772 else
11774 n_elts = 1;
11775 if (mode == VOIDmode)
11776 mode = DImode;
11779 innersize = GET_MODE_UNIT_SIZE (mode);
11781 /* Vectors of float constants. */
11782 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11784 rtx el0 = CONST_VECTOR_ELT (op, 0);
11786 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11787 return -1;
11789 /* FP16 vectors cannot be represented. */
11790 if (GET_MODE_INNER (mode) == HFmode)
11791 return -1;
11793 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11794 are distinct in this context. */
11795 if (!const_vec_duplicate_p (op))
11796 return -1;
11798 if (modconst)
11799 *modconst = CONST_VECTOR_ELT (op, 0);
11801 if (elementwidth)
11802 *elementwidth = 0;
11804 if (el0 == CONST0_RTX (GET_MODE (el0)))
11805 return 19;
11806 else
11807 return 18;
11810 /* The tricks done in the code below apply for little-endian vector layout.
11811 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11812 FIXME: Implement logic for big-endian vectors. */
11813 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11814 return -1;
11816 /* Splat vector constant out into a byte vector. */
11817 for (i = 0; i < n_elts; i++)
11819 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11820 unsigned HOST_WIDE_INT elpart;
11822 gcc_assert (CONST_INT_P (el));
11823 elpart = INTVAL (el);
11825 for (unsigned int byte = 0; byte < innersize; byte++)
11827 bytes[idx++] = (elpart & 0xff) ^ invmask;
11828 elpart >>= BITS_PER_UNIT;
11832 /* Sanity check. */
11833 gcc_assert (idx == GET_MODE_SIZE (mode));
11837 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11838 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11840 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11841 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11843 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11844 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11846 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11847 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11849 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11851 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11853 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11854 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11856 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11857 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11859 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11860 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11862 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11863 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11865 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11867 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11869 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11870 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11872 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11873 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11875 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11876 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11878 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11879 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11881 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11883 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11884 && bytes[i] == bytes[(i + 8) % idx]);
11886 while (0);
11888 if (immtype == -1)
11889 return -1;
11891 if (elementwidth)
11892 *elementwidth = elsize;
11894 if (modconst)
11896 unsigned HOST_WIDE_INT imm = 0;
11898 /* Un-invert bytes of recognized vector, if necessary. */
11899 if (invmask != 0)
11900 for (i = 0; i < idx; i++)
11901 bytes[i] ^= invmask;
11903 if (immtype == 17)
11905 /* FIXME: Broken on 32-bit H_W_I hosts. */
11906 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11908 for (i = 0; i < 8; i++)
11909 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11910 << (i * BITS_PER_UNIT);
11912 *modconst = GEN_INT (imm);
11914 else
11916 unsigned HOST_WIDE_INT imm = 0;
11918 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11919 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11921 *modconst = GEN_INT (imm);
11925 return immtype;
11926 #undef CHECK
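/* Illustrative sketch, not part of the original file: how the variant-0..3
   checks above classify a 32-bit value that is splatted across the vector.
   Assumes a little-endian byte layout, as the function above does; the
   helper name is invented for exposition.  */
static int ATTRIBUTE_UNUSED
neon_i32_splat_variant_sketch (unsigned int val)
{
  unsigned char b[4];
  for (int i = 0; i < 4; i++)
    b[i] = (val >> (i * BITS_PER_UNIT)) & 0xff;
  if (b[1] == 0 && b[2] == 0 && b[3] == 0)
    return 0;		/* 00000000 00000000 00000000 abcdefgh */
  if (b[0] == 0 && b[2] == 0 && b[3] == 0)
    return 1;		/* 00000000 00000000 abcdefgh 00000000 */
  if (b[0] == 0 && b[1] == 0 && b[3] == 0)
    return 2;		/* 00000000 abcdefgh 00000000 00000000 */
  if (b[0] == 0 && b[1] == 0 && b[2] == 0)
    return 3;		/* abcdefgh 00000000 00000000 00000000 */
  return -1;		/* Needs one of the other variants, or is not valid.  */
}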
11929 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11930 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11931 float elements), and a modified constant (whatever should be output for a
11932 VMOV) in *MODCONST. */
11935 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11936 rtx *modconst, int *elementwidth)
11938 rtx tmpconst;
11939 int tmpwidth;
11940 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11942 if (retval == -1)
11943 return 0;
11945 if (modconst)
11946 *modconst = tmpconst;
11948 if (elementwidth)
11949 *elementwidth = tmpwidth;
11951 return 1;
11954 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11955 the immediate is valid, write a constant suitable for using as an operand
11956 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11957 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11960 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11961 rtx *modconst, int *elementwidth)
11963 rtx tmpconst;
11964 int tmpwidth;
11965 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11967 if (retval < 0 || retval > 5)
11968 return 0;
11970 if (modconst)
11971 *modconst = tmpconst;
11973 if (elementwidth)
11974 *elementwidth = tmpwidth;
11976 return 1;
11979 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11980 the immediate is valid, write a constant suitable for using as an operand
11981 to VSHR/VSHL to *MODCONST and the corresponding element width to
11982 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
11983 because the two have different limitations.
11986 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11987 rtx *modconst, int *elementwidth,
11988 bool isleftshift)
11990 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11991 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11992 unsigned HOST_WIDE_INT last_elt = 0;
11993 unsigned HOST_WIDE_INT maxshift;
11995 /* Extract the element values and check that they are all the same. */
11996 for (i = 0; i < n_elts; i++)
11998 rtx el = CONST_VECTOR_ELT (op, i);
11999 unsigned HOST_WIDE_INT elpart;
12001 if (CONST_INT_P (el))
12002 elpart = INTVAL (el);
12003 else if (CONST_DOUBLE_P (el))
12004 return 0;
12005 else
12006 gcc_unreachable ();
12008 if (i != 0 && elpart != last_elt)
12009 return 0;
12011 last_elt = elpart;
12014 /* Shift less than element size. */
12015 maxshift = innersize * 8;
12017 if (isleftshift)
12019 /* Left shift immediate value can be from 0 to <size>-1. */
12020 if (last_elt >= maxshift)
12021 return 0;
12023 else
12025 /* Right shift immediate value can be from 1 to <size>. */
12026 if (last_elt == 0 || last_elt > maxshift)
12027 return 0;
12030 if (elementwidth)
12031 *elementwidth = innersize * 8;
12033 if (modconst)
12034 *modconst = CONST_VECTOR_ELT (op, 0);
12036 return 1;
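/* Illustrative sketch, not part of the original file: the immediate ranges
   that the function above enforces, for a hypothetical element width in
   bits.  The helper name is invented for exposition.  */
static bool ATTRIBUTE_UNUSED
neon_shift_imm_in_range_sketch (unsigned int elt_bits,
				unsigned HOST_WIDE_INT shift,
				bool isleftshift)
{
  if (isleftshift)
    return shift < elt_bits;			/* VSHL: 0 .. size-1.  */
  return shift >= 1 && shift <= elt_bits;	/* VSHR: 1 .. size.  */
}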
12039 /* Return a string suitable for output of Neon immediate logic operation
12040 MNEM. */
12042 char *
12043 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12044 int inverse, int quad)
12046 int width, is_valid;
12047 static char templ[40];
12049 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12051 gcc_assert (is_valid != 0);
12053 if (quad)
12054 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12055 else
12056 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12058 return templ;
12061 /* Return a string suitable for output of Neon immediate shift operation
12062 (VSHR or VSHL) MNEM. */
12064 char *
12065 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12066 machine_mode mode, int quad,
12067 bool isleftshift)
12069 int width, is_valid;
12070 static char templ[40];
12072 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12073 gcc_assert (is_valid != 0);
12075 if (quad)
12076 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12077 else
12078 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12080 return templ;
12083 /* Output a sequence of pairwise operations to implement a reduction.
12084 NOTE: We do "too much work" here, because pairwise operations work on two
12085 registers-worth of operands in one go. Unfortunately it does not seem
12086 possible to exploit those extra calculations to do the full operation in fewer steps.
12087 Although all vector elements of the result but the first are ignored, we
12088 actually calculate the same result in each of the elements. An alternative
12089 such as initially loading a vector with zero to use as each of the second
12090 operands would use up an additional register and take an extra instruction,
12091 for no particular gain. */
12093 void
12094 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12095 rtx (*reduc) (rtx, rtx, rtx))
12097 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12098 rtx tmpsum = op1;
12100 for (i = parts / 2; i >= 1; i /= 2)
12102 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12103 emit_insn (reduc (dest, tmpsum, tmpsum));
12104 tmpsum = dest;
12108 /* If VALS is a vector constant that can be loaded into a register
12109 using VDUP, generate instructions to do so and return an RTX to
12110 assign to the register. Otherwise return NULL_RTX. */
12112 static rtx
12113 neon_vdup_constant (rtx vals)
12115 machine_mode mode = GET_MODE (vals);
12116 machine_mode inner_mode = GET_MODE_INNER (mode);
12117 rtx x;
12119 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12120 return NULL_RTX;
12122 if (!const_vec_duplicate_p (vals, &x))
12123 /* The elements are not all the same. We could handle repeating
12124 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12125 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12126 vdup.i16). */
12127 return NULL_RTX;
12129 /* We can load this constant by using VDUP and a constant in a
12130 single ARM register. This will be cheaper than a vector
12131 load. */
12133 x = copy_to_mode_reg (inner_mode, x);
12134 return gen_rtx_VEC_DUPLICATE (mode, x);
12137 /* Generate code to load VALS, which is a PARALLEL containing only
12138 constants (for vec_init) or CONST_VECTOR, efficiently into a
12139 register. Returns an RTX to copy into the register, or NULL_RTX
12140 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12143 neon_make_constant (rtx vals)
12145 machine_mode mode = GET_MODE (vals);
12146 rtx target;
12147 rtx const_vec = NULL_RTX;
12148 int n_elts = GET_MODE_NUNITS (mode);
12149 int n_const = 0;
12150 int i;
12152 if (GET_CODE (vals) == CONST_VECTOR)
12153 const_vec = vals;
12154 else if (GET_CODE (vals) == PARALLEL)
12156 /* A CONST_VECTOR must contain only CONST_INTs and
12157 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12158 Only store valid constants in a CONST_VECTOR. */
12159 for (i = 0; i < n_elts; ++i)
12161 rtx x = XVECEXP (vals, 0, i);
12162 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12163 n_const++;
12165 if (n_const == n_elts)
12166 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12168 else
12169 gcc_unreachable ();
12171 if (const_vec != NULL
12172 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12173 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12174 return const_vec;
12175 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12176 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12177 pipeline cycle; creating the constant takes one or two ARM
12178 pipeline cycles. */
12179 return target;
12180 else if (const_vec != NULL_RTX)
12181 /* Load from constant pool. On Cortex-A8 this takes two cycles
12182 (for either double or quad vectors). We cannot take advantage
12183 of single-cycle VLD1 because we need a PC-relative addressing
12184 mode. */
12185 return const_vec;
12186 else
12187 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12188 We cannot construct an initializer. */
12189 return NULL_RTX;
12192 /* Initialize vector TARGET to VALS. */
12194 void
12195 neon_expand_vector_init (rtx target, rtx vals)
12197 machine_mode mode = GET_MODE (target);
12198 machine_mode inner_mode = GET_MODE_INNER (mode);
12199 int n_elts = GET_MODE_NUNITS (mode);
12200 int n_var = 0, one_var = -1;
12201 bool all_same = true;
12202 rtx x, mem;
12203 int i;
12205 for (i = 0; i < n_elts; ++i)
12207 x = XVECEXP (vals, 0, i);
12208 if (!CONSTANT_P (x))
12209 ++n_var, one_var = i;
12211 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12212 all_same = false;
12215 if (n_var == 0)
12217 rtx constant = neon_make_constant (vals);
12218 if (constant != NULL_RTX)
12220 emit_move_insn (target, constant);
12221 return;
12225 /* Splat a single non-constant element if we can. */
12226 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12228 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12229 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12230 return;
12233 /* One field is non-constant. Load constant then overwrite varying
12234 field. This is more efficient than using the stack. */
12235 if (n_var == 1)
12237 rtx copy = copy_rtx (vals);
12238 rtx index = GEN_INT (one_var);
12240 /* Load constant part of vector, substitute neighboring value for
12241 varying element. */
12242 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12243 neon_expand_vector_init (target, copy);
12245 /* Insert variable. */
12246 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12247 switch (mode)
12249 case E_V8QImode:
12250 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12251 break;
12252 case E_V16QImode:
12253 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12254 break;
12255 case E_V4HImode:
12256 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12257 break;
12258 case E_V8HImode:
12259 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12260 break;
12261 case E_V2SImode:
12262 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12263 break;
12264 case E_V4SImode:
12265 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12266 break;
12267 case E_V2SFmode:
12268 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12269 break;
12270 case E_V4SFmode:
12271 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12272 break;
12273 case E_V2DImode:
12274 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12275 break;
12276 default:
12277 gcc_unreachable ();
12279 return;
12282 /* Construct the vector in memory one field at a time
12283 and load the whole vector. */
12284 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12285 for (i = 0; i < n_elts; i++)
12286 emit_move_insn (adjust_address_nv (mem, inner_mode,
12287 i * GET_MODE_SIZE (inner_mode)),
12288 XVECEXP (vals, 0, i));
12289 emit_move_insn (target, mem);
12292 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12293 an error if it doesn't. EXP indicates the source location, which includes the
12294 inlining history for intrinsics. */
12296 static void
12297 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12298 const_tree exp, const char *desc)
12300 HOST_WIDE_INT lane;
12302 gcc_assert (CONST_INT_P (operand));
12304 lane = INTVAL (operand);
12306 if (lane < low || lane >= high)
12308 if (exp)
12309 error ("%K%s %wd out of range %wd - %wd",
12310 exp, desc, lane, low, high - 1);
12311 else
12312 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12316 /* Bounds-check lanes. */
12318 void
12319 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12320 const_tree exp)
12322 bounds_check (operand, low, high, exp, "lane");
12325 /* Bounds-check constants. */
12327 void
12328 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12330 bounds_check (operand, low, high, NULL_TREE, "constant");
12333 HOST_WIDE_INT
12334 neon_element_bits (machine_mode mode)
12336 return GET_MODE_UNIT_BITSIZE (mode);
12340 /* Predicates for `match_operand' and `match_operator'. */
12342 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12343 WB is true if full writeback address modes are allowed and is false
12344 if limited writeback address modes (POST_INC and PRE_DEC) are
12345 allowed. */
12348 arm_coproc_mem_operand (rtx op, bool wb)
12350 rtx ind;
12352 /* Reject eliminable registers. */
12353 if (! (reload_in_progress || reload_completed || lra_in_progress)
12354 && ( reg_mentioned_p (frame_pointer_rtx, op)
12355 || reg_mentioned_p (arg_pointer_rtx, op)
12356 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12357 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12358 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12359 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12360 return FALSE;
12362 /* Constants are converted into offsets from labels. */
12363 if (!MEM_P (op))
12364 return FALSE;
12366 ind = XEXP (op, 0);
12368 if (reload_completed
12369 && (GET_CODE (ind) == LABEL_REF
12370 || (GET_CODE (ind) == CONST
12371 && GET_CODE (XEXP (ind, 0)) == PLUS
12372 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12373 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12374 return TRUE;
12376 /* Match: (mem (reg)). */
12377 if (REG_P (ind))
12378 return arm_address_register_rtx_p (ind, 0);
12380 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12381 acceptable in any case (subject to verification by
12382 arm_address_register_rtx_p). We need WB to be true to accept
12383 PRE_INC and POST_DEC. */
12384 if (GET_CODE (ind) == POST_INC
12385 || GET_CODE (ind) == PRE_DEC
12386 || (wb
12387 && (GET_CODE (ind) == PRE_INC
12388 || GET_CODE (ind) == POST_DEC)))
12389 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12391 if (wb
12392 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12393 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12394 && GET_CODE (XEXP (ind, 1)) == PLUS
12395 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12396 ind = XEXP (ind, 1);
12398 /* Match:
12399 (plus (reg)
12400 (const)). */
12401 if (GET_CODE (ind) == PLUS
12402 && REG_P (XEXP (ind, 0))
12403 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12404 && CONST_INT_P (XEXP (ind, 1))
12405 && INTVAL (XEXP (ind, 1)) > -1024
12406 && INTVAL (XEXP (ind, 1)) < 1024
12407 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12408 return TRUE;
12410 return FALSE;
12413 /* Return TRUE if OP is a memory operand which we can load or store a vector
12414 to/from. TYPE is one of the following values:
12415 0 - Vector load/store (vldr)
12416 1 - Core registers (ldm)
12417 2 - Element/structure loads (vld1)
12420 neon_vector_mem_operand (rtx op, int type, bool strict)
12422 rtx ind;
12424 /* Reject eliminable registers. */
12425 if (strict && ! (reload_in_progress || reload_completed)
12426 && (reg_mentioned_p (frame_pointer_rtx, op)
12427 || reg_mentioned_p (arg_pointer_rtx, op)
12428 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12429 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12430 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12431 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12432 return FALSE;
12434 /* Constants are converted into offsets from labels. */
12435 if (!MEM_P (op))
12436 return FALSE;
12438 ind = XEXP (op, 0);
12440 if (reload_completed
12441 && (GET_CODE (ind) == LABEL_REF
12442 || (GET_CODE (ind) == CONST
12443 && GET_CODE (XEXP (ind, 0)) == PLUS
12444 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12445 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12446 return TRUE;
12448 /* Match: (mem (reg)). */
12449 if (REG_P (ind))
12450 return arm_address_register_rtx_p (ind, 0);
12452 /* Allow post-increment with Neon registers. */
12453 if ((type != 1 && GET_CODE (ind) == POST_INC)
12454 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12455 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12457 /* Allow post-increment by register for VLDn. */
12458 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12459 && GET_CODE (XEXP (ind, 1)) == PLUS
12460 && REG_P (XEXP (XEXP (ind, 1), 1)))
12461 return true;
12463 /* Match:
12464 (plus (reg)
12465 (const)). */
12466 if (type == 0
12467 && GET_CODE (ind) == PLUS
12468 && REG_P (XEXP (ind, 0))
12469 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12470 && CONST_INT_P (XEXP (ind, 1))
12471 && INTVAL (XEXP (ind, 1)) > -1024
12472 /* For quad modes, we restrict the constant offset to be slightly less
12473 than what the instruction format permits. We have no such constraint
12474 on double mode offsets. (This must match arm_legitimate_index_p.) */
12475 && (INTVAL (XEXP (ind, 1))
12476 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12477 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12478 return TRUE;
12480 return FALSE;
12483 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12484 type. */
12486 neon_struct_mem_operand (rtx op)
12488 rtx ind;
12490 /* Reject eliminable registers. */
12491 if (! (reload_in_progress || reload_completed)
12492 && ( reg_mentioned_p (frame_pointer_rtx, op)
12493 || reg_mentioned_p (arg_pointer_rtx, op)
12494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12498 return FALSE;
12500 /* Constants are converted into offsets from labels. */
12501 if (!MEM_P (op))
12502 return FALSE;
12504 ind = XEXP (op, 0);
12506 if (reload_completed
12507 && (GET_CODE (ind) == LABEL_REF
12508 || (GET_CODE (ind) == CONST
12509 && GET_CODE (XEXP (ind, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12511 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12512 return TRUE;
12514 /* Match: (mem (reg)). */
12515 if (REG_P (ind))
12516 return arm_address_register_rtx_p (ind, 0);
12518 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12519 if (GET_CODE (ind) == POST_INC
12520 || GET_CODE (ind) == PRE_DEC)
12521 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12523 return FALSE;
12526 /* Return true if X is a register that will be eliminated later on. */
12528 arm_eliminable_register (rtx x)
12530 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12531 || REGNO (x) == ARG_POINTER_REGNUM
12532 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12533 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12536 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12537 coprocessor registers. Otherwise return NO_REGS. */
12539 enum reg_class
12540 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12542 if (mode == HFmode)
12544 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12545 return GENERAL_REGS;
12546 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12547 return NO_REGS;
12548 return GENERAL_REGS;
12551 /* The neon move patterns handle all legitimate vector and struct
12552 addresses. */
12553 if (TARGET_NEON
12554 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12555 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12556 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12557 || VALID_NEON_STRUCT_MODE (mode)))
12558 return NO_REGS;
12560 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12561 return NO_REGS;
12563 return GENERAL_REGS;
12566 /* Values which must be returned in the most-significant end of the return
12567 register. */
12569 static bool
12570 arm_return_in_msb (const_tree valtype)
12572 return (TARGET_AAPCS_BASED
12573 && BYTES_BIG_ENDIAN
12574 && (AGGREGATE_TYPE_P (valtype)
12575 || TREE_CODE (valtype) == COMPLEX_TYPE
12576 || FIXED_POINT_TYPE_P (valtype)));
12579 /* Return TRUE if X references a SYMBOL_REF. */
12581 symbol_mentioned_p (rtx x)
12583 const char * fmt;
12584 int i;
12586 if (GET_CODE (x) == SYMBOL_REF)
12587 return 1;
12589 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12590 are constant offsets, not symbols. */
12591 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12592 return 0;
12594 fmt = GET_RTX_FORMAT (GET_CODE (x));
12596 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12598 if (fmt[i] == 'E')
12600 int j;
12602 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12603 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12604 return 1;
12606 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12607 return 1;
12610 return 0;
12613 /* Return TRUE if X references a LABEL_REF. */
12615 label_mentioned_p (rtx x)
12617 const char * fmt;
12618 int i;
12620 if (GET_CODE (x) == LABEL_REF)
12621 return 1;
12623 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12624 instruction, but they are constant offsets, not symbols. */
12625 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12626 return 0;
12628 fmt = GET_RTX_FORMAT (GET_CODE (x));
12629 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12631 if (fmt[i] == 'E')
12633 int j;
12635 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12636 if (label_mentioned_p (XVECEXP (x, i, j)))
12637 return 1;
12639 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12640 return 1;
12643 return 0;
12647 tls_mentioned_p (rtx x)
12649 switch (GET_CODE (x))
12651 case CONST:
12652 return tls_mentioned_p (XEXP (x, 0));
12654 case UNSPEC:
12655 if (XINT (x, 1) == UNSPEC_TLS)
12656 return 1;
12658 /* Fall through. */
12659 default:
12660 return 0;
12664 /* Must not copy any rtx that uses a pc-relative address.
12665 Also, disallow copying of load-exclusive instructions that
12666 may appear after splitting of compare-and-swap-style operations
12667 so as to prevent those loops from being transformed away from their
12668 canonical forms (see PR 69904). */
12670 static bool
12671 arm_cannot_copy_insn_p (rtx_insn *insn)
12673 /* The tls call insn cannot be copied, as it is paired with a data
12674 word. */
12675 if (recog_memoized (insn) == CODE_FOR_tlscall)
12676 return true;
12678 subrtx_iterator::array_type array;
12679 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12681 const_rtx x = *iter;
12682 if (GET_CODE (x) == UNSPEC
12683 && (XINT (x, 1) == UNSPEC_PIC_BASE
12684 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12685 return true;
12688 rtx set = single_set (insn);
12689 if (set)
12691 rtx src = SET_SRC (set);
12692 if (GET_CODE (src) == ZERO_EXTEND)
12693 src = XEXP (src, 0);
12695 /* Catch the load-exclusive and load-acquire operations. */
12696 if (GET_CODE (src) == UNSPEC_VOLATILE
12697 && (XINT (src, 1) == VUNSPEC_LL
12698 || XINT (src, 1) == VUNSPEC_LAX))
12699 return true;
12701 return false;
12704 enum rtx_code
12705 minmax_code (rtx x)
12707 enum rtx_code code = GET_CODE (x);
12709 switch (code)
12711 case SMAX:
12712 return GE;
12713 case SMIN:
12714 return LE;
12715 case UMIN:
12716 return LEU;
12717 case UMAX:
12718 return GEU;
12719 default:
12720 gcc_unreachable ();
12724 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12726 bool
12727 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12728 int *mask, bool *signed_sat)
12730 /* The high bound must be a power of two minus one. */
12731 int log = exact_log2 (INTVAL (hi_bound) + 1);
12732 if (log == -1)
12733 return false;
12735 /* The low bound is either zero (for usat) or one less than the
12736 negation of the high bound (for ssat). */
12737 if (INTVAL (lo_bound) == 0)
12739 if (mask)
12740 *mask = log;
12741 if (signed_sat)
12742 *signed_sat = false;
12744 return true;
12747 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12749 if (mask)
12750 *mask = log + 1;
12751 if (signed_sat)
12752 *signed_sat = true;
12754 return true;
12757 return false;
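/* Illustrative sketch, not part of the original file: the bound pairs the
   function above accepts, expressed directly on integer bounds.  For example
   [0, 255] maps to usat #8 and [-128, 127] maps to ssat #8.  The helper name
   is invented for exposition.  */
static int ATTRIBUTE_UNUSED
sat_bit_width_sketch (HOST_WIDE_INT lo, HOST_WIDE_INT hi, bool *is_signed)
{
  unsigned HOST_WIDE_INT uhi = hi;
  /* HI must be a power of two minus one.  */
  if (hi < 0 || (uhi & (uhi + 1)) != 0)
    return -1;
  int log = 0;
  while ((((unsigned HOST_WIDE_INT) 1 << log) - 1) != uhi)
    log++;
  if (lo == 0)
    {
      *is_signed = false;
      return log;		/* usat #log  */
    }
  if (lo == -hi - 1)
    {
      *is_signed = true;
      return log + 1;		/* ssat #(log + 1)  */
    }
  return -1;
}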
12760 /* Return 1 if memory locations are adjacent. */
12762 adjacent_mem_locations (rtx a, rtx b)
12764 /* We don't guarantee to preserve the order of these memory refs. */
12765 if (volatile_refs_p (a) || volatile_refs_p (b))
12766 return 0;
12768 if ((REG_P (XEXP (a, 0))
12769 || (GET_CODE (XEXP (a, 0)) == PLUS
12770 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12771 && (REG_P (XEXP (b, 0))
12772 || (GET_CODE (XEXP (b, 0)) == PLUS
12773 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12775 HOST_WIDE_INT val0 = 0, val1 = 0;
12776 rtx reg0, reg1;
12777 int val_diff;
12779 if (GET_CODE (XEXP (a, 0)) == PLUS)
12781 reg0 = XEXP (XEXP (a, 0), 0);
12782 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12784 else
12785 reg0 = XEXP (a, 0);
12787 if (GET_CODE (XEXP (b, 0)) == PLUS)
12789 reg1 = XEXP (XEXP (b, 0), 0);
12790 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12792 else
12793 reg1 = XEXP (b, 0);
12795 /* Don't accept any offset that will require multiple
12796 instructions to handle, since this would cause the
12797 arith_adjacentmem pattern to output an overlong sequence. */
12798 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12799 return 0;
12801 /* Don't allow an eliminable register: register elimination can make
12802 the offset too large. */
12803 if (arm_eliminable_register (reg0))
12804 return 0;
12806 val_diff = val1 - val0;
12808 if (arm_ld_sched)
12810 /* If the target has load delay slots, then there's no benefit
12811 to using an ldm instruction unless the offset is zero and
12812 we are optimizing for size. */
12813 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12814 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12815 && (val_diff == 4 || val_diff == -4));
12818 return ((REGNO (reg0) == REGNO (reg1))
12819 && (val_diff == 4 || val_diff == -4));
12822 return 0;
12825 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12826 for load operations, false for store operations. CONSECUTIVE is true
12827 if the register numbers in the operation must be consecutive in the register
12828 bank. RETURN_PC is true if the value is to be loaded into the PC.
12829 The pattern we are trying to match for load is:
12830 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12831 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12834 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12836 where
12837 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12838 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12839 3. If consecutive is TRUE, then for kth register being loaded,
12840 REGNO (R_dk) = REGNO (R_d0) + k.
12841 The pattern for store is similar. */
12842 bool
12843 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12844 bool consecutive, bool return_pc)
12846 HOST_WIDE_INT count = XVECLEN (op, 0);
12847 rtx reg, mem, addr;
12848 unsigned regno;
12849 unsigned first_regno;
12850 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12851 rtx elt;
12852 bool addr_reg_in_reglist = false;
12853 bool update = false;
12854 int reg_increment;
12855 int offset_adj;
12856 int regs_per_val;
12858 /* If not in SImode, then registers must be consecutive
12859 (e.g., VLDM instructions for DFmode). */
12860 gcc_assert ((mode == SImode) || consecutive);
12861 /* Setting return_pc for stores is illegal. */
12862 gcc_assert (!return_pc || load);
12864 /* Set up the increments and the regs per val based on the mode. */
12865 reg_increment = GET_MODE_SIZE (mode);
12866 regs_per_val = reg_increment / 4;
12867 offset_adj = return_pc ? 1 : 0;
12869 if (count <= 1
12870 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12871 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12872 return false;
12874 /* Check if this is a write-back. */
12875 elt = XVECEXP (op, 0, offset_adj);
12876 if (GET_CODE (SET_SRC (elt)) == PLUS)
12878 i++;
12879 base = 1;
12880 update = true;
12882 /* The offset adjustment must be the number of registers being
12883 popped times the size of a single register. */
12884 if (!REG_P (SET_DEST (elt))
12885 || !REG_P (XEXP (SET_SRC (elt), 0))
12886 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12887 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12888 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12889 ((count - 1 - offset_adj) * reg_increment))
12890 return false;
12893 i = i + offset_adj;
12894 base = base + offset_adj;
12895 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12896 success depends on the type: VLDM can do just one reg,
12897 LDM must do at least two. */
12898 if ((count <= i) && (mode == SImode))
12899 return false;
12901 elt = XVECEXP (op, 0, i - 1);
12902 if (GET_CODE (elt) != SET)
12903 return false;
12905 if (load)
12907 reg = SET_DEST (elt);
12908 mem = SET_SRC (elt);
12910 else
12912 reg = SET_SRC (elt);
12913 mem = SET_DEST (elt);
12916 if (!REG_P (reg) || !MEM_P (mem))
12917 return false;
12919 regno = REGNO (reg);
12920 first_regno = regno;
12921 addr = XEXP (mem, 0);
12922 if (GET_CODE (addr) == PLUS)
12924 if (!CONST_INT_P (XEXP (addr, 1)))
12925 return false;
12927 offset = INTVAL (XEXP (addr, 1));
12928 addr = XEXP (addr, 0);
12931 if (!REG_P (addr))
12932 return false;
12934 /* Don't allow SP to be loaded unless it is also the base register. It
12935 guarantees that SP is reset correctly when an LDM instruction
12936 is interrupted. Otherwise, we might end up with a corrupt stack. */
12937 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12938 return false;
12940 for (; i < count; i++)
12942 elt = XVECEXP (op, 0, i);
12943 if (GET_CODE (elt) != SET)
12944 return false;
12946 if (load)
12948 reg = SET_DEST (elt);
12949 mem = SET_SRC (elt);
12951 else
12953 reg = SET_SRC (elt);
12954 mem = SET_DEST (elt);
12957 if (!REG_P (reg)
12958 || GET_MODE (reg) != mode
12959 || REGNO (reg) <= regno
12960 || (consecutive
12961 && (REGNO (reg) !=
12962 (unsigned int) (first_regno + regs_per_val * (i - base))))
12963 /* Don't allow SP to be loaded unless it is also the base register. It
12964 guarantees that SP is reset correctly when an LDM instruction
12965 is interrupted. Otherwise, we might end up with a corrupt stack. */
12966 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12967 || !MEM_P (mem)
12968 || GET_MODE (mem) != mode
12969 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12970 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12971 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12972 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12973 offset + (i - base) * reg_increment))
12974 && (!REG_P (XEXP (mem, 0))
12975 || offset + (i - base) * reg_increment != 0)))
12976 return false;
12978 regno = REGNO (reg);
12979 if (regno == REGNO (addr))
12980 addr_reg_in_reglist = true;
12983 if (load)
12985 if (update && addr_reg_in_reglist)
12986 return false;
12988 /* For Thumb-1, address register is always modified - either by write-back
12989 or by explicit load. If the pattern does not describe an update,
12990 then the address register must be in the list of loaded registers. */
12991 if (TARGET_THUMB1)
12992 return update || addr_reg_in_reglist;
12995 return true;
12998 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12999 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13000 instruction. ADD_OFFSET is nonzero if the base address register needs
13001 to be modified with an add instruction before we can use it. */
13003 static bool
13004 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13005 int nops, HOST_WIDE_INT add_offset)
13007 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13008 if the offset isn't small enough. The reason 2 ldrs are faster
13009 is that these ARMs are able to do more than one cache access
13010 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13011 whilst the ARM8 has a double bandwidth cache. This means that
13012 these cores can do both an instruction fetch and a data fetch in
13013 a single cycle, so the trick of calculating the address into a
13014 scratch register (one of the result regs) and then doing a load
13015 multiple actually becomes slower (and no smaller in code size).
13016 That is the transformation
13018 ldr rd1, [rbase + offset]
13019 ldr rd2, [rbase + offset + 4]
13023 add rd1, rbase, offset
13024 ldmia rd1, {rd1, rd2}
13026 produces worse code -- '3 cycles + any stalls on rd2' instead of
13027 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13028 access per cycle, the first sequence could never complete in less
13029 than 6 cycles, whereas the ldm sequence would only take 5 and
13030 would make better use of sequential accesses if not hitting the
13031 cache.
13033 We cheat here and test 'arm_ld_sched' which we currently know to
13034 only be true for the ARM8, ARM9 and StrongARM. If this ever
13035 changes, then the test below needs to be reworked. */
13036 if (nops == 2 && arm_ld_sched && add_offset != 0)
13037 return false;
13039 /* XScale has load-store double instructions, but they have stricter
13040 alignment requirements than load-store multiple, so we cannot
13041 use them.
13043 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13044 the pipeline until completion.
13046 NREGS CYCLES
13047 1 3
13048 2 4
13049 3 5
13050 4 6
13052 An ldr instruction takes 1-3 cycles, but does not block the
13053 pipeline.
13055 NREGS CYCLES
13056 1 1-3
13057 2 2-6
13058 3 3-9
13059 4 4-12
13061 Best case ldr will always win. However, the more ldr instructions
13062 we issue, the less likely we are to be able to schedule them well.
13063 Using ldr instructions also increases code size.
13065 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13066 for counts of 3 or 4 regs. */
13067 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13068 return false;
13069 return true;
13072 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13073 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13074 an array ORDER which describes the sequence to use when accessing the
13075 offsets that produces an ascending order. In this sequence, each
13076 offset must be larger by exactly 4 than the previous one. ORDER[0]
13077 must have been filled in with the lowest offset by the caller.
13078 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13079 we use to verify that ORDER produces an ascending order of registers.
13080 Return true if it was possible to construct such an order, false if
13081 not. */
13083 static bool
13084 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13085 int *unsorted_regs)
13087 int i;
13088 for (i = 1; i < nops; i++)
13090 int j;
13092 order[i] = order[i - 1];
13093 for (j = 0; j < nops; j++)
13094 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13096 /* We must find exactly one offset that is higher than the
13097 previous one by 4. */
13098 if (order[i] != order[i - 1])
13099 return false;
13100 order[i] = j;
13102 if (order[i] == order[i - 1])
13103 return false;
13104 /* The register numbers must be ascending. */
13105 if (unsorted_regs != NULL
13106 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13107 return false;
13109 return true;
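/* Illustrative sketch, not part of the original file: a worked example of
   what the function above computes.  The offsets and the helper name are
   invented for exposition only.  */
static bool ATTRIBUTE_UNUSED
compute_offset_order_example_sketch (void)
{
  HOST_WIDE_INT offsets[4] = { 8, 0, 4, 12 };
  int order[4] = { 1, 0, 0, 0 };	/* order[0] = index of the lowest offset (0).  */
  bool ok = compute_offset_order (4, offsets, order, NULL);
  /* OK is true and order is { 1, 2, 0, 3 }, i.e. offsets 0, 4, 8, 12 in
     ascending order, each exactly 4 larger than the previous one.  */
  return ok && order[0] == 1 && order[1] == 2 && order[2] == 0 && order[3] == 3;
}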
13112 /* Used to determine in a peephole whether a sequence of load
13113 instructions can be changed into a load-multiple instruction.
13114 NOPS is the number of separate load instructions we are examining. The
13115 first NOPS entries in OPERANDS are the destination registers, the
13116 next NOPS entries are memory operands. If this function is
13117 successful, *BASE is set to the common base register of the memory
13118 accesses; *LOAD_OFFSET is set to the first memory location's offset
13119 from that base register.
13120 REGS is an array filled in with the destination register numbers.
13121 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13122 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13123 the sequence of registers in REGS matches the loads from ascending memory
13124 locations, and the function verifies that the register numbers are
13125 themselves ascending. If CHECK_REGS is false, the register numbers
13126 are stored in the order they are found in the operands. */
13127 static int
13128 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13129 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13131 int unsorted_regs[MAX_LDM_STM_OPS];
13132 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13133 int order[MAX_LDM_STM_OPS];
13134 rtx base_reg_rtx = NULL;
13135 int base_reg = -1;
13136 int i, ldm_case;
13138 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13139 easily extended if required. */
13140 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13142 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13144 /* Loop over the operands and check that the memory references are
13145 suitable (i.e. immediate offsets from the same base register). At
13146 the same time, extract the target register, and the memory
13147 offsets. */
13148 for (i = 0; i < nops; i++)
13150 rtx reg;
13151 rtx offset;
13153 /* Convert a subreg of a mem into the mem itself. */
13154 if (GET_CODE (operands[nops + i]) == SUBREG)
13155 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13157 gcc_assert (MEM_P (operands[nops + i]));
13159 /* Don't reorder volatile memory references; it doesn't seem worth
13160 looking for the case where the order is ok anyway. */
13161 if (MEM_VOLATILE_P (operands[nops + i]))
13162 return 0;
13164 offset = const0_rtx;
13166 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13167 || (GET_CODE (reg) == SUBREG
13168 && REG_P (reg = SUBREG_REG (reg))))
13169 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13170 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13171 || (GET_CODE (reg) == SUBREG
13172 && REG_P (reg = SUBREG_REG (reg))))
13173 && (CONST_INT_P (offset
13174 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13176 if (i == 0)
13178 base_reg = REGNO (reg);
13179 base_reg_rtx = reg;
13180 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13181 return 0;
13183 else if (base_reg != (int) REGNO (reg))
13184 /* Not addressed from the same base register. */
13185 return 0;
13187 unsorted_regs[i] = (REG_P (operands[i])
13188 ? REGNO (operands[i])
13189 : REGNO (SUBREG_REG (operands[i])));
13191 /* If it isn't an integer register, or if it overwrites the
13192 base register but isn't the last insn in the list, then
13193 we can't do this. */
13194 if (unsorted_regs[i] < 0
13195 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13196 || unsorted_regs[i] > 14
13197 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13198 return 0;
13200 /* Don't allow SP to be loaded unless it is also the base
13201 register. It guarantees that SP is reset correctly when
13202 an LDM instruction is interrupted. Otherwise, we might
13203 end up with a corrupt stack. */
13204 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13205 return 0;
13207 unsorted_offsets[i] = INTVAL (offset);
13208 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13209 order[0] = i;
13211 else
13212 /* Not a suitable memory address. */
13213 return 0;
13216 /* All the useful information has now been extracted from the
13217 operands into unsorted_regs and unsorted_offsets; additionally,
13218 order[0] has been set to the lowest offset in the list. Sort
13219 the offsets into order, verifying that they are adjacent, and
13220 check that the register numbers are ascending. */
13221 if (!compute_offset_order (nops, unsorted_offsets, order,
13222 check_regs ? unsorted_regs : NULL))
13223 return 0;
13225 if (saved_order)
13226 memcpy (saved_order, order, sizeof order);
13228 if (base)
13230 *base = base_reg;
13232 for (i = 0; i < nops; i++)
13233 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13235 *load_offset = unsorted_offsets[order[0]];
13238 if (TARGET_THUMB1
13239 && !peep2_reg_dead_p (nops, base_reg_rtx))
13240 return 0;
13242 if (unsorted_offsets[order[0]] == 0)
13243 ldm_case = 1; /* ldmia */
13244 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13245 ldm_case = 2; /* ldmib */
13246 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13247 ldm_case = 3; /* ldmda */
13248 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13249 ldm_case = 4; /* ldmdb */
13250 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13251 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13252 ldm_case = 5;
13253 else
13254 return 0;
13256 if (!multiple_operation_profitable_p (false, nops,
13257 ldm_case == 5
13258 ? unsorted_offsets[order[0]] : 0))
13259 return 0;
13261 return ldm_case;
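/* As an example of the classification above (assuming ARM mode): loads from
   [rN], [rN, #4], [rN, #8], [rN, #12] give ldm_case 1 (ldmia); offsets
   4/8/12/16 give case 2 (ldmib); -12/-8/-4/0 give case 3 (ldmda); and
   -16/-12/-8/-4 give case 4 (ldmdb).  Any other run of adjacent offsets
   whose lowest offset (or its negation) is a valid ADD/SUB immediate falls
   into case 5, which callers handle by adjusting the base register first.  */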
13264 /* Used to determine in a peephole whether a sequence of store instructions can
13265 be changed into a store-multiple instruction.
13266 NOPS is the number of separate store instructions we are examining.
13267 NOPS_TOTAL is the total number of instructions recognized by the peephole
13268 pattern.
13269 The first NOPS entries in OPERANDS are the source registers, the next
13270 NOPS entries are memory operands. If this function is successful, *BASE is
13271 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13272 to the first memory location's offset from that base register. REGS is an
13273 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13274 likewise filled with the corresponding rtx's.
13275 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13276 numbers to an ascending order of stores.
13277 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13278 from ascending memory locations, and the function verifies that the register
13279 numbers are themselves ascending. If CHECK_REGS is false, the register
13280 numbers are stored in the order they are found in the operands. */
13281 static int
13282 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13283 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13284 HOST_WIDE_INT *load_offset, bool check_regs)
13286 int unsorted_regs[MAX_LDM_STM_OPS];
13287 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13288 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13289 int order[MAX_LDM_STM_OPS];
13290 int base_reg = -1;
13291 rtx base_reg_rtx = NULL;
13292 int i, stm_case;
13294 /* Write back of base register is currently only supported for Thumb 1. */
13295 int base_writeback = TARGET_THUMB1;
13297 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13298 easily extended if required. */
13299 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13301 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13303 /* Loop over the operands and check that the memory references are
13304 suitable (i.e. immediate offsets from the same base register). At
13305 the same time, extract the target register, and the memory
13306 offsets. */
13307 for (i = 0; i < nops; i++)
13309 rtx reg;
13310 rtx offset;
13312 /* Convert a subreg of a mem into the mem itself. */
13313 if (GET_CODE (operands[nops + i]) == SUBREG)
13314 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13316 gcc_assert (MEM_P (operands[nops + i]));
13318 /* Don't reorder volatile memory references; it doesn't seem worth
13319 looking for the case where the order is ok anyway. */
13320 if (MEM_VOLATILE_P (operands[nops + i]))
13321 return 0;
13323 offset = const0_rtx;
13325 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13326 || (GET_CODE (reg) == SUBREG
13327 && REG_P (reg = SUBREG_REG (reg))))
13328 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13329 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13330 || (GET_CODE (reg) == SUBREG
13331 && REG_P (reg = SUBREG_REG (reg))))
13332 && (CONST_INT_P (offset
13333 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13335 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13336 ? operands[i] : SUBREG_REG (operands[i]));
13337 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13339 if (i == 0)
13341 base_reg = REGNO (reg);
13342 base_reg_rtx = reg;
13343 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13344 return 0;
13346 else if (base_reg != (int) REGNO (reg))
13347 /* Not addressed from the same base register. */
13348 return 0;
13350 /* If it isn't an integer register, then we can't do this. */
13351 if (unsorted_regs[i] < 0
13352 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13353 /* The effects are unpredictable if the base register is
13354 both updated and stored. */
13355 || (base_writeback && unsorted_regs[i] == base_reg)
13356 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13357 || unsorted_regs[i] > 14)
13358 return 0;
13360 unsorted_offsets[i] = INTVAL (offset);
13361 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13362 order[0] = i;
13364 else
13365 /* Not a suitable memory address. */
13366 return 0;
13369 /* All the useful information has now been extracted from the
13370 operands into unsorted_regs and unsorted_offsets; additionally,
13371 order[0] has been set to the lowest offset in the list. Sort
13372 the offsets into order, verifying that they are adjacent, and
13373 check that the register numbers are ascending. */
13374 if (!compute_offset_order (nops, unsorted_offsets, order,
13375 check_regs ? unsorted_regs : NULL))
13376 return 0;
13378 if (saved_order)
13379 memcpy (saved_order, order, sizeof order);
13381 if (base)
13383 *base = base_reg;
13385 for (i = 0; i < nops; i++)
13387 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13388 if (reg_rtxs)
13389 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13392 *load_offset = unsorted_offsets[order[0]];
13395 if (TARGET_THUMB1
13396 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13397 return 0;
13399 if (unsorted_offsets[order[0]] == 0)
13400 stm_case = 1; /* stmia */
13401 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13402 stm_case = 2; /* stmib */
13403 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13404 stm_case = 3; /* stmda */
13405 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13406 stm_case = 4; /* stmdb */
13407 else
13408 return 0;
13410 if (!multiple_operation_profitable_p (false, nops, 0))
13411 return 0;
13413 return stm_case;
13416 /* Routines for use in generating RTL. */
13418 /* Generate a load-multiple instruction. COUNT is the number of loads in
13419 the instruction; REGS and MEMS are arrays containing the operands.
13420 BASEREG is the base register to be used in addressing the memory operands.
13421 WBACK_OFFSET is nonzero if the instruction should update the base
13422 register. */
13424 static rtx
13425 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13426 HOST_WIDE_INT wback_offset)
13428 int i = 0, j;
13429 rtx result;
13431 if (!multiple_operation_profitable_p (false, count, 0))
13433 rtx seq;
13435 start_sequence ();
13437 for (i = 0; i < count; i++)
13438 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13440 if (wback_offset != 0)
13441 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13443 seq = get_insns ();
13444 end_sequence ();
13446 return seq;
13449 result = gen_rtx_PARALLEL (VOIDmode,
13450 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13451 if (wback_offset != 0)
13453 XVECEXP (result, 0, 0)
13454 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13455 i = 1;
13456 count++;
13459 for (j = 0; i < count; i++, j++)
13460 XVECEXP (result, 0, i)
13461 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13463 return result;
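/* A sketch of the RTL built above for COUNT == 2 with a writeback offset of
   8 (register numbers are illustrative):

     (parallel [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])

   When the multiple operation is not considered profitable, an equivalent
   sequence of single moves is emitted instead.  */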
13466 /* Generate a store-multiple instruction. COUNT is the number of stores in
13467 the instruction; REGS and MEMS are arrays containing the operands.
13468 BASEREG is the base register to be used in addressing the memory operands.
13469 WBACK_OFFSET is nonzero if the instruction should update the base
13470 register. */
13472 static rtx
13473 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13474 HOST_WIDE_INT wback_offset)
13476 int i = 0, j;
13477 rtx result;
13479 if (GET_CODE (basereg) == PLUS)
13480 basereg = XEXP (basereg, 0);
13482 if (!multiple_operation_profitable_p (false, count, 0))
13484 rtx seq;
13486 start_sequence ();
13488 for (i = 0; i < count; i++)
13489 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13491 if (wback_offset != 0)
13492 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13494 seq = get_insns ();
13495 end_sequence ();
13497 return seq;
13500 result = gen_rtx_PARALLEL (VOIDmode,
13501 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13502 if (wback_offset != 0)
13504 XVECEXP (result, 0, 0)
13505 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13506 i = 1;
13507 count++;
13510 for (j = 0; i < count; i++, j++)
13511 XVECEXP (result, 0, i)
13512 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13514 return result;
13517 /* Generate either a load-multiple or a store-multiple instruction. This
13518 function can be used in situations where we can start with a single MEM
13519 rtx and adjust its address upwards.
13520 COUNT is the number of operations in the instruction, not counting a
13521 possible update of the base register. REGS is an array containing the
13522 register operands.
13523 BASEREG is the base register to be used in addressing the memory operands,
13524 which are constructed from BASEMEM.
13525 WRITE_BACK specifies whether the generated instruction should include an
13526 update of the base register.
13527 OFFSETP is used to pass an offset to and from this function; this offset
13528 is not used when constructing the address (instead BASEMEM should have an
13529 appropriate offset in its address), it is used only for setting
13530 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13532 static rtx
13533 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13534 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13536 rtx mems[MAX_LDM_STM_OPS];
13537 HOST_WIDE_INT offset = *offsetp;
13538 int i;
13540 gcc_assert (count <= MAX_LDM_STM_OPS);
13542 if (GET_CODE (basereg) == PLUS)
13543 basereg = XEXP (basereg, 0);
13545 for (i = 0; i < count; i++)
13547 rtx addr = plus_constant (Pmode, basereg, i * 4);
13548 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13549 offset += 4;
13552 if (write_back)
13553 *offsetp = offset;
13555 if (is_load)
13556 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13557 write_back ? 4 * count : 0);
13558 else
13559 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13560 write_back ? 4 * count : 0);
13563 rtx
13564 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13565 rtx basemem, HOST_WIDE_INT *offsetp)
13567 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13568 offsetp);
13571 rtx
13572 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13573 rtx basemem, HOST_WIDE_INT *offsetp)
13575 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13576 offsetp);
13579 /* Called from a peephole2 expander to turn a sequence of loads into an
13580 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13581 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13582 is true if we can reorder the registers because they are used commutatively
13583 subsequently.
13584 Returns true iff we could generate a new instruction. */
13586 bool
13587 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13589 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13590 rtx mems[MAX_LDM_STM_OPS];
13591 int i, j, base_reg;
13592 rtx base_reg_rtx;
13593 HOST_WIDE_INT offset;
13594 int write_back = FALSE;
13595 int ldm_case;
13596 rtx addr;
13598 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13599 &base_reg, &offset, !sort_regs);
13601 if (ldm_case == 0)
13602 return false;
13604 if (sort_regs)
13605 for (i = 0; i < nops - 1; i++)
13606 for (j = i + 1; j < nops; j++)
13607 if (regs[i] > regs[j])
13609 int t = regs[i];
13610 regs[i] = regs[j];
13611 regs[j] = t;
13613 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13615 if (TARGET_THUMB1)
13617 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13618 gcc_assert (ldm_case == 1 || ldm_case == 5);
13619 write_back = TRUE;
13622 if (ldm_case == 5)
13624 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13625 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13626 offset = 0;
13627 if (!TARGET_THUMB1)
13628 base_reg_rtx = newbase;
13631 for (i = 0; i < nops; i++)
13633 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13634 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13635 SImode, addr, 0);
13637 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13638 write_back ? offset + i * 4 : 0));
13639 return true;
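/* For example (registers chosen arbitrarily), a peephole2 match of

     ldr r0, [r3]
     ldr r1, [r3, #4]

   reaches this point with ldm_case 1 and is replaced by a single
   load-multiple such as "ldm r3, {r0, r1}"; on Thumb-1 the base register is
   additionally written back and must be dead after the sequence.  */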
13642 /* Called from a peephole2 expander to turn a sequence of stores into an
13643 STM instruction. OPERANDS are the operands found by the peephole matcher;
13644 NOPS indicates how many separate stores we are trying to combine.
13645 Returns true iff we could generate a new instruction. */
13647 bool
13648 gen_stm_seq (rtx *operands, int nops)
13650 int i;
13651 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13652 rtx mems[MAX_LDM_STM_OPS];
13653 int base_reg;
13654 rtx base_reg_rtx;
13655 HOST_WIDE_INT offset;
13656 int write_back = FALSE;
13657 int stm_case;
13658 rtx addr;
13659 bool base_reg_dies;
13661 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13662 mem_order, &base_reg, &offset, true);
13664 if (stm_case == 0)
13665 return false;
13667 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13669 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13670 if (TARGET_THUMB1)
13672 gcc_assert (base_reg_dies);
13673 write_back = TRUE;
13676 if (stm_case == 5)
13678 gcc_assert (base_reg_dies);
13679 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13680 offset = 0;
13683 addr = plus_constant (Pmode, base_reg_rtx, offset);
13685 for (i = 0; i < nops; i++)
13687 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13688 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13689 SImode, addr, 0);
13691 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13692 write_back ? offset + i * 4 : 0));
13693 return true;
13696 /* Called from a peephole2 expander to turn a sequence of stores that are
13697 preceded by constant loads into an STM instruction. OPERANDS are the
13698 operands found by the peephole matcher; NOPS indicates how many
13699 separate stores we are trying to combine; there are 2 * NOPS
13700 instructions in the peephole.
13701 Returns true iff we could generate a new instruction. */
13703 bool
13704 gen_const_stm_seq (rtx *operands, int nops)
13706 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13707 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13708 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13709 rtx mems[MAX_LDM_STM_OPS];
13710 int base_reg;
13711 rtx base_reg_rtx;
13712 HOST_WIDE_INT offset;
13713 int write_back = FALSE;
13714 int stm_case;
13715 rtx addr;
13716 bool base_reg_dies;
13717 int i, j;
13718 HARD_REG_SET allocated;
13720 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13721 mem_order, &base_reg, &offset, false);
13723 if (stm_case == 0)
13724 return false;
13726 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13728 /* If the same register is used more than once, try to find a free
13729 register. */
13730 CLEAR_HARD_REG_SET (allocated);
13731 for (i = 0; i < nops; i++)
13733 for (j = i + 1; j < nops; j++)
13734 if (regs[i] == regs[j])
13736 rtx t = peep2_find_free_register (0, nops * 2,
13737 TARGET_THUMB1 ? "l" : "r",
13738 SImode, &allocated);
13739 if (t == NULL_RTX)
13740 return false;
13741 reg_rtxs[i] = t;
13742 regs[i] = REGNO (t);
13746 /* Compute an ordering that maps the register numbers to an ascending
13747 sequence. */
13748 reg_order[0] = 0;
13749 for (i = 0; i < nops; i++)
13750 if (regs[i] < regs[reg_order[0]])
13751 reg_order[0] = i;
13753 for (i = 1; i < nops; i++)
13755 int this_order = reg_order[i - 1];
13756 for (j = 0; j < nops; j++)
13757 if (regs[j] > regs[reg_order[i - 1]]
13758 && (this_order == reg_order[i - 1]
13759 || regs[j] < regs[this_order]))
13760 this_order = j;
13761 reg_order[i] = this_order;
13764 /* Ensure that registers that must be live after the instruction end
13765 up with the correct value. */
13766 for (i = 0; i < nops; i++)
13768 int this_order = reg_order[i];
13769 if ((this_order != mem_order[i]
13770 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13771 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13772 return false;
13775 /* Load the constants. */
13776 for (i = 0; i < nops; i++)
13778 rtx op = operands[2 * nops + mem_order[i]];
13779 sorted_regs[i] = regs[reg_order[i]];
13780 emit_move_insn (reg_rtxs[reg_order[i]], op);
13783 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13785 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13786 if (TARGET_THUMB1)
13788 gcc_assert (base_reg_dies);
13789 write_back = TRUE;
13792 if (stm_case == 5)
13794 gcc_assert (base_reg_dies);
13795 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13796 offset = 0;
13799 addr = plus_constant (Pmode, base_reg_rtx, offset);
13801 for (i = 0; i < nops; i++)
13803 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13804 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13805 SImode, addr, 0);
13807 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13808 write_back ? offset + i * 4 : 0));
13809 return true;
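/* For example, when the peephole matches two constant loads whose values are
   then stored to [r3] and [r3, #4], the code above re-emits the constant
   loads (into spare registers if a source register was reused) and replaces
   the stores with a single STM, after checking that every register that
   remains live still receives its original value.  */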
13812 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13813 unaligned copies on processors which support unaligned semantics for those
13814 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13815 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13816 An interleave factor of 1 (the minimum) will perform no interleaving.
13817 Load/store multiple are used for aligned addresses where possible. */
13819 static void
13820 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13821 HOST_WIDE_INT length,
13822 unsigned int interleave_factor)
13824 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13825 int *regnos = XALLOCAVEC (int, interleave_factor);
13826 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13827 HOST_WIDE_INT i, j;
13828 HOST_WIDE_INT remaining = length, words;
13829 rtx halfword_tmp = NULL, byte_tmp = NULL;
13830 rtx dst, src;
13831 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13832 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13833 HOST_WIDE_INT srcoffset, dstoffset;
13834 HOST_WIDE_INT src_autoinc, dst_autoinc;
13835 rtx mem, addr;
13837 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13839 /* Use hard registers if we have aligned source or destination so we can use
13840 load/store multiple with contiguous registers. */
13841 if (dst_aligned || src_aligned)
13842 for (i = 0; i < interleave_factor; i++)
13843 regs[i] = gen_rtx_REG (SImode, i);
13844 else
13845 for (i = 0; i < interleave_factor; i++)
13846 regs[i] = gen_reg_rtx (SImode);
13848 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13849 src = copy_addr_to_reg (XEXP (srcbase, 0));
13851 srcoffset = dstoffset = 0;
13853 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13854 For copying the last bytes we want to subtract this offset again. */
13855 src_autoinc = dst_autoinc = 0;
13857 for (i = 0; i < interleave_factor; i++)
13858 regnos[i] = i;
13860 /* Copy BLOCK_SIZE_BYTES chunks. */
13862 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13864 /* Load words. */
13865 if (src_aligned && interleave_factor > 1)
13867 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13868 TRUE, srcbase, &srcoffset));
13869 src_autoinc += UNITS_PER_WORD * interleave_factor;
13871 else
13873 for (j = 0; j < interleave_factor; j++)
13875 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13876 - src_autoinc));
13877 mem = adjust_automodify_address (srcbase, SImode, addr,
13878 srcoffset + j * UNITS_PER_WORD);
13879 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13881 srcoffset += block_size_bytes;
13884 /* Store words. */
13885 if (dst_aligned && interleave_factor > 1)
13887 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13888 TRUE, dstbase, &dstoffset));
13889 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13891 else
13893 for (j = 0; j < interleave_factor; j++)
13895 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13896 - dst_autoinc));
13897 mem = adjust_automodify_address (dstbase, SImode, addr,
13898 dstoffset + j * UNITS_PER_WORD);
13899 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13901 dstoffset += block_size_bytes;
13904 remaining -= block_size_bytes;
13907 /* Copy any whole words left (note these aren't interleaved with any
13908 subsequent halfword/byte load/stores in the interests of simplicity). */
13910 words = remaining / UNITS_PER_WORD;
13912 gcc_assert (words < interleave_factor);
13914 if (src_aligned && words > 1)
13916 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13917 &srcoffset));
13918 src_autoinc += UNITS_PER_WORD * words;
13920 else
13922 for (j = 0; j < words; j++)
13924 addr = plus_constant (Pmode, src,
13925 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13926 mem = adjust_automodify_address (srcbase, SImode, addr,
13927 srcoffset + j * UNITS_PER_WORD);
13928 if (src_aligned)
13929 emit_move_insn (regs[j], mem);
13930 else
13931 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13933 srcoffset += words * UNITS_PER_WORD;
13936 if (dst_aligned && words > 1)
13938 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13939 &dstoffset));
13940 dst_autoinc += words * UNITS_PER_WORD;
13942 else
13944 for (j = 0; j < words; j++)
13946 addr = plus_constant (Pmode, dst,
13947 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13948 mem = adjust_automodify_address (dstbase, SImode, addr,
13949 dstoffset + j * UNITS_PER_WORD);
13950 if (dst_aligned)
13951 emit_move_insn (mem, regs[j]);
13952 else
13953 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13955 dstoffset += words * UNITS_PER_WORD;
13958 remaining -= words * UNITS_PER_WORD;
13960 gcc_assert (remaining < 4);
13962 /* Copy a halfword if necessary. */
13964 if (remaining >= 2)
13966 halfword_tmp = gen_reg_rtx (SImode);
13968 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13969 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13970 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13972 /* Either write out immediately, or delay until we've loaded the last
13973 byte, depending on interleave factor. */
13974 if (interleave_factor == 1)
13976 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13977 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13978 emit_insn (gen_unaligned_storehi (mem,
13979 gen_lowpart (HImode, halfword_tmp)));
13980 halfword_tmp = NULL;
13981 dstoffset += 2;
13984 remaining -= 2;
13985 srcoffset += 2;
13988 gcc_assert (remaining < 2);
13990 /* Copy last byte. */
13992 if ((remaining & 1) != 0)
13994 byte_tmp = gen_reg_rtx (SImode);
13996 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13997 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13998 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14000 if (interleave_factor == 1)
14002 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14003 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14004 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14005 byte_tmp = NULL;
14006 dstoffset++;
14009 remaining--;
14010 srcoffset++;
14013 /* Store last halfword if we haven't done so already. */
14015 if (halfword_tmp)
14017 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14018 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14019 emit_insn (gen_unaligned_storehi (mem,
14020 gen_lowpart (HImode, halfword_tmp)));
14021 dstoffset += 2;
14024 /* Likewise for last byte. */
14026 if (byte_tmp)
14028 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14029 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14030 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14031 dstoffset++;
14034 gcc_assert (remaining == 0 && srcoffset == dstoffset);
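/* A worked example: copying 23 bytes with INTERLEAVE_FACTOR == 2 emits two
   8-byte chunks (load/load/store/store, or a load/store multiple when that
   side is word-aligned), then one whole word, then a halfword and a final
   byte, ending with SRCOFFSET == DSTOFFSET == 23.  */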
14037 /* From mips_adjust_block_mem:
14039 Helper function for doing a loop-based block operation on memory
14040 reference MEM. Each iteration of the loop will operate on LENGTH
14041 bytes of MEM.
14043 Create a new base register for use within the loop and point it to
14044 the start of MEM. Create a new memory reference that uses this
14045 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14047 static void
14048 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14049 rtx *loop_mem)
14051 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14053 /* Although the new mem does not refer to a known location,
14054 it does keep up to LENGTH bytes of alignment. */
14055 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14056 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14059 /* From mips_block_move_loop:
14061 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14062 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14063 the memory regions do not overlap. */
14065 static void
14066 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14067 unsigned int interleave_factor,
14068 HOST_WIDE_INT bytes_per_iter)
14070 rtx src_reg, dest_reg, final_src, test;
14071 HOST_WIDE_INT leftover;
14073 leftover = length % bytes_per_iter;
14074 length -= leftover;
14076 /* Create registers and memory references for use within the loop. */
14077 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14078 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14080 /* Calculate the value that SRC_REG should have after the last iteration of
14081 the loop. */
14082 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14083 0, 0, OPTAB_WIDEN);
14085 /* Emit the start of the loop. */
14086 rtx_code_label *label = gen_label_rtx ();
14087 emit_label (label);
14089 /* Emit the loop body. */
14090 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14091 interleave_factor);
14093 /* Move on to the next block. */
14094 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14095 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14097 /* Emit the loop condition. */
14098 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14099 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14101 /* Mop up any left-over bytes. */
14102 if (leftover)
14103 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
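/* The emitted structure is roughly:

     loop:
       <straight copy of BYTES_PER_ITER bytes>
       src_reg += BYTES_PER_ITER
       dest_reg += BYTES_PER_ITER
       if (src_reg != final_src) goto loop
     <straight copy of the leftover bytes, if any>  */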
14106 /* Emit a block move when either the source or destination is unaligned (not
14107 aligned to a four-byte boundary). This may need further tuning depending on
14108 core type, optimize_size setting, etc. */
14110 static int
14111 arm_movmemqi_unaligned (rtx *operands)
14113 HOST_WIDE_INT length = INTVAL (operands[2]);
14115 if (optimize_size)
14117 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14118 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14119 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14120 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14121 or dst_aligned though: allow more interleaving in those cases since the
14122 resulting code can be smaller. */
14123 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14124 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14126 if (length > 12)
14127 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14128 interleave_factor, bytes_per_iter);
14129 else
14130 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14131 interleave_factor);
14133 else
14135 /* Note that the loop created by arm_block_move_unaligned_loop may be
14136 subject to loop unrolling, which makes tuning this condition a little
14137 redundant. */
14138 if (length > 32)
14139 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14140 else
14141 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14144 return 1;
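/* For instance, a 40-byte unaligned copy compiled without -Os takes the loop
   path above with two 16-byte iterations (interleave factor 4) followed by a
   straight copy of the remaining 8 bytes; with -Os and neither operand
   aligned, the loop instead works in 4-byte iterations.  */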
14147 int
14148 arm_gen_movmemqi (rtx *operands)
14150 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14151 HOST_WIDE_INT srcoffset, dstoffset;
14152 rtx src, dst, srcbase, dstbase;
14153 rtx part_bytes_reg = NULL;
14154 rtx mem;
14156 if (!CONST_INT_P (operands[2])
14157 || !CONST_INT_P (operands[3])
14158 || INTVAL (operands[2]) > 64)
14159 return 0;
14161 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14162 return arm_movmemqi_unaligned (operands);
14164 if (INTVAL (operands[3]) & 3)
14165 return 0;
14167 dstbase = operands[0];
14168 srcbase = operands[1];
14170 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14171 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14173 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14174 out_words_to_go = INTVAL (operands[2]) / 4;
14175 last_bytes = INTVAL (operands[2]) & 3;
14176 dstoffset = srcoffset = 0;
14178 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14179 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14181 while (in_words_to_go >= 2)
14183 if (in_words_to_go > 4)
14184 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14185 TRUE, srcbase, &srcoffset));
14186 else
14187 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14188 src, FALSE, srcbase,
14189 &srcoffset));
14191 if (out_words_to_go)
14193 if (out_words_to_go > 4)
14194 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14195 TRUE, dstbase, &dstoffset));
14196 else if (out_words_to_go != 1)
14197 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14198 out_words_to_go, dst,
14199 (last_bytes == 0
14200 ? FALSE : TRUE),
14201 dstbase, &dstoffset));
14202 else
14204 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14205 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14206 if (last_bytes != 0)
14208 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14209 dstoffset += 4;
14214 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14215 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14218 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14219 if (out_words_to_go)
14221 rtx sreg;
14223 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14224 sreg = copy_to_reg (mem);
14226 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14227 emit_move_insn (mem, sreg);
14228 in_words_to_go--;
14230 gcc_assert (!in_words_to_go); /* Sanity check */
14233 if (in_words_to_go)
14235 gcc_assert (in_words_to_go > 0);
14237 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14238 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14241 gcc_assert (!last_bytes || part_bytes_reg);
14243 if (BYTES_BIG_ENDIAN && last_bytes)
14245 rtx tmp = gen_reg_rtx (SImode);
14247 /* The bytes we want are in the top end of the word. */
14248 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14249 GEN_INT (8 * (4 - last_bytes))));
14250 part_bytes_reg = tmp;
14252 while (last_bytes)
14254 mem = adjust_automodify_address (dstbase, QImode,
14255 plus_constant (Pmode, dst,
14256 last_bytes - 1),
14257 dstoffset + last_bytes - 1);
14258 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14260 if (--last_bytes)
14262 tmp = gen_reg_rtx (SImode);
14263 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14264 part_bytes_reg = tmp;
14269 else
14271 if (last_bytes > 1)
14273 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14274 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14275 last_bytes -= 2;
14276 if (last_bytes)
14278 rtx tmp = gen_reg_rtx (SImode);
14279 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14280 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14281 part_bytes_reg = tmp;
14282 dstoffset += 2;
14286 if (last_bytes)
14288 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14289 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14293 return 1;
14296 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14297 by mode size. */
14298 inline static rtx
14299 next_consecutive_mem (rtx mem)
14301 machine_mode mode = GET_MODE (mem);
14302 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14303 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14305 return adjust_automodify_address (mem, mode, addr, offset);
14308 /* Copy using LDRD/STRD instructions whenever possible.
14309 Returns true upon success. */
14310 bool
14311 gen_movmem_ldrd_strd (rtx *operands)
14313 unsigned HOST_WIDE_INT len;
14314 HOST_WIDE_INT align;
14315 rtx src, dst, base;
14316 rtx reg0;
14317 bool src_aligned, dst_aligned;
14318 bool src_volatile, dst_volatile;
14320 gcc_assert (CONST_INT_P (operands[2]));
14321 gcc_assert (CONST_INT_P (operands[3]));
14323 len = UINTVAL (operands[2]);
14324 if (len > 64)
14325 return false;
14327 /* Maximum alignment we can assume for both src and dst buffers. */
14328 align = INTVAL (operands[3]);
14330 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14331 return false;
14333 /* Place src and dst addresses in registers
14334 and update the corresponding mem rtx. */
14335 dst = operands[0];
14336 dst_volatile = MEM_VOLATILE_P (dst);
14337 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14338 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14339 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14341 src = operands[1];
14342 src_volatile = MEM_VOLATILE_P (src);
14343 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14344 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14345 src = adjust_automodify_address (src, VOIDmode, base, 0);
14347 if (!unaligned_access && !(src_aligned && dst_aligned))
14348 return false;
14350 if (src_volatile || dst_volatile)
14351 return false;
14353 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14354 if (!(dst_aligned || src_aligned))
14355 return arm_gen_movmemqi (operands);
14357 /* If either src or dst is unaligned we'll be accessing it as pairs
14358 of unaligned SImode accesses. Otherwise we can generate DImode
14359 ldrd/strd instructions. */
14360 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14361 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14363 while (len >= 8)
14365 len -= 8;
14366 reg0 = gen_reg_rtx (DImode);
14367 rtx low_reg = NULL_RTX;
14368 rtx hi_reg = NULL_RTX;
14370 if (!src_aligned || !dst_aligned)
14372 low_reg = gen_lowpart (SImode, reg0);
14373 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14375 if (src_aligned)
14376 emit_move_insn (reg0, src);
14377 else
14379 emit_insn (gen_unaligned_loadsi (low_reg, src));
14380 src = next_consecutive_mem (src);
14381 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14384 if (dst_aligned)
14385 emit_move_insn (dst, reg0);
14386 else
14388 emit_insn (gen_unaligned_storesi (dst, low_reg));
14389 dst = next_consecutive_mem (dst);
14390 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14393 src = next_consecutive_mem (src);
14394 dst = next_consecutive_mem (dst);
14397 gcc_assert (len < 8);
14398 if (len >= 4)
14400 /* More than a word but less than a double-word to copy. Copy a word. */
14401 reg0 = gen_reg_rtx (SImode);
14402 src = adjust_address (src, SImode, 0);
14403 dst = adjust_address (dst, SImode, 0);
14404 if (src_aligned)
14405 emit_move_insn (reg0, src);
14406 else
14407 emit_insn (gen_unaligned_loadsi (reg0, src));
14409 if (dst_aligned)
14410 emit_move_insn (dst, reg0);
14411 else
14412 emit_insn (gen_unaligned_storesi (dst, reg0));
14414 src = next_consecutive_mem (src);
14415 dst = next_consecutive_mem (dst);
14416 len -= 4;
14419 if (len == 0)
14420 return true;
14422 /* Copy the remaining bytes. */
14423 if (len >= 2)
14425 dst = adjust_address (dst, HImode, 0);
14426 src = adjust_address (src, HImode, 0);
14427 reg0 = gen_reg_rtx (SImode);
14428 if (src_aligned)
14429 emit_insn (gen_zero_extendhisi2 (reg0, src));
14430 else
14431 emit_insn (gen_unaligned_loadhiu (reg0, src));
14433 if (dst_aligned)
14434 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14435 else
14436 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14438 src = next_consecutive_mem (src);
14439 dst = next_consecutive_mem (dst);
14440 if (len == 2)
14441 return true;
14444 dst = adjust_address (dst, QImode, 0);
14445 src = adjust_address (src, QImode, 0);
14446 reg0 = gen_reg_rtx (QImode);
14447 emit_move_insn (reg0, src);
14448 emit_move_insn (dst, reg0);
14449 return true;
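/* For example, a 14-byte copy with both operands word-aligned is emitted as
   one DImode transfer (typically LDRD/STRD), one SImode word copy and one
   halfword copy; if either side is unaligned, the DImode step is replaced by
   a pair of unaligned SImode accesses.  */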
14452 /* Select a dominance comparison mode if possible for a test of the general
14453 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14454 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14455 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14456 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14457 In all cases OP will be either EQ or NE, but we don't need to know which
14458 here. If we are unable to support a dominance comparison we return
14459 CC mode. This will then fail to match for the RTL expressions that
14460 generate this call. */
14461 machine_mode
14462 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14464 enum rtx_code cond1, cond2;
14465 int swapped = 0;
14467 /* Currently we will probably get the wrong result if the individual
14468 comparisons are not simple. This also ensures that it is safe to
14469 reverse a comparison if necessary. */
14470 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14471 != CCmode)
14472 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14473 != CCmode))
14474 return CCmode;
14476 /* The if_then_else variant of this tests the second condition if the
14477 first passes, but is true if the first fails. Reverse the first
14478 condition to get a true "inclusive-or" expression. */
14479 if (cond_or == DOM_CC_NX_OR_Y)
14480 cond1 = reverse_condition (cond1);
14482 /* If the comparisons are not equal, and one doesn't dominate the other,
14483 then we can't do this. */
14484 if (cond1 != cond2
14485 && !comparison_dominates_p (cond1, cond2)
14486 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14487 return CCmode;
14489 if (swapped)
14490 std::swap (cond1, cond2);
14492 switch (cond1)
14494 case EQ:
14495 if (cond_or == DOM_CC_X_AND_Y)
14496 return CC_DEQmode;
14498 switch (cond2)
14500 case EQ: return CC_DEQmode;
14501 case LE: return CC_DLEmode;
14502 case LEU: return CC_DLEUmode;
14503 case GE: return CC_DGEmode;
14504 case GEU: return CC_DGEUmode;
14505 default: gcc_unreachable ();
14508 case LT:
14509 if (cond_or == DOM_CC_X_AND_Y)
14510 return CC_DLTmode;
14512 switch (cond2)
14514 case LT:
14515 return CC_DLTmode;
14516 case LE:
14517 return CC_DLEmode;
14518 case NE:
14519 return CC_DNEmode;
14520 default:
14521 gcc_unreachable ();
14524 case GT:
14525 if (cond_or == DOM_CC_X_AND_Y)
14526 return CC_DGTmode;
14528 switch (cond2)
14530 case GT:
14531 return CC_DGTmode;
14532 case GE:
14533 return CC_DGEmode;
14534 case NE:
14535 return CC_DNEmode;
14536 default:
14537 gcc_unreachable ();
14540 case LTU:
14541 if (cond_or == DOM_CC_X_AND_Y)
14542 return CC_DLTUmode;
14544 switch (cond2)
14546 case LTU:
14547 return CC_DLTUmode;
14548 case LEU:
14549 return CC_DLEUmode;
14550 case NE:
14551 return CC_DNEmode;
14552 default:
14553 gcc_unreachable ();
14556 case GTU:
14557 if (cond_or == DOM_CC_X_AND_Y)
14558 return CC_DGTUmode;
14560 switch (cond2)
14562 case GTU:
14563 return CC_DGTUmode;
14564 case GEU:
14565 return CC_DGEUmode;
14566 case NE:
14567 return CC_DNEmode;
14568 default:
14569 gcc_unreachable ();
14572 /* The remaining cases only occur when both comparisons are the
14573 same. */
14574 case NE:
14575 gcc_assert (cond1 == cond2);
14576 return CC_DNEmode;
14578 case LE:
14579 gcc_assert (cond1 == cond2);
14580 return CC_DLEmode;
14582 case GE:
14583 gcc_assert (cond1 == cond2);
14584 return CC_DGEmode;
14586 case LEU:
14587 gcc_assert (cond1 == cond2);
14588 return CC_DLEUmode;
14590 case GEU:
14591 gcc_assert (cond1 == cond2);
14592 return CC_DGEUmode;
14594 default:
14595 gcc_unreachable ();
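/* For example, a DOM_CC_X_AND_Y request for the condition pair (GE, GT)
   returns CC_DGTmode, whereas an unrelated pair such as (LT, GTU) cannot be
   combined and yields plain CCmode, so the calling pattern fails to match.  */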
14599 machine_mode
14600 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14602 /* All floating point compares return CCFP if it is an equality
14603 comparison, and CCFPE otherwise. */
14604 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14606 switch (op)
14608 case EQ:
14609 case NE:
14610 case UNORDERED:
14611 case ORDERED:
14612 case UNLT:
14613 case UNLE:
14614 case UNGT:
14615 case UNGE:
14616 case UNEQ:
14617 case LTGT:
14618 return CCFPmode;
14620 case LT:
14621 case LE:
14622 case GT:
14623 case GE:
14624 return CCFPEmode;
14626 default:
14627 gcc_unreachable ();
14631 /* A compare with a shifted operand. Because of canonicalization, the
14632 comparison will have to be swapped when we emit the assembler. */
14633 if (GET_MODE (y) == SImode
14634 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14635 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14636 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14637 || GET_CODE (x) == ROTATERT))
14638 return CC_SWPmode;
14640 /* This operation is performed swapped, but since we only rely on the Z
14641 flag we don't need an additional mode. */
14642 if (GET_MODE (y) == SImode
14643 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14644 && GET_CODE (x) == NEG
14645 && (op == EQ || op == NE))
14646 return CC_Zmode;
14648 /* This is a special case that is used by combine to allow a
14649 comparison of a shifted byte load to be split into a zero-extend
14650 followed by a comparison of the shifted integer (only valid for
14651 equalities and unsigned inequalities). */
14652 if (GET_MODE (x) == SImode
14653 && GET_CODE (x) == ASHIFT
14654 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14655 && GET_CODE (XEXP (x, 0)) == SUBREG
14656 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14657 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14658 && (op == EQ || op == NE
14659 || op == GEU || op == GTU || op == LTU || op == LEU)
14660 && CONST_INT_P (y))
14661 return CC_Zmode;
14663 /* A construct for a conditional compare, if the false arm contains
14664 0, then both conditions must be true, otherwise either condition
14665 must be true. Not all conditions are possible, so CCmode is
14666 returned if it can't be done. */
14667 if (GET_CODE (x) == IF_THEN_ELSE
14668 && (XEXP (x, 2) == const0_rtx
14669 || XEXP (x, 2) == const1_rtx)
14670 && COMPARISON_P (XEXP (x, 0))
14671 && COMPARISON_P (XEXP (x, 1)))
14672 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14673 INTVAL (XEXP (x, 2)));
14675 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14676 if (GET_CODE (x) == AND
14677 && (op == EQ || op == NE)
14678 && COMPARISON_P (XEXP (x, 0))
14679 && COMPARISON_P (XEXP (x, 1)))
14680 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14681 DOM_CC_X_AND_Y);
14683 if (GET_CODE (x) == IOR
14684 && (op == EQ || op == NE)
14685 && COMPARISON_P (XEXP (x, 0))
14686 && COMPARISON_P (XEXP (x, 1)))
14687 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14688 DOM_CC_X_OR_Y);
14690 /* An operation (on Thumb) where we want to test for a single bit.
14691 This is done by shifting that bit up into the top bit of a
14692 scratch register; we can then branch on the sign bit. */
14693 if (TARGET_THUMB1
14694 && GET_MODE (x) == SImode
14695 && (op == EQ || op == NE)
14696 && GET_CODE (x) == ZERO_EXTRACT
14697 && XEXP (x, 1) == const1_rtx)
14698 return CC_Nmode;
14700 /* For an operation that sets the condition codes as a side-effect, the
14701 V flag is not set correctly, so we can only use comparisons where
14702 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14703 instead.) */
14704 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14705 if (GET_MODE (x) == SImode
14706 && y == const0_rtx
14707 && (op == EQ || op == NE || op == LT || op == GE)
14708 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14709 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14710 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14711 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14712 || GET_CODE (x) == LSHIFTRT
14713 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14714 || GET_CODE (x) == ROTATERT
14715 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14716 return CC_NOOVmode;
14718 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14719 return CC_Zmode;
14721 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14722 && GET_CODE (x) == PLUS
14723 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14724 return CC_Cmode;
14726 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14728 switch (op)
14730 case EQ:
14731 case NE:
14732 /* A DImode comparison against zero can be implemented by
14733 or'ing the two halves together. */
14734 if (y == const0_rtx)
14735 return CC_Zmode;
14737 /* We can do an equality test in three Thumb instructions. */
14738 if (!TARGET_32BIT)
14739 return CC_Zmode;
14741 /* FALLTHROUGH */
14743 case LTU:
14744 case LEU:
14745 case GTU:
14746 case GEU:
14747 /* DImode unsigned comparisons can be implemented by cmp +
14748 cmpeq without a scratch register. Not worth doing in
14749 Thumb-2. */
14750 if (TARGET_32BIT)
14751 return CC_CZmode;
14753 /* FALLTHROUGH */
14755 case LT:
14756 case LE:
14757 case GT:
14758 case GE:
14759 /* DImode signed and unsigned comparisons can be implemented
14760 by cmp + sbcs with a scratch register, but that does not
14761 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14762 gcc_assert (op != EQ && op != NE);
14763 return CC_NCVmode;
14765 default:
14766 gcc_unreachable ();
14770 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14771 return GET_MODE (x);
14773 return CCmode;
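/* For instance, comparing (ashift:SI r1 (const_int 2)) against a register
   selects CC_SWPmode because the operands must be swapped when the
   comparison is output, and an EQ/NE test of (plus:SI r0 r1) against zero
   selects CC_NOOVmode since the overflow flag cannot be relied upon.  */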
14776 /* X and Y are two things to compare using CODE. Emit the compare insn and
14777 return the rtx for register 0 in the proper mode. FP means this is a
14778 floating point compare: I don't think that it is needed on the arm. */
14779 rtx
14780 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14782 machine_mode mode;
14783 rtx cc_reg;
14784 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14786 /* We might have X as a constant, Y as a register because of the predicates
14787 used for cmpdi. If so, force X to a register here. */
14788 if (dimode_comparison && !REG_P (x))
14789 x = force_reg (DImode, x);
14791 mode = SELECT_CC_MODE (code, x, y);
14792 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14794 if (dimode_comparison
14795 && mode != CC_CZmode)
14797 rtx clobber, set;
14799 /* To compare two non-zero values for equality, XOR them and
14800 then compare against zero. Not used for ARM mode; there
14801 CC_CZmode is cheaper. */
14802 if (mode == CC_Zmode && y != const0_rtx)
14804 gcc_assert (!reload_completed);
14805 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14806 y = const0_rtx;
14809 /* A scratch register is required. */
14810 if (reload_completed)
14811 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14812 else
14813 scratch = gen_rtx_SCRATCH (SImode);
14815 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14816 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14819 else
14820 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14822 return cc_reg;
14825 /* Generate a sequence of insns that will generate the correct return
14826 address mask depending on the physical architecture that the program
14827 is running on. */
14828 rtx
14829 arm_gen_return_addr_mask (void)
14831 rtx reg = gen_reg_rtx (Pmode);
14833 emit_insn (gen_return_addr_mask (reg));
14834 return reg;
14837 void
14838 arm_reload_in_hi (rtx *operands)
14840 rtx ref = operands[1];
14841 rtx base, scratch;
14842 HOST_WIDE_INT offset = 0;
14844 if (GET_CODE (ref) == SUBREG)
14846 offset = SUBREG_BYTE (ref);
14847 ref = SUBREG_REG (ref);
14850 if (REG_P (ref))
14852 /* We have a pseudo which has been spilt onto the stack; there
14853 are two cases here: the first where there is a simple
14854 stack-slot replacement and a second where the stack-slot is
14855 out of range, or is used as a subreg. */
14856 if (reg_equiv_mem (REGNO (ref)))
14858 ref = reg_equiv_mem (REGNO (ref));
14859 base = find_replacement (&XEXP (ref, 0));
14861 else
14862 /* The slot is out of range, or was dressed up in a SUBREG. */
14863 base = reg_equiv_address (REGNO (ref));
14865 /* PR 62554: If there is no equivalent memory location then just move
14866 the value as an SImode register move. This happens when the target
14867 architecture variant does not have an HImode register move. */
14868 if (base == NULL)
14870 gcc_assert (REG_P (operands[0]));
14871 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14872 gen_rtx_SUBREG (SImode, ref, 0)));
14873 return;
14876 else
14877 base = find_replacement (&XEXP (ref, 0));
14879 /* Handle the case where the address is too complex to be offset by 1. */
14880 if (GET_CODE (base) == MINUS
14881 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14883 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14885 emit_set_insn (base_plus, base);
14886 base = base_plus;
14888 else if (GET_CODE (base) == PLUS)
14890 /* The addend must be CONST_INT, or we would have dealt with it above. */
14891 HOST_WIDE_INT hi, lo;
14893 offset += INTVAL (XEXP (base, 1));
14894 base = XEXP (base, 0);
14896 /* Rework the address into a legal sequence of insns. */
14897 /* Valid range for lo is -4095 -> 4095 */
14898 lo = (offset >= 0
14899 ? (offset & 0xfff)
14900 : -((-offset) & 0xfff));
14902 /* Corner case: if lo is the max offset then we would be out of range
14903 once we have added the additional 1 below, so bump the msb into the
14904 pre-loading insn(s). */
14905 if (lo == 4095)
14906 lo &= 0x7ff;
14908 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14909 ^ (HOST_WIDE_INT) 0x80000000)
14910 - (HOST_WIDE_INT) 0x80000000);
14912 gcc_assert (hi + lo == offset);
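/* For example, an offset of 4100 is split into hi = 4096 and lo = 4, while
   an offset of 4095 (which would go out of range once the extra 1 is added
   for the second byte) is split into hi = 2048 and lo = 2047.  */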
14914 if (hi != 0)
14916 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14918 /* Get the base address; addsi3 knows how to handle constants
14919 that require more than one insn. */
14920 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14921 base = base_plus;
14922 offset = lo;
14926 /* Operands[2] may overlap operands[0] (though it won't overlap
14927 operands[1]); that's why we asked for a DImode reg -- so we can
14928 use the bit that does not overlap. */
14929 if (REGNO (operands[2]) == REGNO (operands[0]))
14930 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14931 else
14932 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14934 emit_insn (gen_zero_extendqisi2 (scratch,
14935 gen_rtx_MEM (QImode,
14936 plus_constant (Pmode, base,
14937 offset))));
14938 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14939 gen_rtx_MEM (QImode,
14940 plus_constant (Pmode, base,
14941 offset + 1))));
14942 if (!BYTES_BIG_ENDIAN)
14943 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14944 gen_rtx_IOR (SImode,
14945 gen_rtx_ASHIFT
14946 (SImode,
14947 gen_rtx_SUBREG (SImode, operands[0], 0),
14948 GEN_INT (8)),
14949 scratch));
14950 else
14951 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14952 gen_rtx_IOR (SImode,
14953 gen_rtx_ASHIFT (SImode, scratch,
14954 GEN_INT (8)),
14955 gen_rtx_SUBREG (SImode, operands[0], 0)));
14958 /* Handle storing a half-word to memory during reload by synthesizing it as two
14959 byte stores. Take care not to clobber the input values until after we
14960 have moved them somewhere safe. This code assumes that if the DImode
14961 scratch in operands[2] overlaps either the input value or output address
14962 in some way, then that value must die in this insn (we absolutely need
14963 two scratch registers for some corner cases). */
14964 void
14965 arm_reload_out_hi (rtx *operands)
14967 rtx ref = operands[0];
14968 rtx outval = operands[1];
14969 rtx base, scratch;
14970 HOST_WIDE_INT offset = 0;
14972 if (GET_CODE (ref) == SUBREG)
14974 offset = SUBREG_BYTE (ref);
14975 ref = SUBREG_REG (ref);
14978 if (REG_P (ref))
14980 /* We have a pseudo which has been spilt onto the stack; there
14981 are two cases here: the first where there is a simple
14982 stack-slot replacement and a second where the stack-slot is
14983 out of range, or is used as a subreg. */
14984 if (reg_equiv_mem (REGNO (ref)))
14986 ref = reg_equiv_mem (REGNO (ref));
14987 base = find_replacement (&XEXP (ref, 0));
14989 else
14990 /* The slot is out of range, or was dressed up in a SUBREG. */
14991 base = reg_equiv_address (REGNO (ref));
14993 /* PR 62254: If there is no equivalent memory location then just move
14994 the value as an SImode register move. This happens when the target
14995 architecture variant does not have an HImode register move. */
14996 if (base == NULL)
14998 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15000 if (REG_P (outval))
15002 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15003 gen_rtx_SUBREG (SImode, outval, 0)));
15005 else /* SUBREG_P (outval) */
15007 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15008 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15009 SUBREG_REG (outval)));
15010 else
15011 /* FIXME: Handle other cases ? */
15012 gcc_unreachable ();
15014 return;
15017 else
15018 base = find_replacement (&XEXP (ref, 0));
15020 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15022 /* Handle the case where the address is too complex to be offset by 1. */
15023 if (GET_CODE (base) == MINUS
15024 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15026 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15028 /* Be careful not to destroy OUTVAL. */
15029 if (reg_overlap_mentioned_p (base_plus, outval))
15031 /* Updating base_plus might destroy outval, see if we can
15032 swap the scratch and base_plus. */
15033 if (!reg_overlap_mentioned_p (scratch, outval))
15034 std::swap (scratch, base_plus);
15035 else
15037 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15039 /* Be conservative and copy OUTVAL into the scratch now,
15040 this should only be necessary if outval is a subreg
15041 of something larger than a word. */
15042 /* XXX Might this clobber base? I can't see how it can,
15043 since scratch is known to overlap with OUTVAL, and
15044 must be wider than a word. */
15045 emit_insn (gen_movhi (scratch_hi, outval));
15046 outval = scratch_hi;
15050 emit_set_insn (base_plus, base);
15051 base = base_plus;
15053 else if (GET_CODE (base) == PLUS)
15055 /* The addend must be CONST_INT, or we would have dealt with it above. */
15056 HOST_WIDE_INT hi, lo;
15058 offset += INTVAL (XEXP (base, 1));
15059 base = XEXP (base, 0);
15061 /* Rework the address into a legal sequence of insns. */
15062 /* Valid range for lo is -4095 -> 4095 */
15063 lo = (offset >= 0
15064 ? (offset & 0xfff)
15065 : -((-offset) & 0xfff));
15067 /* Corner case, if lo is the max offset then we would be out of range
15068 once we have added the additional 1 below, so bump the msb into the
15069 pre-loading insn(s). */
15070 if (lo == 4095)
15071 lo &= 0x7ff;
15073 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15074 ^ (HOST_WIDE_INT) 0x80000000)
15075 - (HOST_WIDE_INT) 0x80000000);
15077 gcc_assert (hi + lo == offset);
15079 if (hi != 0)
15081 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15083 /* Be careful not to destroy OUTVAL. */
15084 if (reg_overlap_mentioned_p (base_plus, outval))
15086 /* Updating base_plus might destroy outval, see if we
15087 can swap the scratch and base_plus. */
15088 if (!reg_overlap_mentioned_p (scratch, outval))
15089 std::swap (scratch, base_plus);
15090 else
15092 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15094 /* Be conservative and copy outval into scratch now,
15095 this should only be necessary if outval is a
15096 subreg of something larger than a word. */
15097 /* XXX Might this clobber base? I can't see how it
15098 can, since scratch is known to overlap with
15099 outval. */
15100 emit_insn (gen_movhi (scratch_hi, outval));
15101 outval = scratch_hi;
15105 /* Get the base address; addsi3 knows how to handle constants
15106 that require more than one insn. */
15107 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15108 base = base_plus;
15109 offset = lo;
15113 if (BYTES_BIG_ENDIAN)
15115 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15116 plus_constant (Pmode, base,
15117 offset + 1)),
15118 gen_lowpart (QImode, outval)));
15119 emit_insn (gen_lshrsi3 (scratch,
15120 gen_rtx_SUBREG (SImode, outval, 0),
15121 GEN_INT (8)));
15122 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15123 offset)),
15124 gen_lowpart (QImode, scratch)));
15126 else
15128 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15129 offset)),
15130 gen_lowpart (QImode, outval)));
15131 emit_insn (gen_lshrsi3 (scratch,
15132 gen_rtx_SUBREG (SImode, outval, 0),
15133 GEN_INT (8)));
15134 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15135 plus_constant (Pmode, base,
15136 offset + 1)),
15137 gen_lowpart (QImode, scratch)));
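/* Illustrative worked example (added note, not from the original source)
   of the hi/lo offset split used in arm_reload_out_hi above.  LO keeps at
   most 12 bits of magnitude so it fits the +/-4095 addressing range, and
   HI absorbs the remainder, sign-extended so that HI + LO == OFFSET:

     offset = 0x1234  ->  lo = 0x234, hi = 0x1000
     offset = 4095    ->  lo = 2047 (after the corner-case adjustment),
                          hi = 2048
     offset = -4100   ->  lo = -4,   hi = -4096

   In each case HI can be added with addsi3 and LO stays within the
   immediate range of the byte stores emitted above.  */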
15141 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15142 (padded to the size of a word) should be passed in a register. */
15144 static bool
15145 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15147 if (TARGET_AAPCS_BASED)
15148 return must_pass_in_stack_var_size (mode, type);
15149 else
15150 return must_pass_in_stack_var_size_or_pad (mode, type);
15154 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15155 Return true if an argument passed on the stack should be padded upwards,
15156 i.e. if the least-significant byte has useful data.
15157 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15158 aggregate types are placed in the lowest memory address. */
15160 bool
15161 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15163 if (!TARGET_AAPCS_BASED)
15164 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15166 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15167 return false;
15169 return true;
15173 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15174 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15175 register has useful data, and return the opposite if the most
15176 significant byte does. */
15178 bool
15179 arm_pad_reg_upward (machine_mode mode,
15180 tree type, int first ATTRIBUTE_UNUSED)
15182 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15184 /* For AAPCS, small aggregates, small fixed-point types,
15185 and small complex types are always padded upwards. */
15186 if (type)
15188 if ((AGGREGATE_TYPE_P (type)
15189 || TREE_CODE (type) == COMPLEX_TYPE
15190 || FIXED_POINT_TYPE_P (type))
15191 && int_size_in_bytes (type) <= 4)
15192 return true;
15194 else
15196 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15197 && GET_MODE_SIZE (mode) <= 4)
15198 return true;
15202 /* Otherwise, use default padding. */
15203 return !BYTES_BIG_ENDIAN;
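/* Added worked example (not from the original source): on an AAPCS
   big-endian target a three-byte struct satisfies the aggregate test
   above (int_size_in_bytes == 3 <= 4) and is therefore padded upward,
   while a type that matches none of the tests falls through to the
   default, !BYTES_BIG_ENDIAN.  */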
15206 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15207 assuming that the address in the base register is word aligned. */
15208 bool
15209 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15211 HOST_WIDE_INT max_offset;
15213 /* Offset must be a multiple of 4 in Thumb mode. */
15214 if (TARGET_THUMB2 && ((offset & 3) != 0))
15215 return false;
15217 if (TARGET_THUMB2)
15218 max_offset = 1020;
15219 else if (TARGET_ARM)
15220 max_offset = 255;
15221 else
15222 return false;
15224 return ((offset <= max_offset) && (offset >= -max_offset));
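/* Added usage examples (not from the original source): in Thumb-2 state
   an offset of 1020 is accepted, 1024 is rejected (out of range) and 6
   is rejected (not a multiple of 4); in ARM state 255 is accepted, 256
   is rejected, and there is no multiple-of-4 requirement.  */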
15227 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15228 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15229 Assumes that the address in the base register RN is word aligned. Pattern
15230 guarantees that both memory accesses use the same base register,
15231 the offsets are constants within the range, and the gap between the offsets is 4.
15232    If reload is complete then check that registers are legal.  WBACK indicates whether
15233 address is updated. LOAD indicates whether memory access is load or store. */
15234 bool
15235 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15236 bool wback, bool load)
15238 unsigned int t, t2, n;
15240 if (!reload_completed)
15241 return true;
15243 if (!offset_ok_for_ldrd_strd (offset))
15244 return false;
15246 t = REGNO (rt);
15247 t2 = REGNO (rt2);
15248 n = REGNO (rn);
15250 if ((TARGET_THUMB2)
15251 && ((wback && (n == t || n == t2))
15252 || (t == SP_REGNUM)
15253 || (t == PC_REGNUM)
15254 || (t2 == SP_REGNUM)
15255 || (t2 == PC_REGNUM)
15256 || (!load && (n == PC_REGNUM))
15257 || (load && (t == t2))
15258 /* Triggers Cortex-M3 LDRD errata. */
15259 || (!wback && load && fix_cm3_ldrd && (n == t))))
15260 return false;
15262 if ((TARGET_ARM)
15263 && ((wback && (n == t || n == t2))
15264 || (t2 == PC_REGNUM)
15265 || (t % 2 != 0) /* First destination register is not even. */
15266 || (t2 != t + 1)
15267 /* PC can be used as base register (for offset addressing only),
15268        but it is deprecated.  */
15269 || (n == PC_REGNUM)))
15270 return false;
15272 return true;
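/* Added usage examples (not from the original source), assuming reload
   has completed: in ARM state an LDRD of {r0, r1} from [r2] with a small
   in-range offset and no writeback passes the checks, while {r1, r2} is
   rejected because the first destination register is odd; in Thumb-2
   state a load with RT == RT2 is rejected; and in either state a
   writeback form whose base register equals a destination is rejected.  */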
15275 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15276 operand MEM's address contains an immediate offset from the base
15277 register and has no side effects, in which case it sets BASE and
15278 OFFSET accordingly. */
15279 static bool
15280 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15282 rtx addr;
15284 gcc_assert (base != NULL && offset != NULL);
15286 /* TODO: Handle more general memory operand patterns, such as
15287 PRE_DEC and PRE_INC. */
15289 if (side_effects_p (mem))
15290 return false;
15292 /* Can't deal with subregs. */
15293 if (GET_CODE (mem) == SUBREG)
15294 return false;
15296 gcc_assert (MEM_P (mem));
15298 *offset = const0_rtx;
15300 addr = XEXP (mem, 0);
15302 /* If addr isn't valid for DImode, then we can't handle it. */
15303 if (!arm_legitimate_address_p (DImode, addr,
15304 reload_in_progress || reload_completed))
15305 return false;
15307 if (REG_P (addr))
15309 *base = addr;
15310 return true;
15312 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15314 *base = XEXP (addr, 0);
15315 *offset = XEXP (addr, 1);
15316 return (REG_P (*base) && CONST_INT_P (*offset));
15319 return false;
15322 /* Called from a peephole2 to replace two word-size accesses with a
15323 single LDRD/STRD instruction. Returns true iff we can generate a
15324 new instruction sequence. That is, both accesses use the same base
15325 register and the gap between constant offsets is 4. This function
15326 may reorder its operands to match ldrd/strd RTL templates.
15327 OPERANDS are the operands found by the peephole matcher;
15328 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15329    corresponding memory operands.  LOAD indicates whether the access
15330 is load or store. CONST_STORE indicates a store of constant
15331 integer values held in OPERANDS[4,5] and assumes that the pattern
15332    is 4 insns long, for the purpose of checking dead registers.
15333 COMMUTE indicates that register operands may be reordered. */
15334 bool
15335 gen_operands_ldrd_strd (rtx *operands, bool load,
15336 bool const_store, bool commute)
15338 int nops = 2;
15339 HOST_WIDE_INT offsets[2], offset;
15340 rtx base = NULL_RTX;
15341 rtx cur_base, cur_offset, tmp;
15342 int i, gap;
15343 HARD_REG_SET regset;
15345 gcc_assert (!const_store || !load);
15346 /* Check that the memory references are immediate offsets from the
15347 same base register. Extract the base register, the destination
15348 registers, and the corresponding memory offsets. */
15349 for (i = 0; i < nops; i++)
15351 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15352 return false;
15354 if (i == 0)
15355 base = cur_base;
15356 else if (REGNO (base) != REGNO (cur_base))
15357 return false;
15359 offsets[i] = INTVAL (cur_offset);
15360 if (GET_CODE (operands[i]) == SUBREG)
15362 tmp = SUBREG_REG (operands[i]);
15363 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15364 operands[i] = tmp;
15368 /* Make sure there is no dependency between the individual loads. */
15369 if (load && REGNO (operands[0]) == REGNO (base))
15370 return false; /* RAW */
15372 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15373 return false; /* WAW */
15375 /* If the same input register is used in both stores
15376 when storing different constants, try to find a free register.
15377 For example, the code
15378 mov r0, 0
15379 str r0, [r2]
15380 mov r0, 1
15381 str r0, [r2, #4]
15382 can be transformed into
15383 mov r1, 0
15384 mov r0, 1
15385 strd r1, r0, [r2]
15386 in Thumb mode assuming that r1 is free.
15387 For ARM mode do the same but only if the starting register
15388 can be made to be even. */
15389 if (const_store
15390 && REGNO (operands[0]) == REGNO (operands[1])
15391 && INTVAL (operands[4]) != INTVAL (operands[5]))
15393 if (TARGET_THUMB2)
15395 CLEAR_HARD_REG_SET (regset);
15396 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15397 if (tmp == NULL_RTX)
15398 return false;
15400 /* Use the new register in the first load to ensure that
15401 if the original input register is not dead after peephole,
15402 then it will have the correct constant value. */
15403 operands[0] = tmp;
15405 else if (TARGET_ARM)
15407 int regno = REGNO (operands[0]);
15408 if (!peep2_reg_dead_p (4, operands[0]))
15410 /* When the input register is even and is not dead after the
15411 pattern, it has to hold the second constant but we cannot
15412 form a legal STRD in ARM mode with this register as the second
15413 register. */
15414 if (regno % 2 == 0)
15415 return false;
15417 /* Is regno-1 free? */
15418 SET_HARD_REG_SET (regset);
15419 CLEAR_HARD_REG_BIT(regset, regno - 1);
15420 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15421 if (tmp == NULL_RTX)
15422 return false;
15424 operands[0] = tmp;
15426 else
15428 /* Find a DImode register. */
15429 CLEAR_HARD_REG_SET (regset);
15430 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15431 if (tmp != NULL_RTX)
15433 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15434 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15436 else
15438 /* Can we use the input register to form a DI register? */
15439 SET_HARD_REG_SET (regset);
15440 CLEAR_HARD_REG_BIT(regset,
15441 regno % 2 == 0 ? regno + 1 : regno - 1);
15442 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15443 if (tmp == NULL_RTX)
15444 return false;
15445 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15449 gcc_assert (operands[0] != NULL_RTX);
15450 gcc_assert (operands[1] != NULL_RTX);
15451 gcc_assert (REGNO (operands[0]) % 2 == 0);
15452 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15456 /* Make sure the instructions are ordered with lower memory access first. */
15457 if (offsets[0] > offsets[1])
15459 gap = offsets[0] - offsets[1];
15460 offset = offsets[1];
15462 /* Swap the instructions such that lower memory is accessed first. */
15463 std::swap (operands[0], operands[1]);
15464 std::swap (operands[2], operands[3]);
15465 if (const_store)
15466 std::swap (operands[4], operands[5]);
15468 else
15470 gap = offsets[1] - offsets[0];
15471 offset = offsets[0];
15474 /* Make sure accesses are to consecutive memory locations. */
15475 if (gap != 4)
15476 return false;
15478 /* Make sure we generate legal instructions. */
15479 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15480 false, load))
15481 return true;
15483 /* In Thumb state, where registers are almost unconstrained, there
15484      is little hope of fixing it.  */
15485 if (TARGET_THUMB2)
15486 return false;
15488 if (load && commute)
15490 /* Try reordering registers. */
15491 std::swap (operands[0], operands[1]);
15492 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15493 false, load))
15494 return true;
15497 if (const_store)
15499 /* If input registers are dead after this pattern, they can be
15500 reordered or replaced by other registers that are free in the
15501 current pattern. */
15502 if (!peep2_reg_dead_p (4, operands[0])
15503 || !peep2_reg_dead_p (4, operands[1]))
15504 return false;
15506 /* Try to reorder the input registers. */
15507 /* For example, the code
15508 mov r0, 0
15509 mov r1, 1
15510 str r1, [r2]
15511 str r0, [r2, #4]
15512 can be transformed into
15513 mov r1, 0
15514 mov r0, 1
15515 strd r0, [r2]
15517 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15518 false, false))
15520 std::swap (operands[0], operands[1]);
15521 return true;
15524 /* Try to find a free DI register. */
15525 CLEAR_HARD_REG_SET (regset);
15526 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15527 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15528 while (true)
15530 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15531 if (tmp == NULL_RTX)
15532 return false;
15534 /* DREG must be an even-numbered register in DImode.
15535 Split it into SI registers. */
15536 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15537 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15538 gcc_assert (operands[0] != NULL_RTX);
15539 gcc_assert (operands[1] != NULL_RTX);
15540 gcc_assert (REGNO (operands[0]) % 2 == 0);
15541 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15543 return (operands_ok_ldrd_strd (operands[0], operands[1],
15544 base, offset,
15545 false, load));
15549 return false;
15555 /* Print a symbolic form of X to the debug file, F. */
15556 static void
15557 arm_print_value (FILE *f, rtx x)
15559 switch (GET_CODE (x))
15561 case CONST_INT:
15562 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15563 return;
15565 case CONST_DOUBLE:
15566 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15567 return;
15569 case CONST_VECTOR:
15571 int i;
15573 fprintf (f, "<");
15574 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15576 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15577 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15578 fputc (',', f);
15580 fprintf (f, ">");
15582 return;
15584 case CONST_STRING:
15585 fprintf (f, "\"%s\"", XSTR (x, 0));
15586 return;
15588 case SYMBOL_REF:
15589 fprintf (f, "`%s'", XSTR (x, 0));
15590 return;
15592 case LABEL_REF:
15593 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15594 return;
15596 case CONST:
15597 arm_print_value (f, XEXP (x, 0));
15598 return;
15600 case PLUS:
15601 arm_print_value (f, XEXP (x, 0));
15602 fprintf (f, "+");
15603 arm_print_value (f, XEXP (x, 1));
15604 return;
15606 case PC:
15607 fprintf (f, "pc");
15608 return;
15610 default:
15611 fprintf (f, "????");
15612 return;
15616 /* Routines for manipulation of the constant pool. */
15618 /* Arm instructions cannot load a large constant directly into a
15619 register; they have to come from a pc relative load. The constant
15620 must therefore be placed in the addressable range of the pc
15621 relative load. Depending on the precise pc relative load
15622 instruction the range is somewhere between 256 bytes and 4k. This
15623 means that we often have to dump a constant inside a function, and
15624 generate code to branch around it.
15626 It is important to minimize this, since the branches will slow
15627 things down and make the code larger.
15629 Normally we can hide the table after an existing unconditional
15630 branch so that there is no interruption of the flow, but in the
15631 worst case the code looks like this:
15633 ldr rn, L1
15635 b L2
15636 align
15637 L1: .long value
15641 ldr rn, L3
15643 b L4
15644 align
15645 L3: .long value
15649 We fix this by performing a scan after scheduling, which notices
15650 which instructions need to have their operands fetched from the
15651 constant table and builds the table.
15653 The algorithm starts by building a table of all the constants that
15654 need fixing up and all the natural barriers in the function (places
15655 where a constant table can be dropped without breaking the flow).
15656 For each fixup we note how far the pc-relative replacement will be
15657 able to reach and the offset of the instruction into the function.
15659 Having built the table we then group the fixes together to form
15660 tables that are as large as possible (subject to addressing
15661 constraints) and emit each table of constants after the last
15662 barrier that is within range of all the instructions in the group.
15663 If a group does not contain a barrier, then we forcibly create one
15664 by inserting a jump instruction into the flow. Once the table has
15665 been inserted, the insns are then modified to reference the
15666 relevant entry in the pool.
15668 Possible enhancements to the algorithm (not implemented) are:
15670 1) For some processors and object formats, there may be benefit in
15671 aligning the pools to the start of cache lines; this alignment
15672 would need to be taken into account when calculating addressability
15673 of a pool. */
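/* The following is a minimal, self-contained sketch of the grouping step
   described above; it is illustrative only (added, not part of the
   original file) and uses simplified types.  Each fix can reach only a
   limited distance past its own address, so the pool serving a group
   must be emitted before the tightest such limit in the group.  */
#if 0
struct toy_fix { long address; long reach; };

/* Return the address before which a pool serving fixes START..*END
   (inclusive) must be placed.  The group is grown greedily while the
   next fix still falls before the current limit; fixes are assumed to
   be sorted by increasing address.  */
static long
toy_group_limit (const struct toy_fix *fixes, int n, int start, int *end)
{
  long limit = fixes[start].address + fixes[start].reach;
  int i = start;

  while (i + 1 < n && fixes[i + 1].address < limit)
    {
      long this_limit;

      i++;
      this_limit = fixes[i].address + fixes[i].reach;
      if (this_limit < limit)
	limit = this_limit;	/* The tightest fix constrains the group.  */
    }

  *end = i;
  return limit;
}
#endif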
15675 /* These typedefs are located at the start of this file, so that
15676 they can be used in the prototypes there. This comment is to
15677 remind readers of that fact so that the following structures
15678 can be understood more easily.
15680 typedef struct minipool_node Mnode;
15681 typedef struct minipool_fixup Mfix; */
15683 struct minipool_node
15685 /* Doubly linked chain of entries. */
15686 Mnode * next;
15687 Mnode * prev;
15688 /* The maximum offset into the code that this entry can be placed. While
15689 pushing fixes for forward references, all entries are sorted in order
15690 of increasing max_address. */
15691 HOST_WIDE_INT max_address;
15692 /* Similarly for an entry inserted for a backwards ref. */
15693 HOST_WIDE_INT min_address;
15694 /* The number of fixes referencing this entry. This can become zero
15695 if we "unpush" an entry. In this case we ignore the entry when we
15696 come to emit the code. */
15697 int refcount;
15698 /* The offset from the start of the minipool. */
15699 HOST_WIDE_INT offset;
15700 /* The value in table. */
15701 rtx value;
15702 /* The mode of value. */
15703 machine_mode mode;
15704 /* The size of the value. With iWMMXt enabled
15705      sizes > 4 also imply an alignment of 8 bytes.  */
15706 int fix_size;
15709 struct minipool_fixup
15711 Mfix * next;
15712 rtx_insn * insn;
15713 HOST_WIDE_INT address;
15714 rtx * loc;
15715 machine_mode mode;
15716 int fix_size;
15717 rtx value;
15718 Mnode * minipool;
15719 HOST_WIDE_INT forwards;
15720 HOST_WIDE_INT backwards;
15723 /* Fixes less than a word need padding out to a word boundary. */
15724 #define MINIPOOL_FIX_SIZE(mode) \
15725 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
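/* Added example (not from the original source): MINIPOOL_FIX_SIZE (HImode)
   is 4, since the 2-byte value is padded out to a word, whereas
   MINIPOOL_FIX_SIZE (DImode) is simply 8.  */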
15727 static Mnode * minipool_vector_head;
15728 static Mnode * minipool_vector_tail;
15729 static rtx_code_label *minipool_vector_label;
15730 static int minipool_pad;
15732 /* The linked list of all minipool fixes required for this function. */
15733 Mfix * minipool_fix_head;
15734 Mfix * minipool_fix_tail;
15735 /* The fix entry for the current minipool, once it has been placed. */
15736 Mfix * minipool_barrier;
15738 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15739 #define JUMP_TABLES_IN_TEXT_SECTION 0
15740 #endif
15742 static HOST_WIDE_INT
15743 get_jump_table_size (rtx_jump_table_data *insn)
15745   /* ADDR_VECs only take room if read-only data goes into the text
15746 section. */
15747 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15749 rtx body = PATTERN (insn);
15750 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15751 HOST_WIDE_INT size;
15752 HOST_WIDE_INT modesize;
15754 modesize = GET_MODE_SIZE (GET_MODE (body));
15755 size = modesize * XVECLEN (body, elt);
15756 switch (modesize)
15758 case 1:
15759 /* Round up size of TBB table to a halfword boundary. */
15760 size = (size + 1) & ~HOST_WIDE_INT_1;
15761 break;
15762 case 2:
15763 /* No padding necessary for TBH. */
15764 break;
15765 case 4:
15766 /* Add two bytes for alignment on Thumb. */
15767 if (TARGET_THUMB)
15768 size += 2;
15769 break;
15770 default:
15771 gcc_unreachable ();
15773 return size;
15776 return 0;
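/* Added worked example (not from the original source): a QImode
   ADDR_DIFF_VEC (a Thumb-2 TBB table) with 5 entries occupies 5 bytes,
   rounded up to 6 for the halfword boundary; an HImode (TBH) table with
   5 entries needs exactly 10 bytes; an SImode table with 5 entries on
   Thumb takes 20 bytes plus 2 bytes of alignment.  */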
15779 /* Return the maximum amount of padding that will be inserted before
15780 label LABEL. */
15782 static HOST_WIDE_INT
15783 get_label_padding (rtx label)
15785 HOST_WIDE_INT align, min_insn_size;
15787 align = 1 << label_to_alignment (label);
15788 min_insn_size = TARGET_THUMB ? 2 : 4;
15789 return align > min_insn_size ? align - min_insn_size : 0;
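/* Added worked example (not from the original source): for a label
   aligned to 1 << 3 = 8 bytes, the maximum padding is 8 - 2 = 6 bytes on
   Thumb and 8 - 4 = 4 bytes on ARM; a label whose alignment does not
   exceed the minimum insn size needs no padding.  */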
15792 /* Move a minipool fix MP from its current location to before MAX_MP.
15793 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15794 constraints may need updating. */
15795 static Mnode *
15796 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15797 HOST_WIDE_INT max_address)
15799 /* The code below assumes these are different. */
15800 gcc_assert (mp != max_mp);
15802 if (max_mp == NULL)
15804 if (max_address < mp->max_address)
15805 mp->max_address = max_address;
15807 else
15809 if (max_address > max_mp->max_address - mp->fix_size)
15810 mp->max_address = max_mp->max_address - mp->fix_size;
15811 else
15812 mp->max_address = max_address;
15814 /* Unlink MP from its current position. Since max_mp is non-null,
15815 mp->prev must be non-null. */
15816 mp->prev->next = mp->next;
15817 if (mp->next != NULL)
15818 mp->next->prev = mp->prev;
15819 else
15820 minipool_vector_tail = mp->prev;
15822 /* Re-insert it before MAX_MP. */
15823 mp->next = max_mp;
15824 mp->prev = max_mp->prev;
15825 max_mp->prev = mp;
15827 if (mp->prev != NULL)
15828 mp->prev->next = mp;
15829 else
15830 minipool_vector_head = mp;
15833 /* Save the new entry. */
15834 max_mp = mp;
15836 /* Scan over the preceding entries and adjust their addresses as
15837 required. */
15838 while (mp->prev != NULL
15839 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15841 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15842 mp = mp->prev;
15845 return max_mp;
15848 /* Add a constant to the minipool for a forward reference. Returns the
15849 node added or NULL if the constant will not fit in this pool. */
15850 static Mnode *
15851 add_minipool_forward_ref (Mfix *fix)
15853 /* If set, max_mp is the first pool_entry that has a lower
15854 constraint than the one we are trying to add. */
15855 Mnode * max_mp = NULL;
15856 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15857 Mnode * mp;
15859 /* If the minipool starts before the end of FIX->INSN then this FIX
15860 can not be placed into the current pool. Furthermore, adding the
15861 new constant pool entry may cause the pool to start FIX_SIZE bytes
15862 earlier. */
15863 if (minipool_vector_head &&
15864 (fix->address + get_attr_length (fix->insn)
15865 >= minipool_vector_head->max_address - fix->fix_size))
15866 return NULL;
15868 /* Scan the pool to see if a constant with the same value has
15869 already been added. While we are doing this, also note the
15870 location where we must insert the constant if it doesn't already
15871 exist. */
15872 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15874 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15875 && fix->mode == mp->mode
15876 && (!LABEL_P (fix->value)
15877 || (CODE_LABEL_NUMBER (fix->value)
15878 == CODE_LABEL_NUMBER (mp->value)))
15879 && rtx_equal_p (fix->value, mp->value))
15881 /* More than one fix references this entry. */
15882 mp->refcount++;
15883 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15886 /* Note the insertion point if necessary. */
15887 if (max_mp == NULL
15888 && mp->max_address > max_address)
15889 max_mp = mp;
15891       /* If we are inserting an 8-byte aligned quantity and
15892 we have not already found an insertion point, then
15893 make sure that all such 8-byte aligned quantities are
15894 placed at the start of the pool. */
15895 if (ARM_DOUBLEWORD_ALIGN
15896 && max_mp == NULL
15897 && fix->fix_size >= 8
15898 && mp->fix_size < 8)
15900 max_mp = mp;
15901 max_address = mp->max_address;
15905 /* The value is not currently in the minipool, so we need to create
15906 a new entry for it. If MAX_MP is NULL, the entry will be put on
15907 the end of the list since the placement is less constrained than
15908 any existing entry. Otherwise, we insert the new fix before
15909 MAX_MP and, if necessary, adjust the constraints on the other
15910 entries. */
15911 mp = XNEW (Mnode);
15912 mp->fix_size = fix->fix_size;
15913 mp->mode = fix->mode;
15914 mp->value = fix->value;
15915 mp->refcount = 1;
15916 /* Not yet required for a backwards ref. */
15917 mp->min_address = -65536;
15919 if (max_mp == NULL)
15921 mp->max_address = max_address;
15922 mp->next = NULL;
15923 mp->prev = minipool_vector_tail;
15925 if (mp->prev == NULL)
15927 minipool_vector_head = mp;
15928 minipool_vector_label = gen_label_rtx ();
15930 else
15931 mp->prev->next = mp;
15933 minipool_vector_tail = mp;
15935 else
15937 if (max_address > max_mp->max_address - mp->fix_size)
15938 mp->max_address = max_mp->max_address - mp->fix_size;
15939 else
15940 mp->max_address = max_address;
15942 mp->next = max_mp;
15943 mp->prev = max_mp->prev;
15944 max_mp->prev = mp;
15945 if (mp->prev != NULL)
15946 mp->prev->next = mp;
15947 else
15948 minipool_vector_head = mp;
15951 /* Save the new entry. */
15952 max_mp = mp;
15954 /* Scan over the preceding entries and adjust their addresses as
15955 required. */
15956 while (mp->prev != NULL
15957 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15959 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15960 mp = mp->prev;
15963 return max_mp;
15966 static Mnode *
15967 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15968 HOST_WIDE_INT min_address)
15970 HOST_WIDE_INT offset;
15972 /* The code below assumes these are different. */
15973 gcc_assert (mp != min_mp);
15975 if (min_mp == NULL)
15977 if (min_address > mp->min_address)
15978 mp->min_address = min_address;
15980 else
15982 /* We will adjust this below if it is too loose. */
15983 mp->min_address = min_address;
15985 /* Unlink MP from its current position. Since min_mp is non-null,
15986 mp->next must be non-null. */
15987 mp->next->prev = mp->prev;
15988 if (mp->prev != NULL)
15989 mp->prev->next = mp->next;
15990 else
15991 minipool_vector_head = mp->next;
15993 /* Reinsert it after MIN_MP. */
15994 mp->prev = min_mp;
15995 mp->next = min_mp->next;
15996 min_mp->next = mp;
15997 if (mp->next != NULL)
15998 mp->next->prev = mp;
15999 else
16000 minipool_vector_tail = mp;
16003 min_mp = mp;
16005 offset = 0;
16006 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16008 mp->offset = offset;
16009 if (mp->refcount > 0)
16010 offset += mp->fix_size;
16012 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16013 mp->next->min_address = mp->min_address + mp->fix_size;
16016 return min_mp;
16019 /* Add a constant to the minipool for a backward reference. Returns the
16020 node added or NULL if the constant will not fit in this pool.
16022 Note that the code for insertion for a backwards reference can be
16023 somewhat confusing because the calculated offsets for each fix do
16024 not take into account the size of the pool (which is still under
16025    construction).  */
16026 static Mnode *
16027 add_minipool_backward_ref (Mfix *fix)
16029 /* If set, min_mp is the last pool_entry that has a lower constraint
16030 than the one we are trying to add. */
16031 Mnode *min_mp = NULL;
16032 /* This can be negative, since it is only a constraint. */
16033 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16034 Mnode *mp;
16036 /* If we can't reach the current pool from this insn, or if we can't
16037 insert this entry at the end of the pool without pushing other
16038 fixes out of range, then we don't try. This ensures that we
16039 can't fail later on. */
16040 if (min_address >= minipool_barrier->address
16041 || (minipool_vector_tail->min_address + fix->fix_size
16042 >= minipool_barrier->address))
16043 return NULL;
16045 /* Scan the pool to see if a constant with the same value has
16046 already been added. While we are doing this, also note the
16047 location where we must insert the constant if it doesn't already
16048 exist. */
16049 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16051 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16052 && fix->mode == mp->mode
16053 && (!LABEL_P (fix->value)
16054 || (CODE_LABEL_NUMBER (fix->value)
16055 == CODE_LABEL_NUMBER (mp->value)))
16056 && rtx_equal_p (fix->value, mp->value)
16057 /* Check that there is enough slack to move this entry to the
16058 end of the table (this is conservative). */
16059 && (mp->max_address
16060 > (minipool_barrier->address
16061 + minipool_vector_tail->offset
16062 + minipool_vector_tail->fix_size)))
16064 mp->refcount++;
16065 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16068 if (min_mp != NULL)
16069 mp->min_address += fix->fix_size;
16070 else
16072 /* Note the insertion point if necessary. */
16073 if (mp->min_address < min_address)
16075 	  /* For now, we do not allow the insertion of nodes that require
16076 	     8-byte alignment anywhere but at the start of the pool.  */
16077 if (ARM_DOUBLEWORD_ALIGN
16078 && fix->fix_size >= 8 && mp->fix_size < 8)
16079 return NULL;
16080 else
16081 min_mp = mp;
16083 else if (mp->max_address
16084 < minipool_barrier->address + mp->offset + fix->fix_size)
16086 /* Inserting before this entry would push the fix beyond
16087 its maximum address (which can happen if we have
16088 re-located a forwards fix); force the new fix to come
16089 after it. */
16090 if (ARM_DOUBLEWORD_ALIGN
16091 && fix->fix_size >= 8 && mp->fix_size < 8)
16092 return NULL;
16093 else
16095 min_mp = mp;
16096 min_address = mp->min_address + fix->fix_size;
16099 /* Do not insert a non-8-byte aligned quantity before 8-byte
16100 aligned quantities. */
16101 else if (ARM_DOUBLEWORD_ALIGN
16102 && fix->fix_size < 8
16103 && mp->fix_size >= 8)
16105 min_mp = mp;
16106 min_address = mp->min_address + fix->fix_size;
16111 /* We need to create a new entry. */
16112 mp = XNEW (Mnode);
16113 mp->fix_size = fix->fix_size;
16114 mp->mode = fix->mode;
16115 mp->value = fix->value;
16116 mp->refcount = 1;
16117 mp->max_address = minipool_barrier->address + 65536;
16119 mp->min_address = min_address;
16121 if (min_mp == NULL)
16123 mp->prev = NULL;
16124 mp->next = minipool_vector_head;
16126 if (mp->next == NULL)
16128 minipool_vector_tail = mp;
16129 minipool_vector_label = gen_label_rtx ();
16131 else
16132 mp->next->prev = mp;
16134 minipool_vector_head = mp;
16136 else
16138 mp->next = min_mp->next;
16139 mp->prev = min_mp;
16140 min_mp->next = mp;
16142 if (mp->next != NULL)
16143 mp->next->prev = mp;
16144 else
16145 minipool_vector_tail = mp;
16148 /* Save the new entry. */
16149 min_mp = mp;
16151 if (mp->prev)
16152 mp = mp->prev;
16153 else
16154 mp->offset = 0;
16156 /* Scan over the following entries and adjust their offsets. */
16157 while (mp->next != NULL)
16159 if (mp->next->min_address < mp->min_address + mp->fix_size)
16160 mp->next->min_address = mp->min_address + mp->fix_size;
16162 if (mp->refcount)
16163 mp->next->offset = mp->offset + mp->fix_size;
16164 else
16165 mp->next->offset = mp->offset;
16167 mp = mp->next;
16170 return min_mp;
16173 static void
16174 assign_minipool_offsets (Mfix *barrier)
16176 HOST_WIDE_INT offset = 0;
16177 Mnode *mp;
16179 minipool_barrier = barrier;
16181 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16183 mp->offset = offset;
16185 if (mp->refcount > 0)
16186 offset += mp->fix_size;
16190 /* Output the literal table */
16191 static void
16192 dump_minipool (rtx_insn *scan)
16194 Mnode * mp;
16195 Mnode * nmp;
16196 int align64 = 0;
16198 if (ARM_DOUBLEWORD_ALIGN)
16199 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16200 if (mp->refcount > 0 && mp->fix_size >= 8)
16202 align64 = 1;
16203 break;
16206 if (dump_file)
16207 fprintf (dump_file,
16208 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16209 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16211 scan = emit_label_after (gen_label_rtx (), scan);
16212 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16213 scan = emit_label_after (minipool_vector_label, scan);
16215 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16217 if (mp->refcount > 0)
16219 if (dump_file)
16221 fprintf (dump_file,
16222 ";; Offset %u, min %ld, max %ld ",
16223 (unsigned) mp->offset, (unsigned long) mp->min_address,
16224 (unsigned long) mp->max_address);
16225 arm_print_value (dump_file, mp->value);
16226 fputc ('\n', dump_file);
16229 rtx val = copy_rtx (mp->value);
16231 switch (GET_MODE_SIZE (mp->mode))
16233 #ifdef HAVE_consttable_1
16234 case 1:
16235 scan = emit_insn_after (gen_consttable_1 (val), scan);
16236 break;
16238 #endif
16239 #ifdef HAVE_consttable_2
16240 case 2:
16241 scan = emit_insn_after (gen_consttable_2 (val), scan);
16242 break;
16244 #endif
16245 #ifdef HAVE_consttable_4
16246 case 4:
16247 scan = emit_insn_after (gen_consttable_4 (val), scan);
16248 break;
16250 #endif
16251 #ifdef HAVE_consttable_8
16252 case 8:
16253 scan = emit_insn_after (gen_consttable_8 (val), scan);
16254 break;
16256 #endif
16257 #ifdef HAVE_consttable_16
16258 case 16:
16259 scan = emit_insn_after (gen_consttable_16 (val), scan);
16260 break;
16262 #endif
16263 default:
16264 gcc_unreachable ();
16268 nmp = mp->next;
16269 free (mp);
16272 minipool_vector_head = minipool_vector_tail = NULL;
16273 scan = emit_insn_after (gen_consttable_end (), scan);
16274 scan = emit_barrier_after (scan);
16277 /* Return the cost of forcibly inserting a barrier after INSN. */
16278 static int
16279 arm_barrier_cost (rtx_insn *insn)
16281 /* Basing the location of the pool on the loop depth is preferable,
16282 but at the moment, the basic block information seems to be
16283 corrupt by this stage of the compilation. */
16284 int base_cost = 50;
16285 rtx_insn *next = next_nonnote_insn (insn);
16287 if (next != NULL && LABEL_P (next))
16288 base_cost -= 20;
16290 switch (GET_CODE (insn))
16292 case CODE_LABEL:
16293 /* It will always be better to place the table before the label, rather
16294 than after it. */
16295 return 50;
16297 case INSN:
16298 case CALL_INSN:
16299 return base_cost;
16301 case JUMP_INSN:
16302 return base_cost - 10;
16304 default:
16305 return base_cost + 10;
16309 /* Find the best place in the insn stream in the range
16310 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16311 Create the barrier by inserting a jump and add a new fix entry for
16312 it. */
16313 static Mfix *
16314 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16316 HOST_WIDE_INT count = 0;
16317 rtx_barrier *barrier;
16318 rtx_insn *from = fix->insn;
16319 /* The instruction after which we will insert the jump. */
16320 rtx_insn *selected = NULL;
16321 int selected_cost;
16322 /* The address at which the jump instruction will be placed. */
16323 HOST_WIDE_INT selected_address;
16324 Mfix * new_fix;
16325 HOST_WIDE_INT max_count = max_address - fix->address;
16326 rtx_code_label *label = gen_label_rtx ();
16328 selected_cost = arm_barrier_cost (from);
16329 selected_address = fix->address;
16331 while (from && count < max_count)
16333 rtx_jump_table_data *tmp;
16334 int new_cost;
16336 /* This code shouldn't have been called if there was a natural barrier
16337 within range. */
16338 gcc_assert (!BARRIER_P (from));
16340 /* Count the length of this insn. This must stay in sync with the
16341 code that pushes minipool fixes. */
16342 if (LABEL_P (from))
16343 count += get_label_padding (from);
16344 else
16345 count += get_attr_length (from);
16347 /* If there is a jump table, add its length. */
16348 if (tablejump_p (from, NULL, &tmp))
16350 count += get_jump_table_size (tmp);
16352 /* Jump tables aren't in a basic block, so base the cost on
16353 the dispatch insn. If we select this location, we will
16354 still put the pool after the table. */
16355 new_cost = arm_barrier_cost (from);
16357 if (count < max_count
16358 && (!selected || new_cost <= selected_cost))
16360 selected = tmp;
16361 selected_cost = new_cost;
16362 selected_address = fix->address + count;
16365 /* Continue after the dispatch table. */
16366 from = NEXT_INSN (tmp);
16367 continue;
16370 new_cost = arm_barrier_cost (from);
16372 if (count < max_count
16373 && (!selected || new_cost <= selected_cost))
16375 selected = from;
16376 selected_cost = new_cost;
16377 selected_address = fix->address + count;
16380 from = NEXT_INSN (from);
16383 /* Make sure that we found a place to insert the jump. */
16384 gcc_assert (selected);
16386 /* Make sure we do not split a call and its corresponding
16387 CALL_ARG_LOCATION note. */
16388 if (CALL_P (selected))
16390 rtx_insn *next = NEXT_INSN (selected);
16391 if (next && NOTE_P (next)
16392 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16393 selected = next;
16396 /* Create a new JUMP_INSN that branches around a barrier. */
16397 from = emit_jump_insn_after (gen_jump (label), selected);
16398 JUMP_LABEL (from) = label;
16399 barrier = emit_barrier_after (from);
16400 emit_label_after (label, barrier);
16402 /* Create a minipool barrier entry for the new barrier. */
16403 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16404 new_fix->insn = barrier;
16405 new_fix->address = selected_address;
16406 new_fix->next = fix->next;
16407 fix->next = new_fix;
16409 return new_fix;
16412 /* Record that there is a natural barrier in the insn stream at
16413 ADDRESS. */
16414 static void
16415 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16417 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16419 fix->insn = insn;
16420 fix->address = address;
16422 fix->next = NULL;
16423 if (minipool_fix_head != NULL)
16424 minipool_fix_tail->next = fix;
16425 else
16426 minipool_fix_head = fix;
16428 minipool_fix_tail = fix;
16431 /* Record INSN, which will need fixing up to load a value from the
16432 minipool. ADDRESS is the offset of the insn since the start of the
16433 function; LOC is a pointer to the part of the insn which requires
16434 fixing; VALUE is the constant that must be loaded, which is of type
16435 MODE. */
16436 static void
16437 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16438 machine_mode mode, rtx value)
16440 gcc_assert (!arm_disable_literal_pool);
16441 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16443 fix->insn = insn;
16444 fix->address = address;
16445 fix->loc = loc;
16446 fix->mode = mode;
16447 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16448 fix->value = value;
16449 fix->forwards = get_attr_pool_range (insn);
16450 fix->backwards = get_attr_neg_pool_range (insn);
16451 fix->minipool = NULL;
16453 /* If an insn doesn't have a range defined for it, then it isn't
16454 expecting to be reworked by this code. Better to stop now than
16455 to generate duff assembly code. */
16456 gcc_assert (fix->forwards || fix->backwards);
16458 /* If an entry requires 8-byte alignment then assume all constant pools
16459 require 4 bytes of padding. Trying to do this later on a per-pool
16460 basis is awkward because existing pool entries have to be modified. */
16461 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16462 minipool_pad = 4;
16464 if (dump_file)
16466 fprintf (dump_file,
16467 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16468 GET_MODE_NAME (mode),
16469 INSN_UID (insn), (unsigned long) address,
16470 -1 * (long)fix->backwards, (long)fix->forwards);
16471 arm_print_value (dump_file, fix->value);
16472 fprintf (dump_file, "\n");
16475 /* Add it to the chain of fixes. */
16476 fix->next = NULL;
16478 if (minipool_fix_head != NULL)
16479 minipool_fix_tail->next = fix;
16480 else
16481 minipool_fix_head = fix;
16483 minipool_fix_tail = fix;
16486 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16487 Returns the number of insns needed, or 99 if we always want to synthesize
16488 the value. */
16490 arm_max_const_double_inline_cost ()
16492 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16495 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16496 Returns the number of insns needed, or 99 if we don't know how to
16497 do it. */
16499 arm_const_double_inline_cost (rtx val)
16501 rtx lowpart, highpart;
16502 machine_mode mode;
16504 mode = GET_MODE (val);
16506 if (mode == VOIDmode)
16507 mode = DImode;
16509 gcc_assert (GET_MODE_SIZE (mode) == 8);
16511 lowpart = gen_lowpart (SImode, val);
16512 highpart = gen_highpart_mode (SImode, mode, val);
16514 gcc_assert (CONST_INT_P (lowpart));
16515 gcc_assert (CONST_INT_P (highpart));
16517 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16518 NULL_RTX, NULL_RTX, 0, 0)
16519 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16520 NULL_RTX, NULL_RTX, 0, 0));
16523 /* Cost of loading a SImode constant. */
16524 static inline int
16525 arm_const_inline_cost (enum rtx_code code, rtx val)
16527 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16528 NULL_RTX, NULL_RTX, 1, 0);
16531 /* Return true if it is worthwhile to split a 64-bit constant into two
16532 32-bit operations. This is the case if optimizing for size, or
16533 if we have load delay slots, or if one 32-bit part can be done with
16534 a single data operation. */
16535 bool
16536 arm_const_double_by_parts (rtx val)
16538 machine_mode mode = GET_MODE (val);
16539 rtx part;
16541 if (optimize_size || arm_ld_sched)
16542 return true;
16544 if (mode == VOIDmode)
16545 mode = DImode;
16547 part = gen_highpart_mode (SImode, mode, val);
16549 gcc_assert (CONST_INT_P (part));
16551 if (const_ok_for_arm (INTVAL (part))
16552 || const_ok_for_arm (~INTVAL (part)))
16553 return true;
16555 part = gen_lowpart (SImode, val);
16557 gcc_assert (CONST_INT_P (part));
16559 if (const_ok_for_arm (INTVAL (part))
16560 || const_ok_for_arm (~INTVAL (part)))
16561 return true;
16563 return false;
16566 /* Return true if it is possible to inline both the high and low parts
16567 of a 64-bit constant into 32-bit data processing instructions. */
16568 bool
16569 arm_const_double_by_immediates (rtx val)
16571 machine_mode mode = GET_MODE (val);
16572 rtx part;
16574 if (mode == VOIDmode)
16575 mode = DImode;
16577 part = gen_highpart_mode (SImode, mode, val);
16579 gcc_assert (CONST_INT_P (part));
16581 if (!const_ok_for_arm (INTVAL (part)))
16582 return false;
16584 part = gen_lowpart (SImode, val);
16586 gcc_assert (CONST_INT_P (part));
16588 if (!const_ok_for_arm (INTVAL (part)))
16589 return false;
16591 return true;
16594 /* Scan INSN and note any of its operands that need fixing.
16595 If DO_PUSHES is false we do not actually push any of the fixups
16596 needed. */
16597 static void
16598 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16600 int opno;
16602 extract_constrain_insn (insn);
16604 if (recog_data.n_alternatives == 0)
16605 return;
16607 /* Fill in recog_op_alt with information about the constraints of
16608 this insn. */
16609 preprocess_constraints (insn);
16611 const operand_alternative *op_alt = which_op_alt ();
16612 for (opno = 0; opno < recog_data.n_operands; opno++)
16614 /* Things we need to fix can only occur in inputs. */
16615 if (recog_data.operand_type[opno] != OP_IN)
16616 continue;
16618 /* If this alternative is a memory reference, then any mention
16619 of constants in this alternative is really to fool reload
16620 into allowing us to accept one there. We need to fix them up
16621 now so that we output the right code. */
16622 if (op_alt[opno].memory_ok)
16624 rtx op = recog_data.operand[opno];
16626 if (CONSTANT_P (op))
16628 if (do_pushes)
16629 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16630 recog_data.operand_mode[opno], op);
16632 else if (MEM_P (op)
16633 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16634 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16636 if (do_pushes)
16638 rtx cop = avoid_constant_pool_reference (op);
16640 /* Casting the address of something to a mode narrower
16641 than a word can cause avoid_constant_pool_reference()
16642 to return the pool reference itself. That's no good to
16643 		     us here.  Let's just hope that we can use the
16644 constant pool value directly. */
16645 if (op == cop)
16646 cop = get_pool_constant (XEXP (op, 0));
16648 push_minipool_fix (insn, address,
16649 recog_data.operand_loc[opno],
16650 recog_data.operand_mode[opno], cop);
16657 return;
16660 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16661 and unions in the context of ARMv8-M Security Extensions. It is used as a
16662 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16663 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16664 or four masks, depending on whether it is being computed for a
16665 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16666 respectively. The tree for the type of the argument or a field within an
16667 argument is passed in ARG_TYPE, the current register this argument or field
16668 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16669 argument or field starts at is passed in STARTING_BIT and the last used bit
16670 is kept in LAST_USED_BIT which is also updated accordingly. */
16672 static unsigned HOST_WIDE_INT
16673 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16674 uint32_t * padding_bits_to_clear,
16675 unsigned starting_bit, int * last_used_bit)
16678 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16680 if (TREE_CODE (arg_type) == RECORD_TYPE)
16682 unsigned current_bit = starting_bit;
16683 tree field;
16684 long int offset, size;
16687 field = TYPE_FIELDS (arg_type);
16688 while (field)
16690 /* The offset within a structure is always an offset from
16691 the start of that structure. Make sure we take that into the
16692 calculation of the register based offset that we use here. */
16693 offset = starting_bit;
16694 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16695 offset %= 32;
16697 /* This is the actual size of the field, for bitfields this is the
16698 bitfield width and not the container size. */
16699 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16701 if (*last_used_bit != offset)
16703 if (offset < *last_used_bit)
16705 /* This field's offset is before the 'last_used_bit', that
16706 means this field goes on the next register. So we need to
16707 pad the rest of the current register and increase the
16708 register number. */
16709 uint32_t mask;
16710 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16711 mask++;
16713 padding_bits_to_clear[*regno] |= mask;
16714 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16715 (*regno)++;
16717 else
16719 /* Otherwise we pad the bits between the last field's end and
16720 the start of the new field. */
16721 uint32_t mask;
16723 mask = ((uint32_t)-1) >> (32 - offset);
16724 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16725 padding_bits_to_clear[*regno] |= mask;
16727 current_bit = offset;
16730 /* Calculate further padding bits for inner structs/unions too. */
16731 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16733 *last_used_bit = current_bit;
16734 not_to_clear_reg_mask
16735 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16736 padding_bits_to_clear, offset,
16737 last_used_bit);
16739 else
16741 /* Update 'current_bit' with this field's size. If the
16742 'current_bit' lies in a subsequent register, update 'regno' and
16743 reset 'current_bit' to point to the current bit in that new
16744 register. */
16745 current_bit += size;
16746 while (current_bit >= 32)
16748 current_bit-=32;
16749 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16750 (*regno)++;
16752 *last_used_bit = current_bit;
16755 field = TREE_CHAIN (field);
16757 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16759 else if (TREE_CODE (arg_type) == UNION_TYPE)
16761 tree field, field_t;
16762 int i, regno_t, field_size;
16763 int max_reg = -1;
16764 int max_bit = -1;
16765 uint32_t mask;
16766 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16767 = {-1, -1, -1, -1};
16769 /* To compute the padding bits in a union we only consider bits as
16770 padding bits if they are always either a padding bit or fall outside a
16771 	 field's size for all fields in the union.  */
16772 field = TYPE_FIELDS (arg_type);
16773 while (field)
16775 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16776 = {0U, 0U, 0U, 0U};
16777 int last_used_bit_t = *last_used_bit;
16778 regno_t = *regno;
16779 field_t = TREE_TYPE (field);
16781 /* If the field's type is either a record or a union make sure to
16782 compute their padding bits too. */
16783 if (RECORD_OR_UNION_TYPE_P (field_t))
16784 not_to_clear_reg_mask
16785 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16786 &padding_bits_to_clear_t[0],
16787 starting_bit, &last_used_bit_t);
16788 else
16790 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16791 regno_t = (field_size / 32) + *regno;
16792 last_used_bit_t = (starting_bit + field_size) % 32;
16795 for (i = *regno; i < regno_t; i++)
16797 /* For all but the last register used by this field only keep the
16798 padding bits that were padding bits in this field. */
16799 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16802 /* For the last register, keep all padding bits that were padding
16803 bits in this field and any padding bits that are still valid
16804 as padding bits but fall outside of this field's size. */
16805 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16806 padding_bits_to_clear_res[regno_t]
16807 &= padding_bits_to_clear_t[regno_t] | mask;
16809 /* Update the maximum size of the fields in terms of registers used
16810 ('max_reg') and the 'last_used_bit' in said register. */
16811 if (max_reg < regno_t)
16813 max_reg = regno_t;
16814 max_bit = last_used_bit_t;
16816 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16817 max_bit = last_used_bit_t;
16819 field = TREE_CHAIN (field);
16822 /* Update the current padding_bits_to_clear using the intersection of the
16823 padding bits of all the fields. */
16824 for (i=*regno; i < max_reg; i++)
16825 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16827       /* Do not keep trailing padding bits; we do not know yet whether this
16828 is the end of the argument. */
16829 mask = ((uint32_t) 1 << max_bit) - 1;
16830 padding_bits_to_clear[max_reg]
16831 |= padding_bits_to_clear_res[max_reg] & mask;
16833 *regno = max_reg;
16834 *last_used_bit = max_bit;
16836 else
16837 /* This function should only be used for structs and unions. */
16838 gcc_unreachable ();
16840 return not_to_clear_reg_mask;
16843 /* In the context of ARMv8-M Security Extensions, this function is used for both
16844 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16845 registers are used when returning or passing arguments, which is then
16846 returned as a mask. It will also compute a mask to indicate padding/unused
16847 bits for each of these registers, and passes this through the
16848 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16849 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16850 the starting register used to pass this argument or return value is passed
16851 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16852 for struct and union types. */
16854 static unsigned HOST_WIDE_INT
16855 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16856 uint32_t * padding_bits_to_clear)
16859 int last_used_bit = 0;
16860 unsigned HOST_WIDE_INT not_to_clear_mask;
16862 if (RECORD_OR_UNION_TYPE_P (arg_type))
16864 not_to_clear_mask
16865 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16866 padding_bits_to_clear, 0,
16867 &last_used_bit);
16870 /* If the 'last_used_bit' is not zero, that means we are still using a
16871 part of the last 'regno'. In such cases we must clear the trailing
16872 bits. Otherwise we are not using regno and we should mark it as to
16873 clear. */
16874 if (last_used_bit != 0)
16875 padding_bits_to_clear[regno]
16876 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16877 else
16878 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16880 else
16882 not_to_clear_mask = 0;
16883       /* We are not dealing with structs or unions, so these arguments may be
16884 passed in floating point registers too. In some cases a BLKmode is
16885 used when returning or passing arguments in multiple VFP registers. */
16886 if (GET_MODE (arg_rtx) == BLKmode)
16888 int i, arg_regs;
16889 rtx reg;
16891 /* This should really only occur when dealing with the hard-float
16892 ABI. */
16893 gcc_assert (TARGET_HARD_FLOAT_ABI);
16895 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16897 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16898 gcc_assert (REG_P (reg));
16900 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16902 /* If we are dealing with DF mode, make sure we don't
16903 clear either of the registers it addresses. */
16904 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16905 if (arg_regs > 1)
16907 unsigned HOST_WIDE_INT mask;
16908 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16909 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16910 not_to_clear_mask |= mask;
16914 else
16916 /* Otherwise we can rely on the MODE to determine how many registers
16917 are being used by this argument. */
16918 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16919 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16920 if (arg_regs > 1)
16922 unsigned HOST_WIDE_INT
16923 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16924 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16925 not_to_clear_mask |= mask;
16930 return not_to_clear_mask;
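/* Added worked example (not from the original source), assuming the usual
   AAPCS layout: for an argument of type struct { char a; short b; }
   passed in r0, 'a' occupies bits 0-7 and 'b' bits 16-31, so bits 8-15
   are padding; the helpers above record 0x0000ff00 in
   padding_bits_to_clear[0] and return a mask with only the r0 bit set,
   i.e. r0 holds live argument data and every other argument register may
   be cleared.  */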
16933 /* Clears caller saved registers not used to pass arguments before a
16934 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16935 registers is done in __gnu_cmse_nonsecure_call libcall.
16936 See libgcc/config/arm/cmse_nonsecure_call.S. */
16938 static void
16939 cmse_nonsecure_call_clear_caller_saved (void)
16941 basic_block bb;
16943 FOR_EACH_BB_FN (bb, cfun)
16945 rtx_insn *insn;
16947 FOR_BB_INSNS (bb, insn)
16949 uint64_t to_clear_mask, float_mask;
16950 rtx_insn *seq;
16951 rtx pat, call, unspec, reg, cleared_reg, tmp;
16952 unsigned int regno, maxregno;
16953 rtx address;
16954 CUMULATIVE_ARGS args_so_far_v;
16955 cumulative_args_t args_so_far;
16956 tree arg_type, fntype;
16957 bool using_r4, first_param = true;
16958 function_args_iterator args_iter;
16959 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16960 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16962 if (!NONDEBUG_INSN_P (insn))
16963 continue;
16965 if (!CALL_P (insn))
16966 continue;
16968 pat = PATTERN (insn);
16969 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16970 call = XVECEXP (pat, 0, 0);
16972 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16973 if (GET_CODE (call) == SET)
16974 call = SET_SRC (call);
16976 /* Check if it is a cmse_nonsecure_call. */
16977 unspec = XEXP (call, 0);
16978 if (GET_CODE (unspec) != UNSPEC
16979 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16980 continue;
16982 /* Determine the caller-saved registers we need to clear. */
16983 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16984 maxregno = NUM_ARG_REGS - 1;
16985 /* Only look at the caller-saved floating point registers in case of
16986 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16987 lazy store and loads which clear both caller- and callee-saved
16988 registers. */
16989 if (TARGET_HARD_FLOAT_ABI)
16991 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16992 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16993 to_clear_mask |= float_mask;
16994 maxregno = D7_VFP_REGNUM;
16997 /* Make sure the register used to hold the function address is not
16998 cleared. */
16999 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17000 gcc_assert (MEM_P (address));
17001 gcc_assert (REG_P (XEXP (address, 0)));
17002 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17004 /* Set basic block of call insn so that df rescan is performed on
17005 insns inserted here. */
17006 set_block_for_insn (insn, bb);
17007 df_set_flags (DF_DEFER_INSN_RESCAN);
17008 start_sequence ();
17010 /* Make sure the scheduler doesn't schedule other insns beyond
17011 here. */
17012 emit_insn (gen_blockage ());
17014 /* Walk through all arguments and clear registers appropriately. */
17016 fntype = TREE_TYPE (MEM_EXPR (address));
17017 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17018 NULL_TREE);
17019 args_so_far = pack_cumulative_args (&args_so_far_v);
17020 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17022 rtx arg_rtx;
17023 machine_mode arg_mode = TYPE_MODE (arg_type);
17025 if (VOID_TYPE_P (arg_type))
17026 continue;
17028 if (!first_param)
17029 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17030 true);
17032 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17033 true);
17034 gcc_assert (REG_P (arg_rtx));
17035 to_clear_mask
17036 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17037 REGNO (arg_rtx),
17038 padding_bits_to_clear_ptr);
17040 first_param = false;
17043 /* Clear padding bits where needed. */
17044 cleared_reg = XEXP (address, 0);
17045 reg = gen_rtx_REG (SImode, IP_REGNUM);
17046 using_r4 = false;
17047 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17049 if (padding_bits_to_clear[regno] == 0)
17050 continue;
17052 /* If this is a Thumb-1 target, copy the address of the function
17053 we are calling from 'r4' into 'ip' such that we can use r4 to
17054 clear the unused bits in the arguments. */
17055 if (TARGET_THUMB1 && !using_r4)
17057 using_r4 = true;
17058 reg = cleared_reg;
17059 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17060 reg);
17063 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17064 emit_move_insn (reg, tmp);
17065 /* Also fill the top half of the negated
17066 padding_bits_to_clear. */
17067 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17069 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17070 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17071 GEN_INT (16),
17072 GEN_INT (16)),
17073 tmp));
17076 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17077 gen_rtx_REG (SImode, regno),
17078 reg));
17081 if (using_r4)
17082 emit_move_insn (cleared_reg,
17083 gen_rtx_REG (SImode, IP_REGNUM));
17085 /* We use right shift and left shift to clear the LSB of the address
17086 we jump to instead of using bic, to avoid having to use an extra
17087 register on Thumb-1. */
17088 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17089 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17090 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17091 emit_insn (gen_rtx_SET (cleared_reg, tmp));
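      /* Illustrative example: with a function address of 0x08000205 (the
         Thumb bit set), the right shift leaves 0x04000102 and the left
         shift restores 0x08000204, i.e. only bit 0 has been cleared.  */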
17093 /* Clear all registers that could leak before doing a non-secure
17094 call. */
17095 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17097 if (!(to_clear_mask & (1LL << regno)))
17098 continue;
17100 /* If regno is an even vfp register and its successor is also to
17101 be cleared, use vmov. */
17102 if (IS_VFP_REGNUM (regno))
17104 if (TARGET_VFP_DOUBLE
17105 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17106 && to_clear_mask & (1LL << (regno + 1)))
17107 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17108 CONST0_RTX (DFmode));
17109 else
17110 emit_move_insn (gen_rtx_REG (SFmode, regno),
17111 CONST0_RTX (SFmode));
17113 else
17114 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17117 seq = get_insns ();
17118 end_sequence ();
17119 emit_insn_before (seq, insn);
17125 /* Rewrite move insn into subtract of 0 if the condition codes will
17126 be useful in next conditional jump insn. */
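/* Illustrative example of the transformation: a Thumb-1 sequence such as

       movs    r1, r2
       cmp     r1, #0
       beq     .L1

   can be rewritten so that the move becomes

       subs    r1, r2, #0

   after which the flags already reflect the tested value and the separate
   compare can normally be omitted.  The register names here are purely
   illustrative.  */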
17128 static void
17129 thumb1_reorg (void)
17131 basic_block bb;
17133 FOR_EACH_BB_FN (bb, cfun)
17135 rtx dest, src;
17136 rtx cmp, op0, op1, set = NULL;
17137 rtx_insn *prev, *insn = BB_END (bb);
17138 bool insn_clobbered = false;
17140 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17141 insn = PREV_INSN (insn);
17143 /* Find the last cbranchsi4_insn in basic block BB. */
17144 if (insn == BB_HEAD (bb)
17145 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17146 continue;
17148 /* Get the register with which we are comparing. */
17149 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17150 op0 = XEXP (cmp, 0);
17151 op1 = XEXP (cmp, 1);
17153 /* Check that comparison is against ZERO. */
17154 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17155 continue;
17157 /* Find the first flag setting insn before INSN in basic block BB. */
17158 gcc_assert (insn != BB_HEAD (bb));
17159 for (prev = PREV_INSN (insn);
17160 (!insn_clobbered
17161 && prev != BB_HEAD (bb)
17162 && (NOTE_P (prev)
17163 || DEBUG_INSN_P (prev)
17164 || ((set = single_set (prev)) != NULL
17165 && get_attr_conds (prev) == CONDS_NOCOND)));
17166 prev = PREV_INSN (prev))
17168 if (reg_set_p (op0, prev))
17169 insn_clobbered = true;
17172 /* Skip if op0 is clobbered by insn other than prev. */
17173 if (insn_clobbered)
17174 continue;
17176 if (!set)
17177 continue;
17179 dest = SET_DEST (set);
17180 src = SET_SRC (set);
17181 if (!low_register_operand (dest, SImode)
17182 || !low_register_operand (src, SImode))
17183 continue;
17185 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17186 in INSN. Both src and dest of the move insn are checked. */
17187 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17189 dest = copy_rtx (dest);
17190 src = copy_rtx (src);
17191 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17192 PATTERN (prev) = gen_rtx_SET (dest, src);
17193 INSN_CODE (prev) = -1;
17194 /* Set test register in INSN to dest. */
17195 XEXP (cmp, 0) = copy_rtx (dest);
17196 INSN_CODE (insn) = -1;
17201 /* Convert instructions to their cc-clobbering variant if possible, since
17202 that allows us to use smaller encodings. */
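/* Illustrative example: when the condition codes are dead, a 32-bit Thumb-2
   instruction such as

       add     r0, r1, r2

   can be replaced by the flag-setting form

       adds    r0, r1, r2

   which has a 16-bit encoding.  The register names are illustrative only.  */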
17204 static void
17205 thumb2_reorg (void)
17207 basic_block bb;
17208 regset_head live;
17210 INIT_REG_SET (&live);
17212 /* We are freeing block_for_insn in the toplev to keep compatibility
17213 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17214 compute_bb_for_insn ();
17215 df_analyze ();
17217 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17219 FOR_EACH_BB_FN (bb, cfun)
17221 if ((current_tune->disparage_flag_setting_t16_encodings
17222 == tune_params::DISPARAGE_FLAGS_ALL)
17223 && optimize_bb_for_speed_p (bb))
17224 continue;
17226 rtx_insn *insn;
17227 Convert_Action action = SKIP;
17228 Convert_Action action_for_partial_flag_setting
17229 = ((current_tune->disparage_flag_setting_t16_encodings
17230 != tune_params::DISPARAGE_FLAGS_NEITHER)
17231 && optimize_bb_for_speed_p (bb))
17232 ? SKIP : CONV;
17234 COPY_REG_SET (&live, DF_LR_OUT (bb));
17235 df_simulate_initialize_backwards (bb, &live);
17236 FOR_BB_INSNS_REVERSE (bb, insn)
17238 if (NONJUMP_INSN_P (insn)
17239 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17240 && GET_CODE (PATTERN (insn)) == SET)
17242 action = SKIP;
17243 rtx pat = PATTERN (insn);
17244 rtx dst = XEXP (pat, 0);
17245 rtx src = XEXP (pat, 1);
17246 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17248 if (UNARY_P (src) || BINARY_P (src))
17249 op0 = XEXP (src, 0);
17251 if (BINARY_P (src))
17252 op1 = XEXP (src, 1);
17254 if (low_register_operand (dst, SImode))
17256 switch (GET_CODE (src))
17258 case PLUS:
17259 /* Adding two registers and storing the result
17260 in the first source is already a 16-bit
17261 operation. */
17262 if (rtx_equal_p (dst, op0)
17263 && register_operand (op1, SImode))
17264 break;
17266 if (low_register_operand (op0, SImode))
17268 /* ADDS <Rd>,<Rn>,<Rm> */
17269 if (low_register_operand (op1, SImode))
17270 action = CONV;
17271 /* ADDS <Rdn>,#<imm8> */
17272 /* SUBS <Rdn>,#<imm8> */
17273 else if (rtx_equal_p (dst, op0)
17274 && CONST_INT_P (op1)
17275 && IN_RANGE (INTVAL (op1), -255, 255))
17276 action = CONV;
17277 /* ADDS <Rd>,<Rn>,#<imm3> */
17278 /* SUBS <Rd>,<Rn>,#<imm3> */
17279 else if (CONST_INT_P (op1)
17280 && IN_RANGE (INTVAL (op1), -7, 7))
17281 action = CONV;
17283 /* ADCS <Rd>, <Rn> */
17284 else if (GET_CODE (XEXP (src, 0)) == PLUS
17285 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17286 && low_register_operand (XEXP (XEXP (src, 0), 1),
17287 SImode)
17288 && COMPARISON_P (op1)
17289 && cc_register (XEXP (op1, 0), VOIDmode)
17290 && maybe_get_arm_condition_code (op1) == ARM_CS
17291 && XEXP (op1, 1) == const0_rtx)
17292 action = CONV;
17293 break;
17295 case MINUS:
17296 /* RSBS <Rd>,<Rn>,#0
17297 Not handled here: see NEG below. */
17298 /* SUBS <Rd>,<Rn>,#<imm3>
17299 SUBS <Rdn>,#<imm8>
17300 Not handled here: see PLUS above. */
17301 /* SUBS <Rd>,<Rn>,<Rm> */
17302 if (low_register_operand (op0, SImode)
17303 && low_register_operand (op1, SImode))
17304 action = CONV;
17305 break;
17307 case MULT:
17308 /* MULS <Rdm>,<Rn>,<Rdm>
17309 As an exception to the rule, this is only used
17310 when optimizing for size since MULS is slow on all
17311 known implementations. We do not even want to use
17312 MULS in cold code, if optimizing for speed, so we
17313 test the global flag here. */
17314 if (!optimize_size)
17315 break;
17316 /* Fall through. */
17317 case AND:
17318 case IOR:
17319 case XOR:
17320 /* ANDS <Rdn>,<Rm> */
17321 if (rtx_equal_p (dst, op0)
17322 && low_register_operand (op1, SImode))
17323 action = action_for_partial_flag_setting;
17324 else if (rtx_equal_p (dst, op1)
17325 && low_register_operand (op0, SImode))
17326 action = action_for_partial_flag_setting == SKIP
17327 ? SKIP : SWAP_CONV;
17328 break;
17330 case ASHIFTRT:
17331 case ASHIFT:
17332 case LSHIFTRT:
17333 /* ASRS <Rdn>,<Rm> */
17334 /* LSRS <Rdn>,<Rm> */
17335 /* LSLS <Rdn>,<Rm> */
17336 if (rtx_equal_p (dst, op0)
17337 && low_register_operand (op1, SImode))
17338 action = action_for_partial_flag_setting;
17339 /* ASRS <Rd>,<Rm>,#<imm5> */
17340 /* LSRS <Rd>,<Rm>,#<imm5> */
17341 /* LSLS <Rd>,<Rm>,#<imm5> */
17342 else if (low_register_operand (op0, SImode)
17343 && CONST_INT_P (op1)
17344 && IN_RANGE (INTVAL (op1), 0, 31))
17345 action = action_for_partial_flag_setting;
17346 break;
17348 case ROTATERT:
17349 /* RORS <Rdn>,<Rm> */
17350 if (rtx_equal_p (dst, op0)
17351 && low_register_operand (op1, SImode))
17352 action = action_for_partial_flag_setting;
17353 break;
17355 case NOT:
17356 /* MVNS <Rd>,<Rm> */
17357 if (low_register_operand (op0, SImode))
17358 action = action_for_partial_flag_setting;
17359 break;
17361 case NEG:
17362 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17363 if (low_register_operand (op0, SImode))
17364 action = CONV;
17365 break;
17367 case CONST_INT:
17368 /* MOVS <Rd>,#<imm8> */
17369 if (CONST_INT_P (src)
17370 && IN_RANGE (INTVAL (src), 0, 255))
17371 action = action_for_partial_flag_setting;
17372 break;
17374 case REG:
17375 /* MOVS and MOV<c> with registers have different
17376 encodings, so are not relevant here. */
17377 break;
17379 default:
17380 break;
17384 if (action != SKIP)
17386 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17387 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17388 rtvec vec;
17390 if (action == SWAP_CONV)
17392 src = copy_rtx (src);
17393 XEXP (src, 0) = op1;
17394 XEXP (src, 1) = op0;
17395 pat = gen_rtx_SET (dst, src);
17396 vec = gen_rtvec (2, pat, clobber);
17398 else /* action == CONV */
17399 vec = gen_rtvec (2, pat, clobber);
17401 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17402 INSN_CODE (insn) = -1;
17406 if (NONDEBUG_INSN_P (insn))
17407 df_simulate_one_insn_backwards (bb, insn, &live);
17411 CLEAR_REG_SET (&live);
17414 /* GCC puts the pool in the wrong place for ARM, since we can only
17415 load addresses a limited distance around the pc. We do some
17416 special munging to move the constant pool values to the correct
17417 point in the code. */
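/* Illustrative example: a constant load written as "ldr r0, =0x12345678" is
   emitted as a pc-relative load, roughly "ldr r0, [pc, #offset]", and the
   word 0x12345678 must be placed in a pool near enough for the offset to be
   encodable (about 4K bytes in ARM state, considerably less in Thumb state).
   This pass finds those placement points.  */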
17418 static void
17419 arm_reorg (void)
17421 rtx_insn *insn;
17422 HOST_WIDE_INT address = 0;
17423 Mfix * fix;
17425 if (use_cmse)
17426 cmse_nonsecure_call_clear_caller_saved ();
17427 if (TARGET_THUMB1)
17428 thumb1_reorg ();
17429 else if (TARGET_THUMB2)
17430 thumb2_reorg ();
17432 /* Ensure all insns that must be split have been split at this point.
17433 Otherwise, the pool placement code below may compute incorrect
17434 insn lengths. Note that when optimizing, all insns have already
17435 been split at this point. */
17436 if (!optimize)
17437 split_all_insns_noflow ();
17439 /* Make sure we do not attempt to create a literal pool even though it should
17440 no longer be necessary to create any. */
17441 if (arm_disable_literal_pool)
17442 return ;
17444 minipool_fix_head = minipool_fix_tail = NULL;
17446 /* The first insn must always be a note, or the code below won't
17447 scan it properly. */
17448 insn = get_insns ();
17449 gcc_assert (NOTE_P (insn));
17450 minipool_pad = 0;
17452 /* Scan all the insns and record the operands that will need fixing. */
17453 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17455 if (BARRIER_P (insn))
17456 push_minipool_barrier (insn, address);
17457 else if (INSN_P (insn))
17459 rtx_jump_table_data *table;
17461 note_invalid_constants (insn, address, true);
17462 address += get_attr_length (insn);
17464 /* If the insn is a vector jump, add the size of the table
17465 and skip the table. */
17466 if (tablejump_p (insn, NULL, &table))
17468 address += get_jump_table_size (table);
17469 insn = table;
17472 else if (LABEL_P (insn))
17473 /* Add the worst-case padding due to alignment. We don't add
17474 the _current_ padding because the minipool insertions
17475 themselves might change it. */
17476 address += get_label_padding (insn);
17479 fix = minipool_fix_head;
17481 /* Now scan the fixups and perform the required changes. */
17482 while (fix)
17484 Mfix * ftmp;
17485 Mfix * fdel;
17486 Mfix * last_added_fix;
17487 Mfix * last_barrier = NULL;
17488 Mfix * this_fix;
17490 /* Skip any further barriers before the next fix. */
17491 while (fix && BARRIER_P (fix->insn))
17492 fix = fix->next;
17494 /* No more fixes. */
17495 if (fix == NULL)
17496 break;
17498 last_added_fix = NULL;
17500 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17502 if (BARRIER_P (ftmp->insn))
17504 if (ftmp->address >= minipool_vector_head->max_address)
17505 break;
17507 last_barrier = ftmp;
17509 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17510 break;
17512 last_added_fix = ftmp; /* Keep track of the last fix added. */
17515 /* If we found a barrier, drop back to that; any fixes that we
17516 could have reached but come after the barrier will now go in
17517 the next mini-pool. */
17518 if (last_barrier != NULL)
17520 /* Reduce the refcount for those fixes that won't go into this
17521 pool after all. */
17522 for (fdel = last_barrier->next;
17523 fdel && fdel != ftmp;
17524 fdel = fdel->next)
17526 fdel->minipool->refcount--;
17527 fdel->minipool = NULL;
17530 ftmp = last_barrier;
17532 else
17534 /* ftmp is the first fix that we can't fit into this pool and
17535 there are no natural barriers that we could use. Insert a
17536 new barrier in the code somewhere between the previous
17537 fix and this one, and arrange to jump around it. */
17538 HOST_WIDE_INT max_address;
17540 /* The last item on the list of fixes must be a barrier, so
17541 we can never run off the end of the list of fixes without
17542 last_barrier being set. */
17543 gcc_assert (ftmp);
17545 max_address = minipool_vector_head->max_address;
17546 /* Check that there isn't another fix that is in range that
17547 we couldn't fit into this pool because the pool was
17548 already too large: we need to put the pool before such an
17549 instruction. The pool itself may come just after the
17550 fix because create_fix_barrier also allows space for a
17551 jump instruction. */
17552 if (ftmp->address < max_address)
17553 max_address = ftmp->address + 1;
17555 last_barrier = create_fix_barrier (last_added_fix, max_address);
17558 assign_minipool_offsets (last_barrier);
17560 while (ftmp)
17562 if (!BARRIER_P (ftmp->insn)
17563 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17564 == NULL))
17565 break;
17567 ftmp = ftmp->next;
17570 /* Scan over the fixes we have identified for this pool, fixing them
17571 up and adding the constants to the pool itself. */
17572 for (this_fix = fix; this_fix && ftmp != this_fix;
17573 this_fix = this_fix->next)
17574 if (!BARRIER_P (this_fix->insn))
17576 rtx addr
17577 = plus_constant (Pmode,
17578 gen_rtx_LABEL_REF (VOIDmode,
17579 minipool_vector_label),
17580 this_fix->minipool->offset);
17581 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17584 dump_minipool (last_barrier->insn);
17585 fix = ftmp;
17588 /* From now on we must synthesize any constants that we can't handle
17589 directly. This can happen if the RTL gets split during final
17590 instruction generation. */
17591 cfun->machine->after_arm_reorg = 1;
17593 /* Free the minipool memory. */
17594 obstack_free (&minipool_obstack, minipool_startobj);
17597 /* Routines to output assembly language. */
17599 /* Return string representation of passed in real value. */
17600 static const char *
17601 fp_const_from_val (REAL_VALUE_TYPE *r)
17603 if (!fp_consts_inited)
17604 init_fp_table ();
17606 gcc_assert (real_equal (r, &value_fp0));
17607 return "0";
17610 /* OPERANDS[0] is the entire list of insns that constitute pop,
17611 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17612 is in the list, UPDATE is true iff the list contains explicit
17613 update of base register. */
17614 void
17615 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17616 bool update)
17618 int i;
17619 char pattern[100];
17620 int offset;
17621 const char *conditional;
17622 int num_saves = XVECLEN (operands[0], 0);
17623 unsigned int regno;
17624 unsigned int regno_base = REGNO (operands[1]);
17625 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17627 offset = 0;
17628 offset += update ? 1 : 0;
17629 offset += return_pc ? 1 : 0;
17631 /* Is the base register in the list? */
17632 for (i = offset; i < num_saves; i++)
17634 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17635 /* If SP is in the list, then the base register must be SP. */
17636 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17637 /* If base register is in the list, there must be no explicit update. */
17638 if (regno == regno_base)
17639 gcc_assert (!update);
17642 conditional = reverse ? "%?%D0" : "%?%d0";
17643 /* Can't use POP if returning from an interrupt. */
17644 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17645 sprintf (pattern, "pop%s\t{", conditional);
17646 else
17648 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17649 It's just a convention, their semantics are identical. */
17650 if (regno_base == SP_REGNUM)
17651 sprintf (pattern, "ldmfd%s\t", conditional);
17652 else if (update)
17653 sprintf (pattern, "ldmia%s\t", conditional);
17654 else
17655 sprintf (pattern, "ldm%s\t", conditional);
17657 strcat (pattern, reg_names[regno_base]);
17658 if (update)
17659 strcat (pattern, "!, {");
17660 else
17661 strcat (pattern, ", {");
17664 /* Output the first destination register. */
17665 strcat (pattern,
17666 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17668 /* Output the rest of the destination registers. */
17669 for (i = offset + 1; i < num_saves; i++)
17671 strcat (pattern, ", ");
17672 strcat (pattern,
17673 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17676 strcat (pattern, "}");
17678 if (interrupt_p && return_pc)
17679 strcat (pattern, "^");
17681 output_asm_insn (pattern, &cond);
17685 /* Output the assembly for a store multiple. */
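/* For a push of three double registers starting at d8, for example, the
   string built below comes out roughly as "vpush.64 {d8, d9, d10}"; the
   register numbers are illustrative only.  */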
17687 const char *
17688 vfp_output_vstmd (rtx * operands)
17690 char pattern[100];
17691 int p;
17692 int base;
17693 int i;
17694 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17695 ? XEXP (operands[0], 0)
17696 : XEXP (XEXP (operands[0], 0), 0);
17697 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17699 if (push_p)
17700 strcpy (pattern, "vpush%?.64\t{%P1");
17701 else
17702 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17704 p = strlen (pattern);
17706 gcc_assert (REG_P (operands[1]));
17708 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17709 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17711 p += sprintf (&pattern[p], ", d%d", base + i);
17713 strcpy (&pattern[p], "}");
17715 output_asm_insn (pattern, operands);
17716 return "";
17720 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17721 number of bytes pushed. */
17723 static int
17724 vfp_emit_fstmd (int base_reg, int count)
17726 rtx par;
17727 rtx dwarf;
17728 rtx tmp, reg;
17729 int i;
17731 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17732 register pairs are stored by a store multiple insn. We avoid this
17733 by pushing an extra pair. */
17734 if (count == 2 && !arm_arch6)
17736 if (base_reg == LAST_VFP_REGNUM - 3)
17737 base_reg -= 2;
17738 count++;
17741 /* FSTMD may not store more than 16 doubleword registers at once. Split
17742 larger stores into multiple parts (up to a maximum of two, in
17743 practice). */
17744 if (count > 16)
17746 int saved;
17747 /* NOTE: base_reg is an internal register number, so each D register
17748 counts as 2. */
17749 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17750 saved += vfp_emit_fstmd (base_reg, 16);
17751 return saved;
17754 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17755 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17757 reg = gen_rtx_REG (DFmode, base_reg);
17758 base_reg += 2;
17760 XVECEXP (par, 0, 0)
17761 = gen_rtx_SET (gen_frame_mem
17762 (BLKmode,
17763 gen_rtx_PRE_MODIFY (Pmode,
17764 stack_pointer_rtx,
17765 plus_constant
17766 (Pmode, stack_pointer_rtx,
17767 - (count * 8)))
17769 gen_rtx_UNSPEC (BLKmode,
17770 gen_rtvec (1, reg),
17771 UNSPEC_PUSH_MULT));
17773 tmp = gen_rtx_SET (stack_pointer_rtx,
17774 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17775 RTX_FRAME_RELATED_P (tmp) = 1;
17776 XVECEXP (dwarf, 0, 0) = tmp;
17778 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17779 RTX_FRAME_RELATED_P (tmp) = 1;
17780 XVECEXP (dwarf, 0, 1) = tmp;
17782 for (i = 1; i < count; i++)
17784 reg = gen_rtx_REG (DFmode, base_reg);
17785 base_reg += 2;
17786 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17788 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17789 plus_constant (Pmode,
17790 stack_pointer_rtx,
17791 i * 8)),
17792 reg);
17793 RTX_FRAME_RELATED_P (tmp) = 1;
17794 XVECEXP (dwarf, 0, i + 1) = tmp;
17797 par = emit_insn (par);
17798 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17799 RTX_FRAME_RELATED_P (par) = 1;
17801 return count * 8;
17804 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17805 has the cmse_nonsecure_call attribute and returns false otherwise. */
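/* For instance (illustrative), a call made through a pointer whose type was
   declared as

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);

   is detected here when compiling with -mcmse.  */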
17807 bool
17808 detect_cmse_nonsecure_call (tree addr)
17810 if (!addr)
17811 return FALSE;
17813 tree fntype = TREE_TYPE (addr);
17814 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17815 TYPE_ATTRIBUTES (fntype)))
17816 return TRUE;
17817 return FALSE;
17821 /* Emit a call instruction with pattern PAT. ADDR is the address of
17822 the call target. */
17824 void
17825 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17827 rtx insn;
17829 insn = emit_call_insn (pat);
17831 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17832 If the call might use such an entry, add a use of the PIC register
17833 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17834 if (TARGET_VXWORKS_RTP
17835 && flag_pic
17836 && !sibcall
17837 && GET_CODE (addr) == SYMBOL_REF
17838 && (SYMBOL_REF_DECL (addr)
17839 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17840 : !SYMBOL_REF_LOCAL_P (addr)))
17842 require_pic_register ();
17843 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17846 if (TARGET_AAPCS_BASED)
17848 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17849 linker. We need to add an IP clobber to allow setting
17850 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17851 is not needed since it's a fixed register. */
17852 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17853 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17857 /* Output a 'call' insn. */
17858 const char *
17859 output_call (rtx *operands)
17861 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17863 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17864 if (REGNO (operands[0]) == LR_REGNUM)
17866 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17867 output_asm_insn ("mov%?\t%0, %|lr", operands);
17870 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17872 if (TARGET_INTERWORK || arm_arch4t)
17873 output_asm_insn ("bx%?\t%0", operands);
17874 else
17875 output_asm_insn ("mov%?\t%|pc, %0", operands);
17877 return "";
17880 /* Output a move from arm registers to arm registers of a long double
17881 OPERANDS[0] is the destination.
17882 OPERANDS[1] is the source. */
17883 const char *
17884 output_mov_long_double_arm_from_arm (rtx *operands)
17886 /* We have to be careful here because the two might overlap. */
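  /* Illustrative example: copying {r0, r1, r2} into {r1, r2, r3} must move
     r3 <- r2 first and r1 <- r0 last; copying in ascending order would
     overwrite r1 before it had been read.  */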
17887 int dest_start = REGNO (operands[0]);
17888 int src_start = REGNO (operands[1]);
17889 rtx ops[2];
17890 int i;
17892 if (dest_start < src_start)
17894 for (i = 0; i < 3; i++)
17896 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17897 ops[1] = gen_rtx_REG (SImode, src_start + i);
17898 output_asm_insn ("mov%?\t%0, %1", ops);
17901 else
17903 for (i = 2; i >= 0; i--)
17905 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17906 ops[1] = gen_rtx_REG (SImode, src_start + i);
17907 output_asm_insn ("mov%?\t%0, %1", ops);
17911 return "";
17914 void
17915 arm_emit_movpair (rtx dest, rtx src)
17917 /* If the src is an immediate, simplify it. */
17918 if (CONST_INT_P (src))
17920 HOST_WIDE_INT val = INTVAL (src);
17921 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17922 if ((val >> 16) & 0x0000ffff)
17924 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17925 GEN_INT (16)),
17926 GEN_INT ((val >> 16) & 0x0000ffff));
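          /* Illustrative example: for val == 0x12345678 the code above first
             sets the low halfword to 0x5678 and then the high halfword to
             0x1234, which typically assembles to a movw/movt pair.  */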
17927 rtx_insn *insn = get_last_insn ();
17928 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17930 return;
17932 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17933 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17934 rtx_insn *insn = get_last_insn ();
17935 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17938 /* Output a move between double words. It must be REG<-MEM
17939 or MEM<-REG. */
17940 const char *
17941 output_move_double (rtx *operands, bool emit, int *count)
17943 enum rtx_code code0 = GET_CODE (operands[0]);
17944 enum rtx_code code1 = GET_CODE (operands[1]);
17945 rtx otherops[3];
17946 if (count)
17947 *count = 1;
17949 /* The only case when this might happen is when
17950 you are looking at the length of a DImode instruction
17951 that has an invalid constant in it. */
17952 if (code0 == REG && code1 != MEM)
17954 gcc_assert (!emit);
17955 *count = 2;
17956 return "";
17959 if (code0 == REG)
17961 unsigned int reg0 = REGNO (operands[0]);
17963 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17965 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17967 switch (GET_CODE (XEXP (operands[1], 0)))
17969 case REG:
17971 if (emit)
17973 if (TARGET_LDRD
17974 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17975 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17976 else
17977 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17979 break;
17981 case PRE_INC:
17982 gcc_assert (TARGET_LDRD);
17983 if (emit)
17984 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17985 break;
17987 case PRE_DEC:
17988 if (emit)
17990 if (TARGET_LDRD)
17991 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17992 else
17993 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17995 break;
17997 case POST_INC:
17998 if (emit)
18000 if (TARGET_LDRD)
18001 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18002 else
18003 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18005 break;
18007 case POST_DEC:
18008 gcc_assert (TARGET_LDRD);
18009 if (emit)
18010 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18011 break;
18013 case PRE_MODIFY:
18014 case POST_MODIFY:
18015 /* Autoincrement addressing modes should never have overlapping
18016 base and destination registers, and overlapping index registers
18017 are already prohibited, so this doesn't need to worry about
18018 fix_cm3_ldrd. */
18019 otherops[0] = operands[0];
18020 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18021 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18023 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18025 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18027 /* Registers overlap so split out the increment. */
18028 if (emit)
18030 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18031 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18033 if (count)
18034 *count = 2;
18036 else
18038 /* Use a single insn if we can.
18039 FIXME: IWMMXT allows offsets larger than ldrd can
18040 handle, fix these up with a pair of ldr. */
18041 if (TARGET_THUMB2
18042 || !CONST_INT_P (otherops[2])
18043 || (INTVAL (otherops[2]) > -256
18044 && INTVAL (otherops[2]) < 256))
18046 if (emit)
18047 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18049 else
18051 if (emit)
18053 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18054 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18056 if (count)
18057 *count = 2;
18062 else
18064 /* Use a single insn if we can.
18065 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18066 fix these up with a pair of ldr. */
18067 if (TARGET_THUMB2
18068 || !CONST_INT_P (otherops[2])
18069 || (INTVAL (otherops[2]) > -256
18070 && INTVAL (otherops[2]) < 256))
18072 if (emit)
18073 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18075 else
18077 if (emit)
18079 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18080 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18082 if (count)
18083 *count = 2;
18086 break;
18088 case LABEL_REF:
18089 case CONST:
18090 /* We might be able to use ldrd %0, %1 here. However the range is
18091 different to ldr/adr, and it is broken on some ARMv7-M
18092 implementations. */
18093 /* Use the second register of the pair to avoid problematic
18094 overlap. */
18095 otherops[1] = operands[1];
18096 if (emit)
18097 output_asm_insn ("adr%?\t%0, %1", otherops);
18098 operands[1] = otherops[0];
18099 if (emit)
18101 if (TARGET_LDRD)
18102 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18103 else
18104 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18107 if (count)
18108 *count = 2;
18109 break;
18111 /* ??? This needs checking for thumb2. */
18112 default:
18113 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18114 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18116 otherops[0] = operands[0];
18117 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18118 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18120 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18122 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18124 switch ((int) INTVAL (otherops[2]))
18126 case -8:
18127 if (emit)
18128 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18129 return "";
18130 case -4:
18131 if (TARGET_THUMB2)
18132 break;
18133 if (emit)
18134 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18135 return "";
18136 case 4:
18137 if (TARGET_THUMB2)
18138 break;
18139 if (emit)
18140 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18141 return "";
18144 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18145 operands[1] = otherops[0];
18146 if (TARGET_LDRD
18147 && (REG_P (otherops[2])
18148 || TARGET_THUMB2
18149 || (CONST_INT_P (otherops[2])
18150 && INTVAL (otherops[2]) > -256
18151 && INTVAL (otherops[2]) < 256)))
18153 if (reg_overlap_mentioned_p (operands[0],
18154 otherops[2]))
18156 /* Swap base and index registers over to
18157 avoid a conflict. */
18158 std::swap (otherops[1], otherops[2]);
18160 /* If both registers conflict, it will usually
18161 have been fixed by a splitter. */
18162 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18163 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18165 if (emit)
18167 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18168 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18170 if (count)
18171 *count = 2;
18173 else
18175 otherops[0] = operands[0];
18176 if (emit)
18177 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18179 return "";
18182 if (CONST_INT_P (otherops[2]))
18184 if (emit)
18186 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18187 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18188 else
18189 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18192 else
18194 if (emit)
18195 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18198 else
18200 if (emit)
18201 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18204 if (count)
18205 *count = 2;
18207 if (TARGET_LDRD)
18208 return "ldrd%?\t%0, [%1]";
18210 return "ldmia%?\t%1, %M0";
18212 else
18214 otherops[1] = adjust_address (operands[1], SImode, 4);
18215 /* Take care of overlapping base/data reg. */
18216 if (reg_mentioned_p (operands[0], operands[1]))
18218 if (emit)
18220 output_asm_insn ("ldr%?\t%0, %1", otherops);
18221 output_asm_insn ("ldr%?\t%0, %1", operands);
18223 if (count)
18224 *count = 2;
18227 else
18229 if (emit)
18231 output_asm_insn ("ldr%?\t%0, %1", operands);
18232 output_asm_insn ("ldr%?\t%0, %1", otherops);
18234 if (count)
18235 *count = 2;
18240 else
18242 /* Constraints should ensure this. */
18243 gcc_assert (code0 == MEM && code1 == REG);
18244 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18245 || (TARGET_ARM && TARGET_LDRD));
18247 switch (GET_CODE (XEXP (operands[0], 0)))
18249 case REG:
18250 if (emit)
18252 if (TARGET_LDRD)
18253 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18254 else
18255 output_asm_insn ("stm%?\t%m0, %M1", operands);
18257 break;
18259 case PRE_INC:
18260 gcc_assert (TARGET_LDRD);
18261 if (emit)
18262 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18263 break;
18265 case PRE_DEC:
18266 if (emit)
18268 if (TARGET_LDRD)
18269 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18270 else
18271 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18273 break;
18275 case POST_INC:
18276 if (emit)
18278 if (TARGET_LDRD)
18279 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18280 else
18281 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18283 break;
18285 case POST_DEC:
18286 gcc_assert (TARGET_LDRD);
18287 if (emit)
18288 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18289 break;
18291 case PRE_MODIFY:
18292 case POST_MODIFY:
18293 otherops[0] = operands[1];
18294 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18295 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18297 /* IWMMXT allows offsets larger than ldrd can handle,
18298 fix these up with a pair of ldr. */
18299 if (!TARGET_THUMB2
18300 && CONST_INT_P (otherops[2])
18301 && (INTVAL(otherops[2]) <= -256
18302 || INTVAL(otherops[2]) >= 256))
18304 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18306 if (emit)
18308 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18309 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18311 if (count)
18312 *count = 2;
18314 else
18316 if (emit)
18318 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18319 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18321 if (count)
18322 *count = 2;
18325 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18327 if (emit)
18328 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18330 else
18332 if (emit)
18333 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18335 break;
18337 case PLUS:
18338 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18339 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18341 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18343 case -8:
18344 if (emit)
18345 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18346 return "";
18348 case -4:
18349 if (TARGET_THUMB2)
18350 break;
18351 if (emit)
18352 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18353 return "";
18355 case 4:
18356 if (TARGET_THUMB2)
18357 break;
18358 if (emit)
18359 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18360 return "";
18363 if (TARGET_LDRD
18364 && (REG_P (otherops[2])
18365 || TARGET_THUMB2
18366 || (CONST_INT_P (otherops[2])
18367 && INTVAL (otherops[2]) > -256
18368 && INTVAL (otherops[2]) < 256)))
18370 otherops[0] = operands[1];
18371 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18372 if (emit)
18373 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18374 return "";
18376 /* Fall through */
18378 default:
18379 otherops[0] = adjust_address (operands[0], SImode, 4);
18380 otherops[1] = operands[1];
18381 if (emit)
18383 output_asm_insn ("str%?\t%1, %0", operands);
18384 output_asm_insn ("str%?\t%H1, %0", otherops);
18386 if (count)
18387 *count = 2;
18391 return "";
18394 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18395 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18397 const char *
18398 output_move_quad (rtx *operands)
18400 if (REG_P (operands[0]))
18402 /* Load, or reg->reg move. */
18404 if (MEM_P (operands[1]))
18406 switch (GET_CODE (XEXP (operands[1], 0)))
18408 case REG:
18409 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18410 break;
18412 case LABEL_REF:
18413 case CONST:
18414 output_asm_insn ("adr%?\t%0, %1", operands);
18415 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18416 break;
18418 default:
18419 gcc_unreachable ();
18422 else
18424 rtx ops[2];
18425 int dest, src, i;
18427 gcc_assert (REG_P (operands[1]));
18429 dest = REGNO (operands[0]);
18430 src = REGNO (operands[1]);
18432 /* This seems pretty dumb, but hopefully GCC won't try to do it
18433 very often. */
18434 if (dest < src)
18435 for (i = 0; i < 4; i++)
18437 ops[0] = gen_rtx_REG (SImode, dest + i);
18438 ops[1] = gen_rtx_REG (SImode, src + i);
18439 output_asm_insn ("mov%?\t%0, %1", ops);
18441 else
18442 for (i = 3; i >= 0; i--)
18444 ops[0] = gen_rtx_REG (SImode, dest + i);
18445 ops[1] = gen_rtx_REG (SImode, src + i);
18446 output_asm_insn ("mov%?\t%0, %1", ops);
18450 else
18452 gcc_assert (MEM_P (operands[0]));
18453 gcc_assert (REG_P (operands[1]));
18454 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18456 switch (GET_CODE (XEXP (operands[0], 0)))
18458 case REG:
18459 output_asm_insn ("stm%?\t%m0, %M1", operands);
18460 break;
18462 default:
18463 gcc_unreachable ();
18467 return "";
18470 /* Output a VFP load or store instruction. */
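/* Illustrative example: a DFmode load from a plain register address is
   emitted roughly as "vldr.64 d1, [r0]"; the register numbers are
   illustrative only.  */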
18472 const char *
18473 output_move_vfp (rtx *operands)
18475 rtx reg, mem, addr, ops[2];
18476 int load = REG_P (operands[0]);
18477 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18478 int sp = (!TARGET_VFP_FP16INST
18479 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18480 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18481 const char *templ;
18482 char buff[50];
18483 machine_mode mode;
18485 reg = operands[!load];
18486 mem = operands[load];
18488 mode = GET_MODE (reg);
18490 gcc_assert (REG_P (reg));
18491 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18492 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18493 || mode == SFmode
18494 || mode == DFmode
18495 || mode == HImode
18496 || mode == SImode
18497 || mode == DImode
18498 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18499 gcc_assert (MEM_P (mem));
18501 addr = XEXP (mem, 0);
18503 switch (GET_CODE (addr))
18505 case PRE_DEC:
18506 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18507 ops[0] = XEXP (addr, 0);
18508 ops[1] = reg;
18509 break;
18511 case POST_INC:
18512 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18513 ops[0] = XEXP (addr, 0);
18514 ops[1] = reg;
18515 break;
18517 default:
18518 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18519 ops[0] = reg;
18520 ops[1] = mem;
18521 break;
18524 sprintf (buff, templ,
18525 load ? "ld" : "st",
18526 dp ? "64" : sp ? "32" : "16",
18527 dp ? "P" : "",
18528 integer_p ? "\t%@ int" : "");
18529 output_asm_insn (buff, ops);
18531 return "";
18534 /* Output a Neon double-word or quad-word load or store, or a load
18535 or store for larger structure modes.
18537 WARNING: The ordering of elements is weird in big-endian mode,
18538 because the EABI requires that vectors stored in memory appear
18539 as though they were stored by a VSTM instruction.
18540 GCC RTL defines element ordering based on in-memory order.
18541 This can be different from the architectural ordering of elements
18542 within a NEON register. The intrinsics defined in arm_neon.h use the
18543 NEON register element ordering, not the GCC RTL element ordering.
18545 For example, the in-memory ordering of a big-endian quadword
18546 vector with 16-bit elements when stored from register pair {d0,d1}
18547 will be (lowest address first, d0[N] is NEON register element N):
18549 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18551 When necessary, quadword registers (dN, dN+1) are moved to ARM
18552 registers from rN in the order:
18554 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18556 So that STM/LDM can be used on vectors in ARM registers, and the
18557 same memory layout will result as if VSTM/VLDM were used.
18559 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18560 possible, which allows use of appropriate alignment tags.
18561 Note that the choice of "64" is independent of the actual vector
18562 element size; this size simply ensures that the behavior is
18563 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18565 Due to limitations of those instructions, use of VST1.64/VLD1.64
18566 is not possible if:
18567 - the address contains PRE_DEC, or
18568 - the mode refers to more than 4 double-word registers
18570 In those cases, it would be possible to replace VSTM/VLDM by a
18571 sequence of instructions; this is not currently implemented since
18572 this is not certain to actually improve performance. */
18574 const char *
18575 output_move_neon (rtx *operands)
18577 rtx reg, mem, addr, ops[2];
18578 int regno, nregs, load = REG_P (operands[0]);
18579 const char *templ;
18580 char buff[50];
18581 machine_mode mode;
18583 reg = operands[!load];
18584 mem = operands[load];
18586 mode = GET_MODE (reg);
18588 gcc_assert (REG_P (reg));
18589 regno = REGNO (reg);
18590 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18591 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18592 || NEON_REGNO_OK_FOR_QUAD (regno));
18593 gcc_assert (VALID_NEON_DREG_MODE (mode)
18594 || VALID_NEON_QREG_MODE (mode)
18595 || VALID_NEON_STRUCT_MODE (mode));
18596 gcc_assert (MEM_P (mem));
18598 addr = XEXP (mem, 0);
18600 /* Strip off const from addresses like (const (plus (...))). */
18601 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18602 addr = XEXP (addr, 0);
18604 switch (GET_CODE (addr))
18606 case POST_INC:
18607 /* We have to use vldm / vstm for too-large modes. */
18608 if (nregs > 4)
18610 templ = "v%smia%%?\t%%0!, %%h1";
18611 ops[0] = XEXP (addr, 0);
18613 else
18615 templ = "v%s1.64\t%%h1, %%A0";
18616 ops[0] = mem;
18618 ops[1] = reg;
18619 break;
18621 case PRE_DEC:
18622 /* We have to use vldm / vstm in this case, since there is no
18623 pre-decrement form of the vld1 / vst1 instructions. */
18624 templ = "v%smdb%%?\t%%0!, %%h1";
18625 ops[0] = XEXP (addr, 0);
18626 ops[1] = reg;
18627 break;
18629 case POST_MODIFY:
18630 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18631 gcc_unreachable ();
18633 case REG:
18634 /* We have to use vldm / vstm for too-large modes. */
18635 if (nregs > 1)
18637 if (nregs > 4)
18638 templ = "v%smia%%?\t%%m0, %%h1";
18639 else
18640 templ = "v%s1.64\t%%h1, %%A0";
18642 ops[0] = mem;
18643 ops[1] = reg;
18644 break;
18646 /* Fall through. */
18647 case LABEL_REF:
18648 case PLUS:
18650 int i;
18651 int overlap = -1;
18652 for (i = 0; i < nregs; i++)
18654 /* We're only using DImode here because it's a convenient size. */
18655 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18656 ops[1] = adjust_address (mem, DImode, 8 * i);
18657 if (reg_overlap_mentioned_p (ops[0], mem))
18659 gcc_assert (overlap == -1);
18660 overlap = i;
18662 else
18664 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18665 output_asm_insn (buff, ops);
18668 if (overlap != -1)
18670 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18671 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18672 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18673 output_asm_insn (buff, ops);
18676 return "";
18679 default:
18680 gcc_unreachable ();
18683 sprintf (buff, templ, load ? "ld" : "st");
18684 output_asm_insn (buff, ops);
18686 return "";
18689 /* Compute and return the length of neon_mov<mode>, where <mode> is
18690 one of VSTRUCT modes: EI, OI, CI or XI. */
18691 int
18692 arm_attr_length_move_neon (rtx_insn *insn)
18694 rtx reg, mem, addr;
18695 int load;
18696 machine_mode mode;
18698 extract_insn_cached (insn);
18700 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18702 mode = GET_MODE (recog_data.operand[0]);
18703 switch (mode)
18705 case E_EImode:
18706 case E_OImode:
18707 return 8;
18708 case E_CImode:
18709 return 12;
18710 case E_XImode:
18711 return 16;
18712 default:
18713 gcc_unreachable ();
18717 load = REG_P (recog_data.operand[0]);
18718 reg = recog_data.operand[!load];
18719 mem = recog_data.operand[load];
18721 gcc_assert (MEM_P (mem));
18723 mode = GET_MODE (reg);
18724 addr = XEXP (mem, 0);
18726 /* Strip off const from addresses like (const (plus (...))). */
18727 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18728 addr = XEXP (addr, 0);
18730 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18732 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18733 return insns * 4;
18735 else
18736 return 4;
18739 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18740 return zero. */
18742 int
18743 arm_address_offset_is_imm (rtx_insn *insn)
18745 rtx mem, addr;
18747 extract_insn_cached (insn);
18749 if (REG_P (recog_data.operand[0]))
18750 return 0;
18752 mem = recog_data.operand[0];
18754 gcc_assert (MEM_P (mem));
18756 addr = XEXP (mem, 0);
18758 if (REG_P (addr)
18759 || (GET_CODE (addr) == PLUS
18760 && REG_P (XEXP (addr, 0))
18761 && CONST_INT_P (XEXP (addr, 1))))
18762 return 1;
18763 else
18764 return 0;
18767 /* Output an ADD r, s, #n where n may be too big for one instruction.
18768 If adding zero to one register, output nothing. */
18769 const char *
18770 output_add_immediate (rtx *operands)
18772 HOST_WIDE_INT n = INTVAL (operands[2]);
18774 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18776 if (n < 0)
18777 output_multi_immediate (operands,
18778 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18779 -n);
18780 else
18781 output_multi_immediate (operands,
18782 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18786 return "";
18789 /* Output a multiple immediate operation.
18790 OPERANDS is the vector of operands referred to in the output patterns.
18791 INSTR1 is the output pattern to use for the first constant.
18792 INSTR2 is the output pattern to use for subsequent constants.
18793 IMMED_OP is the index of the constant slot in OPERANDS.
18794 N is the constant value. */
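/* A worked example (illustrative): an addition of #0x1f004 cannot be encoded
   as a single ARM immediate, so it is split into two valid chunks and emitted
   roughly as

       add     r0, r1, #4
       add     r0, r0, #126976    @ 0x1f000

   each chunk being an 8-bit value rotated by an even amount.  */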
18795 static const char *
18796 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18797 int immed_op, HOST_WIDE_INT n)
18799 #if HOST_BITS_PER_WIDE_INT > 32
18800 n &= 0xffffffff;
18801 #endif
18803 if (n == 0)
18805 /* Quick and easy output. */
18806 operands[immed_op] = const0_rtx;
18807 output_asm_insn (instr1, operands);
18809 else
18811 int i;
18812 const char * instr = instr1;
18814 /* Note that n is never zero here (which would give no output). */
18815 for (i = 0; i < 32; i += 2)
18817 if (n & (3 << i))
18819 operands[immed_op] = GEN_INT (n & (255 << i));
18820 output_asm_insn (instr, operands);
18821 instr = instr2;
18822 i += 6;
18827 return "";
18830 /* Return the name of a shifter operation. */
18831 static const char *
18832 arm_shift_nmem(enum rtx_code code)
18834 switch (code)
18836 case ASHIFT:
18837 return ARM_LSL_NAME;
18839 case ASHIFTRT:
18840 return "asr";
18842 case LSHIFTRT:
18843 return "lsr";
18845 case ROTATERT:
18846 return "ror";
18848 default:
18849 abort();
18853 /* Return the appropriate ARM instruction for the operation code.
18854 The returned result should not be overwritten. OP is the rtx of the
18855 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18856 was shifted. */
18857 const char *
18858 arithmetic_instr (rtx op, int shift_first_arg)
18860 switch (GET_CODE (op))
18862 case PLUS:
18863 return "add";
18865 case MINUS:
18866 return shift_first_arg ? "rsb" : "sub";
18868 case IOR:
18869 return "orr";
18871 case XOR:
18872 return "eor";
18874 case AND:
18875 return "and";
18877 case ASHIFT:
18878 case ASHIFTRT:
18879 case LSHIFTRT:
18880 case ROTATERT:
18881 return arm_shift_nmem(GET_CODE(op));
18883 default:
18884 gcc_unreachable ();
18888 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18889 for the operation code. The returned result should not be overwritten.
18890 OP is the rtx code of the shift.
18891 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18892 shift. */
18893 static const char *
18894 shift_op (rtx op, HOST_WIDE_INT *amountp)
18896 const char * mnem;
18897 enum rtx_code code = GET_CODE (op);
18899 switch (code)
18901 case ROTATE:
18902 if (!CONST_INT_P (XEXP (op, 1)))
18904 output_operand_lossage ("invalid shift operand");
18905 return NULL;
18908 code = ROTATERT;
18909 *amountp = 32 - INTVAL (XEXP (op, 1));
18910 mnem = "ror";
18911 break;
18913 case ASHIFT:
18914 case ASHIFTRT:
18915 case LSHIFTRT:
18916 case ROTATERT:
18917 mnem = arm_shift_nmem(code);
18918 if (CONST_INT_P (XEXP (op, 1)))
18920 *amountp = INTVAL (XEXP (op, 1));
18922 else if (REG_P (XEXP (op, 1)))
18924 *amountp = -1;
18925 return mnem;
18927 else
18929 output_operand_lossage ("invalid shift operand");
18930 return NULL;
18932 break;
18934 case MULT:
18935 /* We never have to worry about the amount being other than a
18936 power of 2, since this case can never be reloaded from a reg. */
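      /* For example (illustrative), a multiplication by 8 reaches this point
         and ends up being emitted as an "lsl #3" shift.  */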
18937 if (!CONST_INT_P (XEXP (op, 1)))
18939 output_operand_lossage ("invalid shift operand");
18940 return NULL;
18943 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18945 /* Amount must be a power of two. */
18946 if (*amountp & (*amountp - 1))
18948 output_operand_lossage ("invalid shift operand");
18949 return NULL;
18952 *amountp = exact_log2 (*amountp);
18953 gcc_assert (IN_RANGE (*amountp, 0, 31));
18954 return ARM_LSL_NAME;
18956 default:
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18961 /* This is not 100% correct, but follows from the desire to merge
18962 multiplication by a power of 2 with the recognizer for a
18963 shift. >=32 is not a valid shift for "lsl", so we must try and
18964 output a shift that produces the correct arithmetical result.
18965 Using lsr #32 is identical except for the fact that the carry bit
18966 is not set correctly if we set the flags; but we never use the
18967 carry bit from such an operation, so we can ignore that. */
18968 if (code == ROTATERT)
18969 /* Rotate is just modulo 32. */
18970 *amountp &= 31;
18971 else if (*amountp != (*amountp & 31))
18973 if (code == ASHIFT)
18974 mnem = "lsr";
18975 *amountp = 32;
18978 /* Shifts of 0 are no-ops. */
18979 if (*amountp == 0)
18980 return NULL;
18982 return mnem;
18985 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18986 because /bin/as is horribly restrictive. The judgement about
18987 whether or not each character is 'printable' (and can be output as
18988 is) or not (and must be printed with an octal escape) must be made
18989 with reference to the *host* character set -- the situation is
18990 similar to that discussed in the comments above pp_c_char in
18991 c-pretty-print.c. */
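/* For example (illustrative), the three bytes 'a', '"' and newline would be
   emitted as:  .ascii "a\"\012"  */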
18993 #define MAX_ASCII_LEN 51
18995 void
18996 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18998 int i;
18999 int len_so_far = 0;
19001 fputs ("\t.ascii\t\"", stream);
19003 for (i = 0; i < len; i++)
19005 int c = p[i];
19007 if (len_so_far >= MAX_ASCII_LEN)
19009 fputs ("\"\n\t.ascii\t\"", stream);
19010 len_so_far = 0;
19013 if (ISPRINT (c))
19015 if (c == '\\' || c == '\"')
19017 putc ('\\', stream);
19018 len_so_far++;
19020 putc (c, stream);
19021 len_so_far++;
19023 else
19025 fprintf (stream, "\\%03o", c);
19026 len_so_far += 4;
19030 fputs ("\"\n", stream);
19033 /* Whether a register is callee saved or not. This is necessary because high
19034 registers are marked as caller saved when optimizing for size on Thumb-1
19035 targets, despite being callee saved, in order to avoid using them. */
19036 #define callee_saved_reg_p(reg) \
19037 (!call_used_regs[reg] \
19038 || (TARGET_THUMB1 && optimize_size \
19039 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19041 /* Compute the register save mask for registers 0 through 12
19042 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19044 static unsigned long
19045 arm_compute_save_reg0_reg12_mask (void)
19047 unsigned long func_type = arm_current_func_type ();
19048 unsigned long save_reg_mask = 0;
19049 unsigned int reg;
19051 if (IS_INTERRUPT (func_type))
19053 unsigned int max_reg;
19054 /* Interrupt functions must not corrupt any registers,
19055 even call clobbered ones. If this is a leaf function
19056 we can just examine the registers used by the RTL, but
19057 otherwise we have to assume that whatever function is
19058 called might clobber anything, and so we have to save
19059 all the call-clobbered registers as well. */
19060 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19061 /* FIQ handlers have registers r8 - r12 banked, so
19062 we only need to check r0 - r7; normal ISRs only
19063 bank r14 and r15, so we must check up to r12.
19064 r13 is the stack pointer which is always preserved,
19065 so we do not need to consider it here. */
19066 max_reg = 7;
19067 else
19068 max_reg = 12;
19070 for (reg = 0; reg <= max_reg; reg++)
19071 if (df_regs_ever_live_p (reg)
19072 || (! crtl->is_leaf && call_used_regs[reg]))
19073 save_reg_mask |= (1 << reg);
19075 /* Also save the pic base register if necessary. */
19076 if (flag_pic
19077 && !TARGET_SINGLE_PIC_BASE
19078 && arm_pic_register != INVALID_REGNUM
19079 && crtl->uses_pic_offset_table)
19080 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19082 else if (IS_VOLATILE(func_type))
19084 /* For noreturn functions we historically omitted register saves
19085 altogether. However this really messes up debugging. As a
19086 compromise save just the frame pointers. Combined with the link
19087 register saved elsewhere this should be sufficient to get
19088 a backtrace. */
19089 if (frame_pointer_needed)
19090 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19091 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19092 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19093 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19094 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19096 else
19098 /* In the normal case we only need to save those registers
19099 which are call saved and which are used by this function. */
19100 for (reg = 0; reg <= 11; reg++)
19101 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19102 save_reg_mask |= (1 << reg);
19104 /* Handle the frame pointer as a special case. */
19105 if (frame_pointer_needed)
19106 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19108 /* If we aren't loading the PIC register,
19109 don't stack it even though it may be live. */
19110 if (flag_pic
19111 && !TARGET_SINGLE_PIC_BASE
19112 && arm_pic_register != INVALID_REGNUM
19113 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19114 || crtl->uses_pic_offset_table))
19115 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19117 /* The prologue will copy SP into R0, so save it. */
19118 if (IS_STACKALIGN (func_type))
19119 save_reg_mask |= 1;
19122 /* Save registers so the exception handler can modify them. */
19123 if (crtl->calls_eh_return)
19125 unsigned int i;
19127 for (i = 0; ; i++)
19129 reg = EH_RETURN_DATA_REGNO (i);
19130 if (reg == INVALID_REGNUM)
19131 break;
19132 save_reg_mask |= 1 << reg;
19136 return save_reg_mask;
19139 /* Return true if r3 is live at the start of the function. */
19141 static bool
19142 arm_r3_live_at_start_p (void)
19144 /* Just look at cfg info, which is still close enough to correct at this
19145 point. This gives false positives for broken functions that might use
19146 uninitialized data that happens to be allocated in r3, but who cares? */
19147 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19150 /* Compute the number of bytes used to store the static chain register on the
19151 stack, above the stack frame. We need to know this accurately to get the
19152 alignment of the rest of the stack frame correct. */
19154 static int
19155 arm_compute_static_chain_stack_bytes (void)
19157 /* See the defining assertion in arm_expand_prologue. */
19158 if (IS_NESTED (arm_current_func_type ())
19159 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19160 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19161 && !df_regs_ever_live_p (LR_REGNUM)))
19162 && arm_r3_live_at_start_p ()
19163 && crtl->args.pretend_args_size == 0)
19164 return 4;
19166 return 0;
19169 /* Compute a bit mask of which core registers need to be
19170 saved on the stack for the current function.
19171 This is used by arm_compute_frame_layout, which may add extra registers. */
19173 static unsigned long
19174 arm_compute_save_core_reg_mask (void)
19176 unsigned int save_reg_mask = 0;
19177 unsigned long func_type = arm_current_func_type ();
19178 unsigned int reg;
19180 if (IS_NAKED (func_type))
19181 /* This should never really happen. */
19182 return 0;
19184 /* If we are creating a stack frame, then we must save the frame pointer,
19185 IP (which will hold the old stack pointer), LR and the PC. */
19186 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19187 save_reg_mask |=
19188 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19189 | (1 << IP_REGNUM)
19190 | (1 << LR_REGNUM)
19191 | (1 << PC_REGNUM);
19193 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19195 /* Decide if we need to save the link register.
19196 Interrupt routines have their own banked link register,
19197 so they never need to save it.
19198 Otherwise if we do not use the link register we do not need to save
19199 it. If we are pushing other registers onto the stack however, we
19200 can save an instruction in the epilogue by pushing the link register
19201 now and then popping it back into the PC. This incurs extra memory
19202 accesses though, so we only do it when optimizing for size, and only
19203 if we know that we will not need a fancy return sequence. */
19204 if (df_regs_ever_live_p (LR_REGNUM)
19205 || (save_reg_mask
19206 && optimize_size
19207 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19208 && !crtl->tail_call_emit
19209 && !crtl->calls_eh_return))
19210 save_reg_mask |= 1 << LR_REGNUM;
19212 if (cfun->machine->lr_save_eliminated)
19213 save_reg_mask &= ~ (1 << LR_REGNUM);
19215 if (TARGET_REALLY_IWMMXT
19216 && ((bit_count (save_reg_mask)
19217 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19218 arm_compute_static_chain_stack_bytes())
19219 ) % 2) != 0)
19221 /* The total number of registers that are going to be pushed
19222 onto the stack is odd. We need to ensure that the stack
19223 is 64-bit aligned before we start to save iWMMXt registers,
19224 and also before we start to create locals. (A local variable
19225 might be a double or long long which we will load/store using
19226 an iWMMXt instruction). Therefore we need to push another
19227 ARM register, so that the stack will be 64-bit aligned. We
19228 try to avoid using the arg registers (r0 - r3) as they might be
19229 used to pass values in a tail call. */
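/* For instance, with {r4, r5, lr} already in the mask and no pretend
arguments, the count is odd, so r6 (the first free register in the
r4-r12 range) is added to the mask as padding.  */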
19230 for (reg = 4; reg <= 12; reg++)
19231 if ((save_reg_mask & (1 << reg)) == 0)
19232 break;
19234 if (reg <= 12)
19235 save_reg_mask |= (1 << reg);
19236 else
19238 cfun->machine->sibcall_blocked = 1;
19239 save_reg_mask |= (1 << 3);
19243 /* We may need to push an additional register for use initializing the
19244 PIC base register. */
19245 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19246 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19248 reg = thumb_find_work_register (1 << 4);
19249 if (!call_used_regs[reg])
19250 save_reg_mask |= (1 << reg);
19253 return save_reg_mask;
19256 /* Compute a bit mask of which core registers need to be
19257 saved on the stack for the current function. */
19258 static unsigned long
19259 thumb1_compute_save_core_reg_mask (void)
19261 unsigned long mask;
19262 unsigned reg;
19264 mask = 0;
19265 for (reg = 0; reg < 12; reg ++)
19266 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19267 mask |= 1 << reg;
19269 /* Handle the frame pointer as a special case. */
19270 if (frame_pointer_needed)
19271 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19273 if (flag_pic
19274 && !TARGET_SINGLE_PIC_BASE
19275 && arm_pic_register != INVALID_REGNUM
19276 && crtl->uses_pic_offset_table)
19277 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19279 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19280 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19281 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19283 /* LR will also be pushed if any lo regs are pushed. */
19284 if (mask & 0xff || thumb_force_lr_save ())
19285 mask |= (1 << LR_REGNUM);
19287 /* Make sure we have a low work register if we need one.
19288 We will need one if we are going to push a high register,
19289 but we are not currently intending to push a low register. */
19290 if ((mask & 0xff) == 0
19291 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19293 /* Use thumb_find_work_register to choose which register
19294 we will use. If the register is live then we will
19295 have to push it. Use LAST_LO_REGNUM as our fallback
19296 choice for the register to select. */
19297 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19298 /* Make sure the register returned by thumb_find_work_register is
19299 not part of the return value. */
19300 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19301 reg = LAST_LO_REGNUM;
19303 if (callee_saved_reg_p (reg))
19304 mask |= 1 << reg;
19307 /* The 504 below is 8 bytes less than 512 because there are two possible
19308 alignment words.  We can't tell here if they will be present or not, so we
19309 have to play it safe and assume that they are. */
19310 if ((CALLER_INTERWORKING_SLOT_SIZE +
19311 ROUND_UP_WORD (get_frame_size ()) +
19312 crtl->outgoing_args_size) >= 504)
19314 /* This is the same as the code in thumb1_expand_prologue() which
19315 determines which register to use for stack decrement. */
19316 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19317 if (mask & (1 << reg))
19318 break;
19320 if (reg > LAST_LO_REGNUM)
19322 /* Make sure we have a register available for stack decrement. */
19323 mask |= 1 << LAST_LO_REGNUM;
19327 return mask;
19331 /* Return the number of bytes required to save VFP registers. */
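/* As an illustration of the ARM10 VFPr1 workaround applied below: on a
pre-ARMv6 core, a contiguous run of exactly two call-saved D registers is
padded to three, so 24 rather than 16 bytes are reserved for it.  */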
19332 static int
19333 arm_get_vfp_saved_size (void)
19335 unsigned int regno;
19336 int count;
19337 int saved;
19339 saved = 0;
19340 /* Space for saved VFP registers. */
19341 if (TARGET_HARD_FLOAT)
19343 count = 0;
19344 for (regno = FIRST_VFP_REGNUM;
19345 regno < LAST_VFP_REGNUM;
19346 regno += 2)
19348 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19349 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19351 if (count > 0)
19353 /* Workaround ARM10 VFPr1 bug. */
19354 if (count == 2 && !arm_arch6)
19355 count++;
19356 saved += count * 8;
19358 count = 0;
19360 else
19361 count++;
19363 if (count > 0)
19365 if (count == 2 && !arm_arch6)
19366 count++;
19367 saved += count * 8;
19370 return saved;
19374 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19375 everything bar the final return instruction. If simple_return is true,
19376 then do not output epilogue, because it has already been emitted in RTL. */
19377 const char *
19378 output_return_instruction (rtx operand, bool really_return, bool reverse,
19379 bool simple_return)
19381 char conditional[10];
19382 char instr[100];
19383 unsigned reg;
19384 unsigned long live_regs_mask;
19385 unsigned long func_type;
19386 arm_stack_offsets *offsets;
19388 func_type = arm_current_func_type ();
19390 if (IS_NAKED (func_type))
19391 return "";
19393 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19395 /* If this function was declared non-returning, and we have
19396 found a tail call, then we have to trust that the called
19397 function won't return. */
19398 if (really_return)
19400 rtx ops[2];
19402 /* Otherwise, trap an attempted return by aborting. */
19403 ops[0] = operand;
19404 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19405 : "abort");
19406 assemble_external_libcall (ops[1]);
19407 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19410 return "";
19413 gcc_assert (!cfun->calls_alloca || really_return);
19415 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19417 cfun->machine->return_used_this_function = 1;
19419 offsets = arm_get_frame_offsets ();
19420 live_regs_mask = offsets->saved_regs_mask;
19422 if (!simple_return && live_regs_mask)
19424 const char * return_reg;
19426 /* If we do not have any special requirements for function exit
19427 (e.g. interworking) then we can load the return address
19428 directly into the PC. Otherwise we must load it into LR. */
19429 if (really_return
19430 && !IS_CMSE_ENTRY (func_type)
19431 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19432 return_reg = reg_names[PC_REGNUM];
19433 else
19434 return_reg = reg_names[LR_REGNUM];
19436 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19438 /* There are three possible reasons for the IP register
19439 being saved. 1) a stack frame was created, in which case
19440 IP contains the old stack pointer, or 2) an ISR routine
19441 corrupted it, or 3) it was saved to align the stack on
19442 iWMMXt. In case 1, restore IP into SP, otherwise just
19443 restore IP. */
19444 if (frame_pointer_needed)
19446 live_regs_mask &= ~ (1 << IP_REGNUM);
19447 live_regs_mask |= (1 << SP_REGNUM);
19449 else
19450 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19453 /* On some ARM architectures it is faster to use LDR rather than
19454 LDM to load a single register. On other architectures, the
19455 cost is the same. In 26 bit mode, or for exception handlers,
19456 we have to use LDM to load the PC so that the CPSR is also
19457 restored. */
19458 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19459 if (live_regs_mask == (1U << reg))
19460 break;
19462 if (reg <= LAST_ARM_REGNUM
19463 && (reg != LR_REGNUM
19464 || ! really_return
19465 || ! IS_INTERRUPT (func_type)))
19467 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19468 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19470 else
19472 char *p;
19473 int first = 1;
19475 /* Generate the load multiple instruction to restore the
19476 registers. Note we can get here, even if
19477 frame_pointer_needed is true, but only if sp already
19478 points to the base of the saved core registers. */
19479 if (live_regs_mask & (1 << SP_REGNUM))
19481 unsigned HOST_WIDE_INT stack_adjust;
19483 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19484 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19486 if (stack_adjust && arm_arch5 && TARGET_ARM)
19487 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19488 else
19490 /* If we can't use ldmib (SA110 bug),
19491 then try to pop r3 instead. */
19492 if (stack_adjust)
19493 live_regs_mask |= 1 << 3;
19495 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19498 /* For interrupt returns we have to use an LDM rather than
19499 a POP so that we can use the exception return variant. */
19500 else if (IS_INTERRUPT (func_type))
19501 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19502 else
19503 sprintf (instr, "pop%s\t{", conditional);
19505 p = instr + strlen (instr);
19507 for (reg = 0; reg <= SP_REGNUM; reg++)
19508 if (live_regs_mask & (1 << reg))
19510 int l = strlen (reg_names[reg]);
19512 if (first)
19513 first = 0;
19514 else
19516 memcpy (p, ", ", 2);
19517 p += 2;
19520 memcpy (p, "%|", 2);
19521 memcpy (p + 2, reg_names[reg], l);
19522 p += l + 2;
19525 if (live_regs_mask & (1 << LR_REGNUM))
19527 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19528 /* If returning from an interrupt, restore the CPSR. */
19529 if (IS_INTERRUPT (func_type))
19530 strcat (p, "^");
19532 else
19533 strcpy (p, "}");
19536 output_asm_insn (instr, & operand);
19538 /* See if we need to generate an extra instruction to
19539 perform the actual function return. */
19540 if (really_return
19541 && func_type != ARM_FT_INTERWORKED
19542 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19544 /* The return has already been handled
19545 by loading the LR into the PC. */
19546 return "";
19550 if (really_return)
19552 switch ((int) ARM_FUNC_TYPE (func_type))
19554 case ARM_FT_ISR:
19555 case ARM_FT_FIQ:
19556 /* ??? This is wrong for unified assembly syntax. */
19557 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19558 break;
19560 case ARM_FT_INTERWORKED:
19561 gcc_assert (arm_arch5 || arm_arch4t);
19562 sprintf (instr, "bx%s\t%%|lr", conditional);
19563 break;
19565 case ARM_FT_EXCEPTION:
19566 /* ??? This is wrong for unified assembly syntax. */
19567 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19568 break;
19570 default:
19571 if (IS_CMSE_ENTRY (func_type))
19573 /* Check if we have to clear the 'GE bits', which are only used if
19574 parallel add and subtraction instructions are available.  */
19575 if (TARGET_INT_SIMD)
19576 snprintf (instr, sizeof (instr),
19577 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19578 else
19579 snprintf (instr, sizeof (instr),
19580 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19582 output_asm_insn (instr, & operand);
19583 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19585 /* Clear the cumulative exception-status bits (0-4,7) and the
19586 condition code bits (28-31) of the FPSCR. We need to
19587 remember to clear the first scratch register used (IP) and
19588 save and restore the second (r4). */
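/* The constant built below is 0x0FFFFF60: movw #65376 gives the low half
0xFF60 and movt #4095 gives the high half 0x0FFF.  ANDing the FPSCR copy
held in IP with it clears exactly bits 0-4, 7 and 28-31 while leaving the
other bits untouched.  */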
19589 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19590 output_asm_insn (instr, & operand);
19591 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19592 output_asm_insn (instr, & operand);
19593 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19594 output_asm_insn (instr, & operand);
19595 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19596 output_asm_insn (instr, & operand);
19597 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19598 output_asm_insn (instr, & operand);
19599 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19600 output_asm_insn (instr, & operand);
19601 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19602 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19604 output_asm_insn (instr, & operand);
19606 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19608 /* Use bx if it's available. */
19609 else if (arm_arch5 || arm_arch4t)
19610 sprintf (instr, "bx%s\t%%|lr", conditional);
19611 else
19612 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19613 break;
19616 output_asm_insn (instr, & operand);
19619 return "";
19622 /* Output in FILE asm statements needed to declare the NAME of the function
19623 defined by its DECL node. */
19625 void
19626 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19628 size_t cmse_name_len;
19629 char *cmse_name = 0;
19630 char cmse_prefix[] = "__acle_se_";
19632 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19633 extra function label for each function with the 'cmse_nonsecure_entry'
19634 attribute. This extra function label should be prepended with
19635 '__acle_se_', telling the linker that it needs to create secure gateway
19636 veneers for this function. */
19637 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19638 DECL_ATTRIBUTES (decl)))
19640 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19641 cmse_name = XALLOCAVEC (char, cmse_name_len);
19642 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19643 targetm.asm_out.globalize_label (file, cmse_name);
19645 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19646 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19649 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19650 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19651 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19652 ASM_OUTPUT_LABEL (file, name);
19654 if (cmse_name)
19655 ASM_OUTPUT_LABEL (file, cmse_name);
19657 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19660 /* Write the function name into the code section, directly preceding
19661 the function prologue.
19663 Code will be output similar to this:
19665 .ascii "arm_poke_function_name", 0
19666 .align
19668 .word 0xff000000 + (t1 - t0)
19669 arm_poke_function_name
19670 mov ip, sp
19671 stmfd sp!, {fp, ip, lr, pc}
19672 sub fp, ip, #4
19674 When performing a stack backtrace, code can inspect the value
19675 of 'pc' stored at 'fp' + 0. If the trace function then looks
19676 at location pc - 12 and the top 8 bits are set, then we know
19677 that there is a function name embedded immediately preceding this
19678 location, and that its length is ((pc[-3]) & ~0xff000000).
19680 We assume that pc is declared as a pointer to an unsigned long.
19682 It is of no benefit to output the function name if we are assembling
19683 a leaf function. These function types will not contain a stack
19684 backtrace structure, so it is not possible to determine the
19685 function name. */
19686 void
19687 arm_poke_function_name (FILE *stream, const char *name)
19689 unsigned long alignlength;
19690 unsigned long length;
19691 rtx x;
19693 length = strlen (name) + 1;
19694 alignlength = ROUND_UP_WORD (length);
19696 ASM_OUTPUT_ASCII (stream, name, length);
19697 ASM_OUTPUT_ALIGN (stream, 2);
19698 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19699 assemble_aligned_integer (UNITS_PER_WORD, x);
19702 /* Place some comments into the assembler stream
19703 describing the current function. */
19704 static void
19705 arm_output_function_prologue (FILE *f)
19707 unsigned long func_type;
19709 /* Sanity check. */
19710 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19712 func_type = arm_current_func_type ();
19714 switch ((int) ARM_FUNC_TYPE (func_type))
19716 default:
19717 case ARM_FT_NORMAL:
19718 break;
19719 case ARM_FT_INTERWORKED:
19720 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19721 break;
19722 case ARM_FT_ISR:
19723 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19724 break;
19725 case ARM_FT_FIQ:
19726 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19727 break;
19728 case ARM_FT_EXCEPTION:
19729 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19730 break;
19733 if (IS_NAKED (func_type))
19734 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19736 if (IS_VOLATILE (func_type))
19737 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19739 if (IS_NESTED (func_type))
19740 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19741 if (IS_STACKALIGN (func_type))
19742 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19743 if (IS_CMSE_ENTRY (func_type))
19744 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19746 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19747 crtl->args.size,
19748 crtl->args.pretend_args_size,
19749 (HOST_WIDE_INT) get_frame_size ());
19751 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19752 frame_pointer_needed,
19753 cfun->machine->uses_anonymous_args);
19755 if (cfun->machine->lr_save_eliminated)
19756 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19758 if (crtl->calls_eh_return)
19759 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19763 static void
19764 arm_output_function_epilogue (FILE *)
19766 arm_stack_offsets *offsets;
19768 if (TARGET_THUMB1)
19770 int regno;
19772 /* Emit any call-via-reg trampolines that are needed for v4t support
19773 of call_reg and call_value_reg type insns. */
19774 for (regno = 0; regno < LR_REGNUM; regno++)
19776 rtx label = cfun->machine->call_via[regno];
19778 if (label != NULL)
19780 switch_to_section (function_section (current_function_decl));
19781 targetm.asm_out.internal_label (asm_out_file, "L",
19782 CODE_LABEL_NUMBER (label));
19783 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19787 /* ??? Probably not safe to set this here, since it assumes that a
19788 function will be emitted as assembly immediately after we generate
19789 RTL for it. This does not happen for inline functions. */
19790 cfun->machine->return_used_this_function = 0;
19792 else /* TARGET_32BIT */
19794 /* We need to take into account any stack-frame rounding. */
19795 offsets = arm_get_frame_offsets ();
19797 gcc_assert (!use_return_insn (FALSE, NULL)
19798 || (cfun->machine->return_used_this_function != 0)
19799 || offsets->saved_regs == offsets->outgoing_args
19800 || frame_pointer_needed);
19804 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19805 STR and STRD.  If an even number of registers is being pushed, one
19806 STRD pattern is created for each register pair.  If an
19807 odd number of registers is pushed, an initial STR is emitted, followed by
19808 as many STRD instructions as are needed.  This works best when the
19809 stack is initially 64-bit aligned (the normal case), since it
19810 ensures that each STRD is also 64-bit aligned. */
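/* For example, pushing {r4, r5, r6} (an odd count) produces, in effect,

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

so the doubleword store stays 64-bit aligned.  */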
19811 static void
19812 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19814 int num_regs = 0;
19815 int i;
19816 int regno;
19817 rtx par = NULL_RTX;
19818 rtx dwarf = NULL_RTX;
19819 rtx tmp;
19820 bool first = true;
19822 num_regs = bit_count (saved_regs_mask);
19824 /* Must be at least one register to save, and can't save SP or PC. */
19825 gcc_assert (num_regs > 0 && num_regs <= 14);
19826 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19827 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19829 /* Create sequence for DWARF info. All the frame-related data for
19830 debugging is held in this wrapper. */
19831 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19833 /* Describe the stack adjustment. */
19834 tmp = gen_rtx_SET (stack_pointer_rtx,
19835 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19836 RTX_FRAME_RELATED_P (tmp) = 1;
19837 XVECEXP (dwarf, 0, 0) = tmp;
19839 /* Find the first register. */
19840 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19843 i = 0;
19845 /* If there's an odd number of registers to push, start off by
19846 pushing a single register. This ensures that subsequent strd
19847 operations are dword aligned (assuming that SP was originally
19848 64-bit aligned). */
19849 if ((num_regs & 1) != 0)
19851 rtx reg, mem, insn;
19853 reg = gen_rtx_REG (SImode, regno);
19854 if (num_regs == 1)
19855 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19856 stack_pointer_rtx));
19857 else
19858 mem = gen_frame_mem (Pmode,
19859 gen_rtx_PRE_MODIFY
19860 (Pmode, stack_pointer_rtx,
19861 plus_constant (Pmode, stack_pointer_rtx,
19862 -4 * num_regs)));
19864 tmp = gen_rtx_SET (mem, reg);
19865 RTX_FRAME_RELATED_P (tmp) = 1;
19866 insn = emit_insn (tmp);
19867 RTX_FRAME_RELATED_P (insn) = 1;
19868 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19869 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19870 RTX_FRAME_RELATED_P (tmp) = 1;
19871 i++;
19872 regno++;
19873 XVECEXP (dwarf, 0, i) = tmp;
19874 first = false;
19877 while (i < num_regs)
19878 if (saved_regs_mask & (1 << regno))
19880 rtx reg1, reg2, mem1, mem2;
19881 rtx tmp0, tmp1, tmp2;
19882 int regno2;
19884 /* Find the register to pair with this one. */
19885 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19886 regno2++)
19889 reg1 = gen_rtx_REG (SImode, regno);
19890 reg2 = gen_rtx_REG (SImode, regno2);
19892 if (first)
19894 rtx insn;
19896 first = false;
19897 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19898 stack_pointer_rtx,
19899 -4 * num_regs));
19900 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19901 stack_pointer_rtx,
19902 -4 * (num_regs - 1)));
19903 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19904 plus_constant (Pmode, stack_pointer_rtx,
19905 -4 * (num_regs)));
19906 tmp1 = gen_rtx_SET (mem1, reg1);
19907 tmp2 = gen_rtx_SET (mem2, reg2);
19908 RTX_FRAME_RELATED_P (tmp0) = 1;
19909 RTX_FRAME_RELATED_P (tmp1) = 1;
19910 RTX_FRAME_RELATED_P (tmp2) = 1;
19911 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19912 XVECEXP (par, 0, 0) = tmp0;
19913 XVECEXP (par, 0, 1) = tmp1;
19914 XVECEXP (par, 0, 2) = tmp2;
19915 insn = emit_insn (par);
19916 RTX_FRAME_RELATED_P (insn) = 1;
19917 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19919 else
19921 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19922 stack_pointer_rtx,
19923 4 * i));
19924 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19925 stack_pointer_rtx,
19926 4 * (i + 1)));
19927 tmp1 = gen_rtx_SET (mem1, reg1);
19928 tmp2 = gen_rtx_SET (mem2, reg2);
19929 RTX_FRAME_RELATED_P (tmp1) = 1;
19930 RTX_FRAME_RELATED_P (tmp2) = 1;
19931 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19932 XVECEXP (par, 0, 0) = tmp1;
19933 XVECEXP (par, 0, 1) = tmp2;
19934 emit_insn (par);
19937 /* Create unwind information. This is an approximation. */
19938 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19939 plus_constant (Pmode,
19940 stack_pointer_rtx,
19941 4 * i)),
19942 reg1);
19943 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19944 plus_constant (Pmode,
19945 stack_pointer_rtx,
19946 4 * (i + 1))),
19947 reg2);
19949 RTX_FRAME_RELATED_P (tmp1) = 1;
19950 RTX_FRAME_RELATED_P (tmp2) = 1;
19951 XVECEXP (dwarf, 0, i + 1) = tmp1;
19952 XVECEXP (dwarf, 0, i + 2) = tmp2;
19953 i += 2;
19954 regno = regno2 + 1;
19956 else
19957 regno++;
19959 return;
19962 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19963 whenever possible; otherwise it emits single-word stores.  The first store
19964 also allocates stack space for all saved registers, using writeback with
19965 post-addressing mode. All other stores use offset addressing. If no STRD
19966 can be emitted, this function emits a sequence of single-word stores,
19967 and not an STM as before, because single-word stores provide more freedom
19968 in scheduling and can be turned into an STM by peephole optimizations. */
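/* For example, pushing {r4, r5, r7} produces, in effect,

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

with the first store performing the whole stack allocation.  */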
19969 static void
19970 arm_emit_strd_push (unsigned long saved_regs_mask)
19972 int num_regs = 0;
19973 int i, j, dwarf_index = 0;
19974 int offset = 0;
19975 rtx dwarf = NULL_RTX;
19976 rtx insn = NULL_RTX;
19977 rtx tmp, mem;
19979 /* TODO: More efficient code can be emitted by changing the
19980 layout, e.g., first push all pairs that can use STRD to keep the
19981 stack aligned, and then push all other registers. */
19982 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19983 if (saved_regs_mask & (1 << i))
19984 num_regs++;
19986 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19987 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19988 gcc_assert (num_regs > 0);
19990 /* Create sequence for DWARF info. */
19991 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19993 /* For dwarf info, we generate explicit stack update. */
19994 tmp = gen_rtx_SET (stack_pointer_rtx,
19995 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19996 RTX_FRAME_RELATED_P (tmp) = 1;
19997 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19999 /* Save registers. */
20000 offset = - 4 * num_regs;
20001 j = 0;
20002 while (j <= LAST_ARM_REGNUM)
20003 if (saved_regs_mask & (1 << j))
20005 if ((j % 2 == 0)
20006 && (saved_regs_mask & (1 << (j + 1))))
20008 /* The current register and the next register form a register pair
20009 for which STRD can be generated. */
20010 if (offset < 0)
20012 /* Allocate stack space for all saved registers. */
20013 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20014 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20015 mem = gen_frame_mem (DImode, tmp);
20016 offset = 0;
20018 else if (offset > 0)
20019 mem = gen_frame_mem (DImode,
20020 plus_constant (Pmode,
20021 stack_pointer_rtx,
20022 offset));
20023 else
20024 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20026 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20027 RTX_FRAME_RELATED_P (tmp) = 1;
20028 tmp = emit_insn (tmp);
20030 /* Record the first store insn. */
20031 if (dwarf_index == 1)
20032 insn = tmp;
20034 /* Generate dwarf info. */
20035 mem = gen_frame_mem (SImode,
20036 plus_constant (Pmode,
20037 stack_pointer_rtx,
20038 offset));
20039 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20040 RTX_FRAME_RELATED_P (tmp) = 1;
20041 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20043 mem = gen_frame_mem (SImode,
20044 plus_constant (Pmode,
20045 stack_pointer_rtx,
20046 offset + 4));
20047 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20048 RTX_FRAME_RELATED_P (tmp) = 1;
20049 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20051 offset += 8;
20052 j += 2;
20054 else
20056 /* Emit a single word store. */
20057 if (offset < 0)
20059 /* Allocate stack space for all saved registers. */
20060 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20061 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20062 mem = gen_frame_mem (SImode, tmp);
20063 offset = 0;
20065 else if (offset > 0)
20066 mem = gen_frame_mem (SImode,
20067 plus_constant (Pmode,
20068 stack_pointer_rtx,
20069 offset));
20070 else
20071 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20073 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20074 RTX_FRAME_RELATED_P (tmp) = 1;
20075 tmp = emit_insn (tmp);
20077 /* Record the first store insn. */
20078 if (dwarf_index == 1)
20079 insn = tmp;
20081 /* Generate dwarf info. */
20082 mem = gen_frame_mem (SImode,
20083 plus_constant(Pmode,
20084 stack_pointer_rtx,
20085 offset));
20086 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20087 RTX_FRAME_RELATED_P (tmp) = 1;
20088 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20090 offset += 4;
20091 j += 1;
20094 else
20095 j++;
20097 /* Attach dwarf info to the first insn we generate. */
20098 gcc_assert (insn != NULL_RTX);
20099 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20100 RTX_FRAME_RELATED_P (insn) = 1;
20103 /* Generate and emit an insn that we will recognize as a push_multi.
20104 Unfortunately, since this insn does not reflect very well the actual
20105 semantics of the operation, we need to annotate the insn for the benefit
20106 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20107 MASK for registers that should be annotated for DWARF2 frame unwind
20108 information. */
20109 static rtx
20110 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20112 int num_regs = 0;
20113 int num_dwarf_regs = 0;
20114 int i, j;
20115 rtx par;
20116 rtx dwarf;
20117 int dwarf_par_index;
20118 rtx tmp, reg;
20120 /* We don't record the PC in the dwarf frame information. */
20121 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20123 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20125 if (mask & (1 << i))
20126 num_regs++;
20127 if (dwarf_regs_mask & (1 << i))
20128 num_dwarf_regs++;
20131 gcc_assert (num_regs && num_regs <= 16);
20132 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20134 /* For the body of the insn we are going to generate an UNSPEC in
20135 parallel with several USEs. This allows the insn to be recognized
20136 by the push_multi pattern in the arm.md file.
20138 The body of the insn looks something like this:
20140 (parallel [
20141 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20142 (const_int:SI <num>)))
20143 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20144 (use (reg:SI XX))
20145 (use (reg:SI YY))
20149 For the frame note however, we try to be more explicit and actually
20150 show each register being stored into the stack frame, plus a (single)
20151 decrement of the stack pointer. We do it this way in order to be
20152 friendly to the stack unwinding code, which only wants to see a single
20153 stack decrement per instruction. The RTL we generate for the note looks
20154 something like this:
20156 (sequence [
20157 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20158 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20159 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20160 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20164 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20165 instead we'd have a parallel expression detailing all
20166 the stores to the various memory addresses so that debug
20167 information is more up-to-date. Remember however while writing
20168 this to take care of the constraints with the push instruction.
20170 Note also that this has to be taken care of for the VFP registers.
20172 For more see PR43399. */
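/* As a concrete example, a push of {r4, r5, lr} yields one push_multi insn
whose body stores through a pre_modify of SP by -12 and carries USEs of r5
and lr, while the attached note describes the SP adjustment plus the three
individual word stores at [sp], [sp, #4] and [sp, #8].  */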
20174 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20175 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20176 dwarf_par_index = 1;
20178 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20180 if (mask & (1 << i))
20182 reg = gen_rtx_REG (SImode, i);
20184 XVECEXP (par, 0, 0)
20185 = gen_rtx_SET (gen_frame_mem
20186 (BLKmode,
20187 gen_rtx_PRE_MODIFY (Pmode,
20188 stack_pointer_rtx,
20189 plus_constant
20190 (Pmode, stack_pointer_rtx,
20191 -4 * num_regs))
20193 gen_rtx_UNSPEC (BLKmode,
20194 gen_rtvec (1, reg),
20195 UNSPEC_PUSH_MULT));
20197 if (dwarf_regs_mask & (1 << i))
20199 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20200 reg);
20201 RTX_FRAME_RELATED_P (tmp) = 1;
20202 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20205 break;
20209 for (j = 1, i++; j < num_regs; i++)
20211 if (mask & (1 << i))
20213 reg = gen_rtx_REG (SImode, i);
20215 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20217 if (dwarf_regs_mask & (1 << i))
20220 = gen_rtx_SET (gen_frame_mem
20221 (SImode,
20222 plus_constant (Pmode, stack_pointer_rtx,
20223 4 * j)),
20224 reg);
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20229 j++;
20233 par = emit_insn (par);
20235 tmp = gen_rtx_SET (stack_pointer_rtx,
20236 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20237 RTX_FRAME_RELATED_P (tmp) = 1;
20238 XVECEXP (dwarf, 0, 0) = tmp;
20240 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20242 return par;
20245 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20246 SIZE is the offset to be adjusted.
20247 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
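/* For example, when arm_emit_multi_reg_pop restores three core registers it
calls this with SIZE == 12 and DEST == SRC == stack_pointer_rtx, so the
attached note reads (set sp (plus sp 12)).  */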
20248 static void
20249 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20251 rtx dwarf;
20253 RTX_FRAME_RELATED_P (insn) = 1;
20254 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20255 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20258 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20259 SAVED_REGS_MASK shows which registers need to be restored.
20261 Unfortunately, since this insn does not reflect very well the actual
20262 semantics of the operation, we need to annotate the insn for the benefit
20263 of DWARF2 frame unwind information. */
20264 static void
20265 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20267 int num_regs = 0;
20268 int i, j;
20269 rtx par;
20270 rtx dwarf = NULL_RTX;
20271 rtx tmp, reg;
20272 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20273 int offset_adj;
20274 int emit_update;
20276 offset_adj = return_in_pc ? 1 : 0;
20277 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20278 if (saved_regs_mask & (1 << i))
20279 num_regs++;
20281 gcc_assert (num_regs && num_regs <= 16);
20283 /* If SP is in reglist, then we don't emit SP update insn. */
20284 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20286 /* The parallel needs to hold num_regs SETs
20287 and one SET for the stack update. */
20288 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20290 if (return_in_pc)
20291 XVECEXP (par, 0, 0) = ret_rtx;
20293 if (emit_update)
20295 /* Increment the stack pointer, based on there being
20296 num_regs 4-byte registers to restore. */
20297 tmp = gen_rtx_SET (stack_pointer_rtx,
20298 plus_constant (Pmode,
20299 stack_pointer_rtx,
20300 4 * num_regs));
20301 RTX_FRAME_RELATED_P (tmp) = 1;
20302 XVECEXP (par, 0, offset_adj) = tmp;
20305 /* Now restore every reg, which may include PC. */
20306 for (j = 0, i = 0; j < num_regs; i++)
20307 if (saved_regs_mask & (1 << i))
20309 reg = gen_rtx_REG (SImode, i);
20310 if ((num_regs == 1) && emit_update && !return_in_pc)
20312 /* Emit single load with writeback. */
20313 tmp = gen_frame_mem (SImode,
20314 gen_rtx_POST_INC (Pmode,
20315 stack_pointer_rtx));
20316 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20317 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20318 return;
20321 tmp = gen_rtx_SET (reg,
20322 gen_frame_mem
20323 (SImode,
20324 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20325 RTX_FRAME_RELATED_P (tmp) = 1;
20326 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20328 /* We need to maintain a sequence for DWARF info too. As dwarf info
20329 should not have PC, skip PC. */
20330 if (i != PC_REGNUM)
20331 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20333 j++;
20336 if (return_in_pc)
20337 par = emit_jump_insn (par);
20338 else
20339 par = emit_insn (par);
20341 REG_NOTES (par) = dwarf;
20342 if (!return_in_pc)
20343 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20344 stack_pointer_rtx, stack_pointer_rtx);
20347 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20348 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20350 Unfortunately, since this insn does not reflect very well the actual
20351 semantics of the operation, we need to annotate the insn for the benefit
20352 of DWARF2 frame unwind information. */
20353 static void
20354 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20356 int i, j;
20357 rtx par;
20358 rtx dwarf = NULL_RTX;
20359 rtx tmp, reg;
20361 gcc_assert (num_regs && num_regs <= 32);
20363 /* Workaround ARM10 VFPr1 bug. */
20364 if (num_regs == 2 && !arm_arch6)
20366 if (first_reg == 15)
20367 first_reg--;
20369 num_regs++;
20372 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20373 there could be up to 32 D-registers to restore.
20374 If there are more than 16 D-registers, make two recursive calls,
20375 each of which emits one pop_multi instruction. */
20376 if (num_regs > 16)
20378 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20379 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20380 return;
20383 /* The parallel needs to hold num_regs SETs
20384 and one SET for the stack update. */
20385 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20387 /* Increment the stack pointer, based on there being
20388 num_regs 8-byte registers to restore. */
20389 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20390 RTX_FRAME_RELATED_P (tmp) = 1;
20391 XVECEXP (par, 0, 0) = tmp;
20393 /* Now show every reg that will be restored, using a SET for each. */
20394 for (j = 0, i=first_reg; j < num_regs; i += 2)
20396 reg = gen_rtx_REG (DFmode, i);
20398 tmp = gen_rtx_SET (reg,
20399 gen_frame_mem
20400 (DFmode,
20401 plus_constant (Pmode, base_reg, 8 * j)));
20402 RTX_FRAME_RELATED_P (tmp) = 1;
20403 XVECEXP (par, 0, j + 1) = tmp;
20405 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20407 j++;
20410 par = emit_insn (par);
20411 REG_NOTES (par) = dwarf;
20413 /* Make sure the CFA doesn't remain based on IP_REGNUM, to allow unwinding from FP. */
20414 if (REGNO (base_reg) == IP_REGNUM)
20416 RTX_FRAME_RELATED_P (par) = 1;
20417 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20419 else
20420 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20421 base_reg, base_reg);
20424 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
20425 even number of registers is being popped, LDRD patterns are created for
20426 all register pairs.  If an odd number of registers is popped, the last register
20427 is loaded using an LDR pattern. */
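/* For example, popping {r4, r5, r6, pc} produces, in effect,

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

with PC handled by the final pop_multi rather than by an LDRD.  */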
20428 static void
20429 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20431 int num_regs = 0;
20432 int i, j;
20433 rtx par = NULL_RTX;
20434 rtx dwarf = NULL_RTX;
20435 rtx tmp, reg, tmp1;
20436 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20438 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20439 if (saved_regs_mask & (1 << i))
20440 num_regs++;
20442 gcc_assert (num_regs && num_regs <= 16);
20444 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20445 to be popped. So, if num_regs is even, now it will become odd,
20446 and we can generate pop with PC. If num_regs is odd, it will be
20447 even now, and ldr with return can be generated for PC. */
20448 if (return_in_pc)
20449 num_regs--;
20451 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20453 /* Var j iterates over all the registers to collect those set in
20454 saved_regs_mask.  Var i gives the index of a saved register in the stack frame.
20455 A PARALLEL RTX of register-pair is created here, so that pattern for
20456 LDRD can be matched. As PC is always last register to be popped, and
20457 we have already decremented num_regs if PC, we don't have to worry
20458 about PC in this loop. */
20459 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20460 if (saved_regs_mask & (1 << j))
20462 /* Create RTX for memory load. */
20463 reg = gen_rtx_REG (SImode, j);
20464 tmp = gen_rtx_SET (reg,
20465 gen_frame_mem (SImode,
20466 plus_constant (Pmode,
20467 stack_pointer_rtx, 4 * i)));
20468 RTX_FRAME_RELATED_P (tmp) = 1;
20470 if (i % 2 == 0)
20472 /* When saved-register index (i) is even, the RTX to be emitted is
20473 yet to be created. Hence create it first. The LDRD pattern we
20474 are generating is :
20475 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20476 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20477 where target registers need not be consecutive. */
20478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20479 dwarf = NULL_RTX;
20482 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20483 added as 0th element and if i is odd, reg_i is added as 1st element
20484 of LDRD pattern shown above. */
20485 XVECEXP (par, 0, (i % 2)) = tmp;
20486 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20488 if ((i % 2) == 1)
20490 /* When saved-register index (i) is odd, RTXs for both the registers
20491 to be loaded are generated in above given LDRD pattern, and the
20492 pattern can be emitted now. */
20493 par = emit_insn (par);
20494 REG_NOTES (par) = dwarf;
20495 RTX_FRAME_RELATED_P (par) = 1;
20498 i++;
20501 /* If num_regs (as adjusted above) is odd and return_in_pc is false, or
20502 num_regs is even and return_in_pc is true, the last register is
20503 popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20504 then use LDR with post increment. */
20506 /* Increment the stack pointer, based on there being
20507 num_regs 4-byte registers to restore. */
20508 tmp = gen_rtx_SET (stack_pointer_rtx,
20509 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20510 RTX_FRAME_RELATED_P (tmp) = 1;
20511 tmp = emit_insn (tmp);
20512 if (!return_in_pc)
20514 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20515 stack_pointer_rtx, stack_pointer_rtx);
20518 dwarf = NULL_RTX;
20520 if (((num_regs % 2) == 1 && !return_in_pc)
20521 || ((num_regs % 2) == 0 && return_in_pc))
20523 /* Scan for the single register to be popped. Skip until the saved
20524 register is found. */
20525 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20527 /* Gen LDR with post increment here. */
20528 tmp1 = gen_rtx_MEM (SImode,
20529 gen_rtx_POST_INC (SImode,
20530 stack_pointer_rtx));
20531 set_mem_alias_set (tmp1, get_frame_alias_set ());
20533 reg = gen_rtx_REG (SImode, j);
20534 tmp = gen_rtx_SET (reg, tmp1);
20535 RTX_FRAME_RELATED_P (tmp) = 1;
20536 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20538 if (return_in_pc)
20540 /* If return_in_pc, j must be PC_REGNUM. */
20541 gcc_assert (j == PC_REGNUM);
20542 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20543 XVECEXP (par, 0, 0) = ret_rtx;
20544 XVECEXP (par, 0, 1) = tmp;
20545 par = emit_jump_insn (par);
20547 else
20549 par = emit_insn (tmp);
20550 REG_NOTES (par) = dwarf;
20551 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20552 stack_pointer_rtx, stack_pointer_rtx);
20556 else if ((num_regs % 2) == 1 && return_in_pc)
20558 /* There are 2 registers to be popped. So, generate the pattern
20559 pop_multiple_with_stack_update_and_return to pop in PC. */
20560 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20563 return;
20566 /* LDRD in ARM mode needs consecutive registers as operands. This function
20567 emits LDRD whenever possible; otherwise it emits single-word loads.  It uses
20568 offset addressing and then generates one separate stack update.  This provides
20569 more scheduling freedom, compared to writeback on every load. However,
20570 if the function returns using load into PC directly
20571 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20572 before the last load. TODO: Add a peephole optimization to recognize
20573 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20574 peephole optimization to merge the load at stack-offset zero
20575 with the stack update instruction using load with writeback
20576 in post-index addressing mode. */
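/* For example, popping {r4, r5, r6} produces, in effect,

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

with a single stack update after all the loads.  */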
20577 static void
20578 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20580 int j = 0;
20581 int offset = 0;
20582 rtx par = NULL_RTX;
20583 rtx dwarf = NULL_RTX;
20584 rtx tmp, mem;
20586 /* Restore saved registers. */
20587 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20588 j = 0;
20589 while (j <= LAST_ARM_REGNUM)
20590 if (saved_regs_mask & (1 << j))
20592 if ((j % 2) == 0
20593 && (saved_regs_mask & (1 << (j + 1)))
20594 && (j + 1) != PC_REGNUM)
20596 /* The current register and the next register form a register pair for which
20597 LDRD can be generated. PC is always the last register popped, and
20598 we handle it separately. */
20599 if (offset > 0)
20600 mem = gen_frame_mem (DImode,
20601 plus_constant (Pmode,
20602 stack_pointer_rtx,
20603 offset));
20604 else
20605 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20607 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20608 tmp = emit_insn (tmp);
20609 RTX_FRAME_RELATED_P (tmp) = 1;
20611 /* Generate dwarf info. */
20613 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20614 gen_rtx_REG (SImode, j),
20615 NULL_RTX);
20616 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20617 gen_rtx_REG (SImode, j + 1),
20618 dwarf);
20620 REG_NOTES (tmp) = dwarf;
20622 offset += 8;
20623 j += 2;
20625 else if (j != PC_REGNUM)
20627 /* Emit a single word load. */
20628 if (offset > 0)
20629 mem = gen_frame_mem (SImode,
20630 plus_constant (Pmode,
20631 stack_pointer_rtx,
20632 offset));
20633 else
20634 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20636 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20637 tmp = emit_insn (tmp);
20638 RTX_FRAME_RELATED_P (tmp) = 1;
20640 /* Generate dwarf info. */
20641 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20642 gen_rtx_REG (SImode, j),
20643 NULL_RTX);
20645 offset += 4;
20646 j += 1;
20648 else /* j == PC_REGNUM */
20649 j++;
20651 else
20652 j++;
20654 /* Update the stack. */
20655 if (offset > 0)
20657 tmp = gen_rtx_SET (stack_pointer_rtx,
20658 plus_constant (Pmode,
20659 stack_pointer_rtx,
20660 offset));
20661 tmp = emit_insn (tmp);
20662 arm_add_cfa_adjust_cfa_note (tmp, offset,
20663 stack_pointer_rtx, stack_pointer_rtx);
20664 offset = 0;
20667 if (saved_regs_mask & (1 << PC_REGNUM))
20669 /* Only PC is to be popped. */
20670 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20671 XVECEXP (par, 0, 0) = ret_rtx;
20672 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20673 gen_frame_mem (SImode,
20674 gen_rtx_POST_INC (SImode,
20675 stack_pointer_rtx)));
20676 RTX_FRAME_RELATED_P (tmp) = 1;
20677 XVECEXP (par, 0, 1) = tmp;
20678 par = emit_jump_insn (par);
20680 /* Generate dwarf info. */
20681 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20682 gen_rtx_REG (SImode, PC_REGNUM),
20683 NULL_RTX);
20684 REG_NOTES (par) = dwarf;
20685 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20686 stack_pointer_rtx, stack_pointer_rtx);
20690 /* Calculate the size of the return value that is passed in registers. */
20691 static unsigned
20692 arm_size_return_regs (void)
20694 machine_mode mode;
20696 if (crtl->return_rtx != 0)
20697 mode = GET_MODE (crtl->return_rtx);
20698 else
20699 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20701 return GET_MODE_SIZE (mode);
20704 /* Return true if the current function needs to save/restore LR. */
20705 static bool
20706 thumb_force_lr_save (void)
20708 return !cfun->machine->lr_save_eliminated
20709 && (!crtl->is_leaf
20710 || thumb_far_jump_used_p ()
20711 || df_regs_ever_live_p (LR_REGNUM));
20714 /* Return true if CALL is an indirect tail call, in which case
20715 we do not know whether r3 will be available. */
20717 static bool
20718 is_indirect_tailcall_p (rtx call)
20720 rtx pat = PATTERN (call);
20722 /* Indirect tail call. */
20723 pat = XVECEXP (pat, 0, 0);
20724 if (GET_CODE (pat) == SET)
20725 pat = SET_SRC (pat);
20727 pat = XEXP (XEXP (pat, 0), 0);
20728 return REG_P (pat);
20731 /* Return true if r3 is used by any of the tail call insns in the
20732 current function. */
20733 static bool
20734 any_sibcall_could_use_r3 (void)
20736 edge_iterator ei;
20737 edge e;
20739 if (!crtl->tail_call_emit)
20740 return false;
20741 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20742 if (e->flags & EDGE_SIBCALL)
20744 rtx_insn *call = BB_END (e->src);
20745 if (!CALL_P (call))
20746 call = prev_nonnote_nondebug_insn (call);
20747 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20748 if (find_regno_fusage (call, USE, 3)
20749 || is_indirect_tailcall_p (call))
20750 return true;
20752 return false;
20756 /* Compute the distance from register FROM to register TO.
20757 These can be the arg pointer (26), the soft frame pointer (25),
20758 the stack pointer (13) or the hard frame pointer (11).
20759 In thumb mode r7 is used as the soft frame pointer, if needed.
20760 Typical stack layout looks like this:
20762 old stack pointer -> | |
20763 ----
20764 | | \
20765 | | saved arguments for
20766 | | vararg functions
20767 | | /
20769 hard FP & arg pointer -> | | \
20770 | | stack
20771 | | frame
20772 | | /
20774 | | \
20775 | | call saved
20776 | | registers
20777 soft frame pointer -> | | /
20779 | | \
20780 | | local
20781 | | variables
20782 locals base pointer -> | | /
20784 | | \
20785 | | outgoing
20786 | | arguments
20787 current stack pointer -> | | /
20790 For a given function some or all of these stack components
20791 may not be needed, giving rise to the possibility of
20792 eliminating some of the registers.
20794 The values returned by this function must reflect the behavior
20795 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20797 The sign of the number returned reflects the direction of stack
20798 growth, so the values are positive for all eliminations except
20799 from the soft frame pointer to the hard frame pointer.
20801 SFP may point just inside the local variables block to ensure correct
20802 alignment. */
20805 /* Return cached stack offsets. */
20807 static arm_stack_offsets *
20808 arm_get_frame_offsets (void)
20810 struct arm_stack_offsets *offsets;
20812 offsets = &cfun->machine->stack_offsets;
20814 return offsets;
20818 /* Calculate stack offsets. These are used to calculate register elimination
20819 offsets and in prologue/epilogue code. Also calculates which registers
20820 should be saved. */
20822 static void
20823 arm_compute_frame_layout (void)
20825 struct arm_stack_offsets *offsets;
20826 unsigned long func_type;
20827 int saved;
20828 int core_saved;
20829 HOST_WIDE_INT frame_size;
20830 int i;
20832 offsets = &cfun->machine->stack_offsets;
20834 /* Initially this is the size of the local variables.  It will be translated
20835 into an offset once we have determined the size of preceding data. */
20836 frame_size = ROUND_UP_WORD (get_frame_size ());
20838 /* Space for variadic functions. */
20839 offsets->saved_args = crtl->args.pretend_args_size;
20841 /* In Thumb mode this is incorrect, but never used. */
20842 offsets->frame
20843 = (offsets->saved_args
20844 + arm_compute_static_chain_stack_bytes ()
20845 + (frame_pointer_needed ? 4 : 0));
20847 if (TARGET_32BIT)
20849 unsigned int regno;
20851 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20852 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20853 saved = core_saved;
20855 /* We know that SP will be doubleword aligned on entry, and we must
20856 preserve that condition at any subroutine call. We also require the
20857 soft frame pointer to be doubleword aligned. */
20859 if (TARGET_REALLY_IWMMXT)
20861 /* Check for the call-saved iWMMXt registers. */
20862 for (regno = FIRST_IWMMXT_REGNUM;
20863 regno <= LAST_IWMMXT_REGNUM;
20864 regno++)
20865 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20866 saved += 8;
20869 func_type = arm_current_func_type ();
20870 /* Space for saved VFP registers. */
20871 if (! IS_VOLATILE (func_type)
20872 && TARGET_HARD_FLOAT)
20873 saved += arm_get_vfp_saved_size ();
20875 else /* TARGET_THUMB1 */
20877 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20878 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20879 saved = core_saved;
20880 if (TARGET_BACKTRACE)
20881 saved += 16;
20884 /* Saved registers include the stack frame. */
20885 offsets->saved_regs
20886 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20887 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20889 /* A leaf function does not need any stack alignment if it has nothing
20890 on the stack. */
20891 if (crtl->is_leaf && frame_size == 0
20892 /* However if it calls alloca(), we have a dynamically allocated
20893 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20894 && ! cfun->calls_alloca)
20896 offsets->outgoing_args = offsets->soft_frame;
20897 offsets->locals_base = offsets->soft_frame;
20898 return;
20901 /* Ensure SFP has the correct alignment. */
20902 if (ARM_DOUBLEWORD_ALIGN
20903 && (offsets->soft_frame & 7))
20905 offsets->soft_frame += 4;
20906 /* Try to align stack by pushing an extra reg. Don't bother doing this
20907 when there is a stack frame as the alignment will be rolled into
20908 the normal stack adjustment. */
20909 if (frame_size + crtl->outgoing_args_size == 0)
20911 int reg = -1;
20913 /* Register r3 is caller-saved. Normally it does not need to be
20914 saved on entry by the prologue.  However, if we choose to save
20915 it for padding, then we may confuse the compiler into thinking
20916 a prologue sequence is required when in fact it is not. This
20917 will occur when shrink-wrapping if r3 is used as a scratch
20918 register and there are no other callee-saved writes.
20920 This situation can be avoided when other callee-saved registers
20921 are available and r3 is not mandatory if we choose a callee-saved
20922 register for padding. */
20923 bool prefer_callee_reg_p = false;
20925 /* If it is safe to use r3, then do so. This sometimes
20926 generates better code on Thumb-2 by avoiding the need to
20927 use 32-bit push/pop instructions. */
20928 if (! any_sibcall_could_use_r3 ()
20929 && arm_size_return_regs () <= 12
20930 && (offsets->saved_regs_mask & (1 << 3)) == 0
20931 && (TARGET_THUMB2
20932 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20934 reg = 3;
20935 if (!TARGET_THUMB2)
20936 prefer_callee_reg_p = true;
20938 if (reg == -1
20939 || prefer_callee_reg_p)
20941 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20943 /* Avoid fixed registers; they may be changed at
20944 arbitrary times so it's unsafe to restore them
20945 during the epilogue. */
20946 if (!fixed_regs[i]
20947 && (offsets->saved_regs_mask & (1 << i)) == 0)
20949 reg = i;
20950 break;
20955 if (reg != -1)
20957 offsets->saved_regs += 4;
20958 offsets->saved_regs_mask |= (1 << reg);
20963 offsets->locals_base = offsets->soft_frame + frame_size;
20964 offsets->outgoing_args = (offsets->locals_base
20965 + crtl->outgoing_args_size);
20967 if (ARM_DOUBLEWORD_ALIGN)
20969 /* Ensure SP remains doubleword aligned. */
20970 if (offsets->outgoing_args & 7)
20971 offsets->outgoing_args += 4;
20972 gcc_assert (!(offsets->outgoing_args & 7));
20977 /* Calculate the relative offsets for the different stack pointers. Positive
20978 offsets are in the direction of stack growth. */
20980 HOST_WIDE_INT
20981 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20983 arm_stack_offsets *offsets;
20985 offsets = arm_get_frame_offsets ();
20987 /* OK, now we have enough information to compute the distances.
20988 There must be an entry in these switch tables for each pair
20989 of registers in ELIMINABLE_REGS, even if some of the entries
20990 seem to be redundant or useless. */
20991 switch (from)
20993 case ARG_POINTER_REGNUM:
20994 switch (to)
20996 case THUMB_HARD_FRAME_POINTER_REGNUM:
20997 return 0;
20999 case FRAME_POINTER_REGNUM:
21000 /* This is the reverse of the soft frame pointer
21001 to hard frame pointer elimination below. */
21002 return offsets->soft_frame - offsets->saved_args;
21004 case ARM_HARD_FRAME_POINTER_REGNUM:
21005 /* This is only non-zero in the case where the static chain register
21006 is stored above the frame. */
21007 return offsets->frame - offsets->saved_args - 4;
21009 case STACK_POINTER_REGNUM:
21010 /* If nothing has been pushed on the stack at all
21011 then this will return -4. This *is* correct! */
21012 return offsets->outgoing_args - (offsets->saved_args + 4);
21014 default:
21015 gcc_unreachable ();
21017 gcc_unreachable ();
21019 case FRAME_POINTER_REGNUM:
21020 switch (to)
21022 case THUMB_HARD_FRAME_POINTER_REGNUM:
21023 return 0;
21025 case ARM_HARD_FRAME_POINTER_REGNUM:
21026 /* The hard frame pointer points to the top entry in the
21027 stack frame. The soft frame pointer points to the bottom entry
21028 in the stack frame. If there is no stack frame at all,
21029 then they are identical. */
21031 return offsets->frame - offsets->soft_frame;
21033 case STACK_POINTER_REGNUM:
21034 return offsets->outgoing_args - offsets->soft_frame;
21036 default:
21037 gcc_unreachable ();
21039 gcc_unreachable ();
21041 default:
21042 /* You cannot eliminate from the stack pointer.
21043 In theory you could eliminate from the hard frame
21044 pointer to the stack pointer, but this will never
21045 happen, since if a stack frame is not needed the
21046 hard frame pointer will never be used. */
21047 gcc_unreachable ();
21051 /* Given FROM and TO register numbers, say whether this elimination is
21052 allowed. Frame pointer elimination is automatically handled.
21054 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21055 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21056 pointer, we must eliminate FRAME_POINTER_REGNUM into
21057 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21058 ARG_POINTER_REGNUM. */
21060 bool
21061 arm_can_eliminate (const int from, const int to)
21063 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21064 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21065 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21066 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21067 true);
21070 /* Emit RTL to save coprocessor registers on function entry. Returns the
21071 number of bytes pushed. */
21073 static int
21074 arm_save_coproc_regs(void)
21076 int saved_size = 0;
21077 unsigned reg;
21078 unsigned start_reg;
21079 rtx insn;
21081 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21082 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21084 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21085 insn = gen_rtx_MEM (V2SImode, insn);
21086 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21087 RTX_FRAME_RELATED_P (insn) = 1;
21088 saved_size += 8;
21091 if (TARGET_HARD_FLOAT)
21093 start_reg = FIRST_VFP_REGNUM;
21095 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21097 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21098 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21100 if (start_reg != reg)
21101 saved_size += vfp_emit_fstmd (start_reg,
21102 (reg - start_reg) / 2);
21103 start_reg = reg + 2;
21106 if (start_reg != reg)
21107 saved_size += vfp_emit_fstmd (start_reg,
21108 (reg - start_reg) / 2);
21110 return saved_size;
21114 /* Set the Thumb frame pointer from the stack pointer. */
21116 static void
21117 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21119 HOST_WIDE_INT amount;
21120 rtx insn, dwarf;
21122 amount = offsets->outgoing_args - offsets->locals_base;
21123 if (amount < 1024)
21124 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21125 stack_pointer_rtx, GEN_INT (amount)));
21126 else
21128 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21129 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21130 expects the first two operands to be the same. */
21131 if (TARGET_THUMB2)
21133 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21134 stack_pointer_rtx,
21135 hard_frame_pointer_rtx));
21137 else
21139 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21140 hard_frame_pointer_rtx,
21141 stack_pointer_rtx));
21143 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21144 plus_constant (Pmode, stack_pointer_rtx, amount));
21145 RTX_FRAME_RELATED_P (dwarf) = 1;
21146 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21149 RTX_FRAME_RELATED_P (insn) = 1;
21152 struct scratch_reg {
21153 rtx reg;
21154 bool saved;
21157 /* Return a short-lived scratch register for use as a 2nd scratch register on
21158 function entry after the registers are saved in the prologue. This register
21159 must be released by means of release_scratch_register_on_entry. IP is not
21160 considered since it is always used as the 1st scratch register if available.
21162 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21163 mask of live registers. */
21165 static void
21166 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21167 unsigned long live_regs)
21169 int regno = -1;
21171 sr->saved = false;
21173 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21174 regno = LR_REGNUM;
21175 else
21177 unsigned int i;
21179 for (i = 4; i < 11; i++)
21180 if (regno1 != i && (live_regs & (1 << i)) != 0)
21182 regno = i;
21183 break;
21186 if (regno < 0)
21188 /* If IP is used as the 1st scratch register for a nested function,
21189 then either r3 wasn't available or is used to preserve IP. */
21190 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21191 regno1 = 3;
21192 regno = (regno1 == 3 ? 2 : 3);
21193 sr->saved
21194 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21195 regno);
21199 sr->reg = gen_rtx_REG (SImode, regno);
21200 if (sr->saved)
21202 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21203 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21204 rtx x = gen_rtx_SET (stack_pointer_rtx,
21205 plus_constant (Pmode, stack_pointer_rtx, -4));
21206 RTX_FRAME_RELATED_P (insn) = 1;
21207 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21211 /* Release a scratch register obtained from the preceding function. */
21213 static void
21214 release_scratch_register_on_entry (struct scratch_reg *sr)
21216 if (sr->saved)
21218 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21219 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21220 rtx x = gen_rtx_SET (stack_pointer_rtx,
21221 plus_constant (Pmode, stack_pointer_rtx, 4));
21222 RTX_FRAME_RELATED_P (insn) = 1;
21223 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21227 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21229 #if PROBE_INTERVAL > 4096
21230 #error Cannot use indexed addressing mode for stack probing
21231 #endif
21233 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21234 inclusive. These are offsets from the current stack pointer. REGNO1
21235 is the index number of the 1st scratch register and LIVE_REGS is the
21236 mask of live registers. */
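/* An illustrative worked example (not from the original sources): assuming
   PROBE_INTERVAL == 4096, a call with FIRST == 12288 and SIZE == 1024 takes
   the small-size path below: reg1 is set to SP - 16384 and the single probe
   lands at reg1 + (4096 - 1024), i.e. at SP - (FIRST + SIZE) == SP - 13312.  */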
21238 static void
21239 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21240 unsigned int regno1, unsigned long live_regs)
21242 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21244 /* See if we have a constant small number of probes to generate. If so,
21245 that's the easy case. */
21246 if (size <= PROBE_INTERVAL)
21248 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21249 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21250 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21253 /* The run-time loop is made up of 10 insns in the generic case while the
21254 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21255 else if (size <= 5 * PROBE_INTERVAL)
21257 HOST_WIDE_INT i, rem;
21259 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21260 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21261 emit_stack_probe (reg1);
21263 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21264 it exceeds SIZE. If only two probes are needed, this will not
21265 generate any code. Then probe at FIRST + SIZE. */
21266 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21268 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21269 emit_stack_probe (reg1);
21272 rem = size - (i - PROBE_INTERVAL);
21273 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21275 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21276 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21278 else
21279 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21282 /* Otherwise, do the same as above, but in a loop. Note that we must be
21283 extra careful with variables wrapping around because we might be at
21284 the very top (or the very bottom) of the address space and we have
21285 to be able to handle this case properly; in particular, we use an
21286 equality test for the loop condition. */
21287 else
21289 HOST_WIDE_INT rounded_size;
21290 struct scratch_reg sr;
21292 get_scratch_register_on_entry (&sr, regno1, live_regs);
21294 emit_move_insn (reg1, GEN_INT (first));
21297 /* Step 1: round SIZE to the previous multiple of the interval. */
21299 rounded_size = size & -PROBE_INTERVAL;
21300 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21303 /* Step 2: compute initial and final value of the loop counter. */
21305 /* TEST_ADDR = SP + FIRST. */
21306 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21308 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21309 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21312 /* Step 3: the loop
21316 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21317 probe at TEST_ADDR
21319 while (TEST_ADDR != LAST_ADDR)
21321 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21322 until it is equal to ROUNDED_SIZE. */
21324 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21327 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21328 that SIZE is equal to ROUNDED_SIZE. */
21330 if (size != rounded_size)
21332 HOST_WIDE_INT rem = size - rounded_size;
21334 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21336 emit_set_insn (sr.reg,
21337 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21338 emit_stack_probe (plus_constant (Pmode, sr.reg,
21339 PROBE_INTERVAL - rem));
21341 else
21342 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21345 release_scratch_register_on_entry (&sr);
21348 /* Make sure nothing is scheduled before we are done. */
21349 emit_insn (gen_blockage ());
21352 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21353 absolute addresses. */
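/* For reference, a sketch of the loop this routine prints, assuming reg1 is
   r4, reg2 is r5 and PROBE_INTERVAL is 4096 (the register names and label are
   illustrative only):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
 */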
21355 const char *
21356 output_probe_stack_range (rtx reg1, rtx reg2)
21358 static int labelno = 0;
21359 char loop_lab[32];
21360 rtx xops[2];
21362 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21364 /* Loop. */
21365 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21367 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21368 xops[0] = reg1;
21369 xops[1] = GEN_INT (PROBE_INTERVAL);
21370 output_asm_insn ("sub\t%0, %0, %1", xops);
21372 /* Probe at TEST_ADDR. */
21373 output_asm_insn ("str\tr0, [%0, #0]", xops);
21375 /* Test if TEST_ADDR == LAST_ADDR. */
21376 xops[1] = reg2;
21377 output_asm_insn ("cmp\t%0, %1", xops);
21379 /* Branch. */
21380 fputs ("\tbne\t", asm_out_file);
21381 assemble_name_raw (asm_out_file, loop_lab);
21382 fputc ('\n', asm_out_file);
21384 return "";
21387 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21388 function. */
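/* A rough sketch of the kind of code this expands to for a simple ARM
   function with a small frame and no frame pointer (illustrative only, not
   taken from the sources):

	push	{r4, r5, lr}	@ save the live call-saved core registers
	sub	sp, sp, #16	@ allocate locals and outgoing argument space
 */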
21389 void
21390 arm_expand_prologue (void)
21392 rtx amount;
21393 rtx insn;
21394 rtx ip_rtx;
21395 unsigned long live_regs_mask;
21396 unsigned long func_type;
21397 int fp_offset = 0;
21398 int saved_pretend_args = 0;
21399 int saved_regs = 0;
21400 unsigned HOST_WIDE_INT args_to_push;
21401 HOST_WIDE_INT size;
21402 arm_stack_offsets *offsets;
21403 bool clobber_ip;
21405 func_type = arm_current_func_type ();
21407 /* Naked functions don't have prologues. */
21408 if (IS_NAKED (func_type))
21410 if (flag_stack_usage_info)
21411 current_function_static_stack_size = 0;
21412 return;
21415 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21416 args_to_push = crtl->args.pretend_args_size;
21418 /* Compute which registers we will have to save onto the stack. */
21419 offsets = arm_get_frame_offsets ();
21420 live_regs_mask = offsets->saved_regs_mask;
21422 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21424 if (IS_STACKALIGN (func_type))
21426 rtx r0, r1;
21428 /* Handle a word-aligned stack pointer. We generate the following:
21430 mov r0, sp
21431 bic r1, r0, #7
21432 mov sp, r1
21433 <save and restore r0 in normal prologue/epilogue>
21434 mov sp, r0
21435 bx lr
21437 The unwinder doesn't need to know about the stack realignment.
21438 Just tell it we saved SP in r0. */
21439 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21441 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21442 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21444 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21445 RTX_FRAME_RELATED_P (insn) = 1;
21446 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21448 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21450 /* ??? The CFA changes here, which may cause GDB to conclude that it
21451 has entered a different function. That said, the unwind info is
21452 correct, individually, before and after this instruction because
21453 we've described the save of SP, which will override the default
21454 handling of SP as restoring from the CFA. */
21455 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21458 /* The static chain register is the same as the IP register. If it is
21459 clobbered when creating the frame, we need to save and restore it. */
21460 clobber_ip = IS_NESTED (func_type)
21461 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21462 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21463 && !df_regs_ever_live_p (LR_REGNUM)
21464 && arm_r3_live_at_start_p ()));
21466 /* Find somewhere to store IP whilst the frame is being created.
21467 We try the following places in order:
21469 1. The last argument register r3 if it is available.
21470 2. A slot on the stack above the frame if there are no
21471 arguments to push onto the stack.
21472 3. Register r3 again, after pushing the argument registers
21473 onto the stack, if this is a varargs function.
21474 4. The last slot on the stack created for the arguments to
21475 push, if this isn't a varargs function.
21477 Note - we only need to tell the dwarf2 backend about the SP
21478 adjustment in the second variant; the static chain register
21479 doesn't need to be unwound, as it doesn't contain a value
21480 inherited from the caller. */
21481 if (clobber_ip)
21483 if (!arm_r3_live_at_start_p ())
21484 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21485 else if (args_to_push == 0)
21487 rtx addr, dwarf;
21489 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21490 saved_regs += 4;
21492 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21493 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21494 fp_offset = 4;
21496 /* Just tell the dwarf backend that we adjusted SP. */
21497 dwarf = gen_rtx_SET (stack_pointer_rtx,
21498 plus_constant (Pmode, stack_pointer_rtx,
21499 -fp_offset));
21500 RTX_FRAME_RELATED_P (insn) = 1;
21501 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21503 else
21505 /* Store the args on the stack. */
21506 if (cfun->machine->uses_anonymous_args)
21508 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21509 (0xf0 >> (args_to_push / 4)) & 0xf);
21510 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21511 saved_pretend_args = 1;
21513 else
21515 rtx addr, dwarf;
21517 if (args_to_push == 4)
21518 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21519 else
21520 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21521 plus_constant (Pmode,
21522 stack_pointer_rtx,
21523 -args_to_push));
21525 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21527 /* Just tell the dwarf backend that we adjusted SP. */
21528 dwarf = gen_rtx_SET (stack_pointer_rtx,
21529 plus_constant (Pmode, stack_pointer_rtx,
21530 -args_to_push));
21531 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21534 RTX_FRAME_RELATED_P (insn) = 1;
21535 fp_offset = args_to_push;
21536 args_to_push = 0;
21540 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21542 if (IS_INTERRUPT (func_type))
21544 /* Interrupt functions must not corrupt any registers.
21545 Creating a frame pointer, however, corrupts the IP
21546 register, so we must push it first. */
21547 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21549 /* Do not set RTX_FRAME_RELATED_P on this insn.
21550 The dwarf stack unwinding code only wants to see one
21551 stack decrement per function, and this is not it. If
21552 this instruction is labeled as being part of the frame
21553 creation sequence then dwarf2out_frame_debug_expr will
21554 die when it encounters the assignment of IP to FP
21555 later on, since the use of SP here establishes SP as
21556 the CFA register and not IP.
21558 Anyway this instruction is not really part of the stack
21559 frame creation although it is part of the prologue. */
21562 insn = emit_set_insn (ip_rtx,
21563 plus_constant (Pmode, stack_pointer_rtx,
21564 fp_offset));
21565 RTX_FRAME_RELATED_P (insn) = 1;
21568 if (args_to_push)
21570 /* Push the argument registers, or reserve space for them. */
21571 if (cfun->machine->uses_anonymous_args)
21572 insn = emit_multi_reg_push
21573 ((0xf0 >> (args_to_push / 4)) & 0xf,
21574 (0xf0 >> (args_to_push / 4)) & 0xf);
21575 else
21576 insn = emit_insn
21577 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21578 GEN_INT (- args_to_push)));
21579 RTX_FRAME_RELATED_P (insn) = 1;
21582 /* If this is an interrupt service routine, and the link register
21583 is going to be pushed, and we're not generating extra
21584 push of IP (needed when a frame pointer is required and the APCS frame layout is in use),
21585 subtracting four from LR now will mean that the function return
21586 can be done with a single instruction. */
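  /* Illustrative only: with LR pre-adjusted here, the saved value already
     points at the instruction to resume, so the epilogue can return with a
     single "ldmfd sp!, {..., pc}^" instead of first correcting LR by 4 and
     then branching through it.  */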
21587 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21588 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21589 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21590 && TARGET_ARM)
21592 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21594 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21597 if (live_regs_mask)
21599 unsigned long dwarf_regs_mask = live_regs_mask;
21601 saved_regs += bit_count (live_regs_mask) * 4;
21602 if (optimize_size && !frame_pointer_needed
21603 && saved_regs == offsets->saved_regs - offsets->saved_args)
21605 /* If no coprocessor registers are being pushed and we don't have
21606 to worry about a frame pointer then push extra registers to
21607 create the stack frame. This is done in a way that does not
21608 alter the frame layout, so is independent of the epilogue. */
21609 int n;
21610 int frame;
21611 n = 0;
21612 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21613 n++;
21614 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21615 if (frame && n * 4 >= frame)
21617 n = frame / 4;
21618 live_regs_mask |= (1 << n) - 1;
21619 saved_regs += frame;
21623 if (TARGET_LDRD
21624 && current_tune->prefer_ldrd_strd
21625 && !optimize_function_for_size_p (cfun))
21627 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21628 if (TARGET_THUMB2)
21629 thumb2_emit_strd_push (live_regs_mask);
21630 else if (TARGET_ARM
21631 && !TARGET_APCS_FRAME
21632 && !IS_INTERRUPT (func_type))
21633 arm_emit_strd_push (live_regs_mask);
21634 else
21636 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21637 RTX_FRAME_RELATED_P (insn) = 1;
21640 else
21642 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21643 RTX_FRAME_RELATED_P (insn) = 1;
21647 if (! IS_VOLATILE (func_type))
21648 saved_regs += arm_save_coproc_regs ();
21650 if (frame_pointer_needed && TARGET_ARM)
21652 /* Create the new frame pointer. */
21653 if (TARGET_APCS_FRAME)
21655 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21656 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21657 RTX_FRAME_RELATED_P (insn) = 1;
21659 else
21661 insn = GEN_INT (saved_regs - (4 + fp_offset));
21662 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21663 stack_pointer_rtx, insn));
21664 RTX_FRAME_RELATED_P (insn) = 1;
21668 size = offsets->outgoing_args - offsets->saved_args;
21669 if (flag_stack_usage_info)
21670 current_function_static_stack_size = size;
21672 /* If this isn't an interrupt service routine and we have a frame, then do
21673 stack checking. We use IP as the first scratch register, except for the
21674 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21675 if (!IS_INTERRUPT (func_type)
21676 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21678 unsigned int regno;
21680 if (!IS_NESTED (func_type) || clobber_ip)
21681 regno = IP_REGNUM;
21682 else if (df_regs_ever_live_p (LR_REGNUM))
21683 regno = LR_REGNUM;
21684 else
21685 regno = 3;
21687 if (crtl->is_leaf && !cfun->calls_alloca)
21689 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21690 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21691 size - STACK_CHECK_PROTECT,
21692 regno, live_regs_mask);
21694 else if (size > 0)
21695 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21696 regno, live_regs_mask);
21699 /* Recover the static chain register. */
21700 if (clobber_ip)
21702 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21703 insn = gen_rtx_REG (SImode, 3);
21704 else
21706 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21707 insn = gen_frame_mem (SImode, insn);
21709 emit_set_insn (ip_rtx, insn);
21710 emit_insn (gen_force_register_use (ip_rtx));
21713 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21715 /* This add can produce multiple insns for a large constant, so we
21716 need to get tricky. */
21717 rtx_insn *last = get_last_insn ();
21719 amount = GEN_INT (offsets->saved_args + saved_regs
21720 - offsets->outgoing_args);
21722 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21723 amount));
21726 last = last ? NEXT_INSN (last) : get_insns ();
21727 RTX_FRAME_RELATED_P (last) = 1;
21729 while (last != insn);
21731 /* If the frame pointer is needed, emit a special barrier that
21732 will prevent the scheduler from moving stores to the frame
21733 before the stack adjustment. */
21734 if (frame_pointer_needed)
21735 emit_insn (gen_stack_tie (stack_pointer_rtx,
21736 hard_frame_pointer_rtx));
21740 if (frame_pointer_needed && TARGET_THUMB2)
21741 thumb_set_frame_pointer (offsets);
21743 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21745 unsigned long mask;
21747 mask = live_regs_mask;
21748 mask &= THUMB2_WORK_REGS;
21749 if (!IS_NESTED (func_type))
21750 mask |= (1 << IP_REGNUM);
21751 arm_load_pic_register (mask);
21754 /* If we are profiling, make sure no instructions are scheduled before
21755 the call to mcount. Similarly if the user has requested no
21756 scheduling in the prologue. Similarly if we want non-call exceptions
21757 using the EABI unwinder, to prevent faulting instructions from being
21758 swapped with a stack adjustment. */
21759 if (crtl->profile || !TARGET_SCHED_PROLOG
21760 || (arm_except_unwind_info (&global_options) == UI_TARGET
21761 && cfun->can_throw_non_call_exceptions))
21762 emit_insn (gen_blockage ());
21764 /* If the link register is being kept alive, with the return address in it,
21765 then make sure that it does not get reused by the ce2 pass. */
21766 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21767 cfun->machine->lr_save_eliminated = 1;
21770 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21771 static void
21772 arm_print_condition (FILE *stream)
21774 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21776 /* Branch conversion is not implemented for Thumb-2. */
21777 if (TARGET_THUMB)
21779 output_operand_lossage ("predicated Thumb instruction");
21780 return;
21782 if (current_insn_predicate != NULL)
21784 output_operand_lossage
21785 ("predicated instruction in conditional sequence");
21786 return;
21789 fputs (arm_condition_codes[arm_current_cc], stream);
21791 else if (current_insn_predicate)
21793 enum arm_cond_code code;
21795 if (TARGET_THUMB1)
21797 output_operand_lossage ("predicated Thumb instruction");
21798 return;
21801 code = get_arm_condition_code (current_insn_predicate);
21802 fputs (arm_condition_codes[code], stream);
21807 /* Globally reserved letters: acln
21808 Punctuation letters currently used: @_|?().!#
21809 Lower case letters currently used: bcdefhimpqtvwxyz
21810 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21811 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21813 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21815 If CODE is 'd', then X is a condition operand and the instruction
21816 should only be executed if the condition is true.
21817 If CODE is 'D', then X is a condition operand and the instruction
21818 should only be executed if the condition is false: however, if the mode
21819 of the comparison is CCFPEmode, then always execute the instruction -- we
21820 do this because in these circumstances !GE does not necessarily imply LT;
21821 in these cases the instruction pattern will take care to make sure that
21822 an instruction containing %d will follow, thereby undoing the effects of
21823 doing this instruction unconditionally.
21824 If CODE is 'N' then X is a floating point operand that must be negated
21825 before output.
21826 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21827 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
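/* A couple of illustrative examples of the codes handled below (not from the
   original sources): with operand 0 being (const_int 5), "%B0" prints -6
   (the bitwise inverse, sign-extended); with operand 0 being r4 in DImode,
   "%M0" prints "{r4-r5}".  */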
21828 static void
21829 arm_print_operand (FILE *stream, rtx x, int code)
21831 switch (code)
21833 case '@':
21834 fputs (ASM_COMMENT_START, stream);
21835 return;
21837 case '_':
21838 fputs (user_label_prefix, stream);
21839 return;
21841 case '|':
21842 fputs (REGISTER_PREFIX, stream);
21843 return;
21845 case '?':
21846 arm_print_condition (stream);
21847 return;
21849 case '.':
21850 /* The current condition code for a condition code setting instruction.
21851 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21852 fputc('s', stream);
21853 arm_print_condition (stream);
21854 return;
21856 case '!':
21857 /* If the instruction is conditionally executed then print
21858 the current condition code, otherwise print 's'. */
21859 gcc_assert (TARGET_THUMB2);
21860 if (current_insn_predicate)
21861 arm_print_condition (stream);
21862 else
21863 fputc('s', stream);
21864 break;
21866 /* %# is a "break" sequence. It doesn't output anything, but is used to
21867 separate e.g. operand numbers from following text, if that text consists
21868 of further digits which we don't want to be part of the operand
21869 number. */
21870 case '#':
21871 return;
21873 case 'N':
21875 REAL_VALUE_TYPE r;
21876 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21877 fprintf (stream, "%s", fp_const_from_val (&r));
21879 return;
21881 /* An integer or symbol address without a preceding # sign. */
21882 case 'c':
21883 switch (GET_CODE (x))
21885 case CONST_INT:
21886 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21887 break;
21889 case SYMBOL_REF:
21890 output_addr_const (stream, x);
21891 break;
21893 case CONST:
21894 if (GET_CODE (XEXP (x, 0)) == PLUS
21895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21897 output_addr_const (stream, x);
21898 break;
21900 /* Fall through. */
21902 default:
21903 output_operand_lossage ("Unsupported operand for code '%c'", code);
21905 return;
21907 /* An integer that we want to print in HEX. */
21908 case 'x':
21909 switch (GET_CODE (x))
21911 case CONST_INT:
21912 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21913 break;
21915 default:
21916 output_operand_lossage ("Unsupported operand for code '%c'", code);
21918 return;
21920 case 'B':
21921 if (CONST_INT_P (x))
21923 HOST_WIDE_INT val;
21924 val = ARM_SIGN_EXTEND (~INTVAL (x));
21925 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21927 else
21929 putc ('~', stream);
21930 output_addr_const (stream, x);
21932 return;
21934 case 'b':
21935 /* Print the log2 of a CONST_INT. */
21937 HOST_WIDE_INT val;
21939 if (!CONST_INT_P (x)
21940 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21941 output_operand_lossage ("Unsupported operand for code '%c'", code);
21942 else
21943 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21945 return;
21947 case 'L':
21948 /* The low 16 bits of an immediate constant. */
21949 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21950 return;
21952 case 'i':
21953 fprintf (stream, "%s", arithmetic_instr (x, 1));
21954 return;
21956 case 'I':
21957 fprintf (stream, "%s", arithmetic_instr (x, 0));
21958 return;
21960 case 'S':
21962 HOST_WIDE_INT val;
21963 const char *shift;
21965 shift = shift_op (x, &val);
21967 if (shift)
21969 fprintf (stream, ", %s ", shift);
21970 if (val == -1)
21971 arm_print_operand (stream, XEXP (x, 1), 0);
21972 else
21973 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21976 return;
21978 /* An explanation of the 'Q', 'R' and 'H' register operands:
21980 In a pair of registers containing a DI or DF value the 'Q'
21981 operand returns the register number of the register containing
21982 the least significant part of the value. The 'R' operand returns
21983 the register number of the register containing the most
21984 significant part of the value.
21986 The 'H' operand returns the higher of the two register numbers.
21987 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21988 same as the 'Q' operand, since the most significant part of the
21989 value is held in the lower number register. The reverse is true
21990 on systems where WORDS_BIG_ENDIAN is false.
21992 The purpose of these operands is to distinguish between cases
21993 where the endian-ness of the values is important (for example
21994 when they are added together), and cases where the endian-ness
21995 is irrelevant, but the order of register operations is important.
21996 For example when loading a value from memory into a register
21997 pair, the endian-ness does not matter. Provided that the value
21998 from the lower memory address is put into the lower numbered
21999 register, and the value from the higher address is put into the
22000 higher numbered register, the load will work regardless of whether
22001 the value being loaded is big-wordian or little-wordian. The
22002 order of the two register loads can matter however, if the address
22003 of the memory location is actually held in one of the registers
22004 being overwritten by the load.
22006 The 'Q' and 'R' constraints are also available for 64-bit
22007 constants. */
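      /* A worked example (illustrative, not from the sources): for a DImode
	 value held in {r4, r5} on a little-endian target where
	 WORDS_BIG_ENDIAN is false, 'Q' prints r4 (the least significant
	 word), 'R' prints r5 (the most significant word) and 'H' also prints
	 r5 (the higher-numbered register).  */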
22008 case 'Q':
22009 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22011 rtx part = gen_lowpart (SImode, x);
22012 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22013 return;
22016 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22018 output_operand_lossage ("invalid operand for code '%c'", code);
22019 return;
22022 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22023 return;
22025 case 'R':
22026 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22028 machine_mode mode = GET_MODE (x);
22029 rtx part;
22031 if (mode == VOIDmode)
22032 mode = DImode;
22033 part = gen_highpart_mode (SImode, mode, x);
22034 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22035 return;
22038 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22040 output_operand_lossage ("invalid operand for code '%c'", code);
22041 return;
22044 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22045 return;
22047 case 'H':
22048 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22050 output_operand_lossage ("invalid operand for code '%c'", code);
22051 return;
22054 asm_fprintf (stream, "%r", REGNO (x) + 1);
22055 return;
22057 case 'J':
22058 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 output_operand_lossage ("invalid operand for code '%c'", code);
22061 return;
22064 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22065 return;
22067 case 'K':
22068 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22070 output_operand_lossage ("invalid operand for code '%c'", code);
22071 return;
22074 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22075 return;
22077 case 'm':
22078 asm_fprintf (stream, "%r",
22079 REG_P (XEXP (x, 0))
22080 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22081 return;
22083 case 'M':
22084 asm_fprintf (stream, "{%r-%r}",
22085 REGNO (x),
22086 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22087 return;
22089 /* Like 'M', but writing doubleword vector registers, for use by Neon
22090 insns. */
22091 case 'h':
22093 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22094 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22095 if (numregs == 1)
22096 asm_fprintf (stream, "{d%d}", regno);
22097 else
22098 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22100 return;
22102 case 'd':
22103 /* CONST_TRUE_RTX means always -- that's the default. */
22104 if (x == const_true_rtx)
22105 return;
22107 if (!COMPARISON_P (x))
22109 output_operand_lossage ("invalid operand for code '%c'", code);
22110 return;
22113 fputs (arm_condition_codes[get_arm_condition_code (x)],
22114 stream);
22115 return;
22117 case 'D':
22118 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22119 want to do that. */
22120 if (x == const_true_rtx)
22122 output_operand_lossage ("instruction never executed");
22123 return;
22125 if (!COMPARISON_P (x))
22127 output_operand_lossage ("invalid operand for code '%c'", code);
22128 return;
22131 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22132 (get_arm_condition_code (x))],
22133 stream);
22134 return;
22136 case 's':
22137 case 'V':
22138 case 'W':
22139 case 'X':
22140 case 'Y':
22141 case 'Z':
22142 /* Former Maverick support, removed after GCC-4.7. */
22143 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22144 return;
22146 case 'U':
22147 if (!REG_P (x)
22148 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22149 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22150 /* Bad value for wCG register number. */
22152 output_operand_lossage ("invalid operand for code '%c'", code);
22153 return;
22156 else
22157 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22158 return;
22160 /* Print an iWMMXt control register name. */
22161 case 'w':
22162 if (!CONST_INT_P (x)
22163 || INTVAL (x) < 0
22164 || INTVAL (x) >= 16)
22165 /* Bad value for wC register number. */
22167 output_operand_lossage ("invalid operand for code '%c'", code);
22168 return;
22171 else
22173 static const char * wc_reg_names [16] =
22175 "wCID", "wCon", "wCSSF", "wCASF",
22176 "wC4", "wC5", "wC6", "wC7",
22177 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22178 "wC12", "wC13", "wC14", "wC15"
22181 fputs (wc_reg_names [INTVAL (x)], stream);
22183 return;
22185 /* Print the high single-precision register of a VFP double-precision
22186 register. */
22187 case 'p':
22189 machine_mode mode = GET_MODE (x);
22190 int regno;
22192 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22194 output_operand_lossage ("invalid operand for code '%c'", code);
22195 return;
22198 regno = REGNO (x);
22199 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22201 output_operand_lossage ("invalid operand for code '%c'", code);
22202 return;
22205 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22207 return;
22209 /* Print a VFP/Neon double precision or quad precision register name. */
22210 case 'P':
22211 case 'q':
22213 machine_mode mode = GET_MODE (x);
22214 int is_quad = (code == 'q');
22215 int regno;
22217 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22219 output_operand_lossage ("invalid operand for code '%c'", code);
22220 return;
22223 if (!REG_P (x)
22224 || !IS_VFP_REGNUM (REGNO (x)))
22226 output_operand_lossage ("invalid operand for code '%c'", code);
22227 return;
22230 regno = REGNO (x);
22231 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22232 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22234 output_operand_lossage ("invalid operand for code '%c'", code);
22235 return;
22238 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22239 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22241 return;
22243 /* These two codes print the low/high doubleword register of a Neon quad
22244 register, respectively. For pair-structure types, can also print
22245 low/high quadword registers. */
22246 case 'e':
22247 case 'f':
22249 machine_mode mode = GET_MODE (x);
22250 int regno;
22252 if ((GET_MODE_SIZE (mode) != 16
22253 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22255 output_operand_lossage ("invalid operand for code '%c'", code);
22256 return;
22259 regno = REGNO (x);
22260 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22262 output_operand_lossage ("invalid operand for code '%c'", code);
22263 return;
22266 if (GET_MODE_SIZE (mode) == 16)
22267 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22268 + (code == 'f' ? 1 : 0));
22269 else
22270 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22271 + (code == 'f' ? 1 : 0));
22273 return;
22275 /* Print a VFPv3 floating-point constant, represented as an integer
22276 index. */
22277 case 'G':
22279 int index = vfp3_const_double_index (x);
22280 gcc_assert (index != -1);
22281 fprintf (stream, "%d", index);
22283 return;
22285 /* Print bits representing opcode features for Neon.
22287 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22288 and polynomials as unsigned.
22290 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22292 Bit 2 is 1 for rounding functions, 0 otherwise. */
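    /* A worked example of the three codes below (illustrative only): for
       bits == 3 (signed, float) 'T', 'F' and 't' all print 'f'; for
       bits == 0 (unsigned, integer) 'T' prints 'u', 'F' prints 'i' and
       't' prints 'u'.  */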
22294 /* Identify the type as 's', 'u', 'p' or 'f'. */
22295 case 'T':
22297 HOST_WIDE_INT bits = INTVAL (x);
22298 fputc ("uspf"[bits & 3], stream);
22300 return;
22302 /* Likewise, but signed and unsigned integers are both 'i'. */
22303 case 'F':
22305 HOST_WIDE_INT bits = INTVAL (x);
22306 fputc ("iipf"[bits & 3], stream);
22308 return;
22310 /* As for 'T', but emit 'u' instead of 'p'. */
22311 case 't':
22313 HOST_WIDE_INT bits = INTVAL (x);
22314 fputc ("usuf"[bits & 3], stream);
22316 return;
22318 /* Bit 2: rounding (vs none). */
22319 case 'O':
22321 HOST_WIDE_INT bits = INTVAL (x);
22322 fputs ((bits & 4) != 0 ? "r" : "", stream);
22324 return;
22326 /* Memory operand for vld1/vst1 instruction. */
22327 case 'A':
22329 rtx addr;
22330 bool postinc = FALSE;
22331 rtx postinc_reg = NULL;
22332 unsigned align, memsize, align_bits;
22334 gcc_assert (MEM_P (x));
22335 addr = XEXP (x, 0);
22336 if (GET_CODE (addr) == POST_INC)
22338 postinc = 1;
22339 addr = XEXP (addr, 0);
22341 if (GET_CODE (addr) == POST_MODIFY)
22343 postinc_reg = XEXP( XEXP (addr, 1), 1);
22344 addr = XEXP (addr, 0);
22346 asm_fprintf (stream, "[%r", REGNO (addr));
22348 /* We know the alignment of this access, so we can emit a hint in the
22349 instruction (for some alignments) as an aid to the memory subsystem
22350 of the target. */
22351 align = MEM_ALIGN (x) >> 3;
22352 memsize = MEM_SIZE (x);
22354 /* Only certain alignment specifiers are supported by the hardware. */
22355 if (memsize == 32 && (align % 32) == 0)
22356 align_bits = 256;
22357 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22358 align_bits = 128;
22359 else if (memsize >= 8 && (align % 8) == 0)
22360 align_bits = 64;
22361 else
22362 align_bits = 0;
22364 if (align_bits != 0)
22365 asm_fprintf (stream, ":%d", align_bits);
22367 asm_fprintf (stream, "]");
22369 if (postinc)
22370 fputs("!", stream);
22371 if (postinc_reg)
22372 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22374 return;
22376 case 'C':
22378 rtx addr;
22380 gcc_assert (MEM_P (x));
22381 addr = XEXP (x, 0);
22382 gcc_assert (REG_P (addr));
22383 asm_fprintf (stream, "[%r]", REGNO (addr));
22385 return;
22387 /* Translate an S register number into a D register number and element index. */
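    /* For example (illustrative only): s1 is printed as d0[1] and s6 as
       d3[0].  */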
22388 case 'y':
22390 machine_mode mode = GET_MODE (x);
22391 int regno;
22393 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22395 output_operand_lossage ("invalid operand for code '%c'", code);
22396 return;
22399 regno = REGNO (x);
22400 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22402 output_operand_lossage ("invalid operand for code '%c'", code);
22403 return;
22406 regno = regno - FIRST_VFP_REGNUM;
22407 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22409 return;
22411 case 'v':
22412 gcc_assert (CONST_DOUBLE_P (x));
22413 int result;
22414 result = vfp3_const_double_for_fract_bits (x);
22415 if (result == 0)
22416 result = vfp3_const_double_for_bits (x);
22417 fprintf (stream, "#%d", result);
22418 return;
22420 /* Register specifier for vld1.16/vst1.16. Translate the S register
22421 number into a D register number and element index. */
22422 case 'z':
22424 machine_mode mode = GET_MODE (x);
22425 int regno;
22427 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22429 output_operand_lossage ("invalid operand for code '%c'", code);
22430 return;
22433 regno = REGNO (x);
22434 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22436 output_operand_lossage ("invalid operand for code '%c'", code);
22437 return;
22440 regno = regno - FIRST_VFP_REGNUM;
22441 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22443 return;
22445 default:
22446 if (x == 0)
22448 output_operand_lossage ("missing operand");
22449 return;
22452 switch (GET_CODE (x))
22454 case REG:
22455 asm_fprintf (stream, "%r", REGNO (x));
22456 break;
22458 case MEM:
22459 output_address (GET_MODE (x), XEXP (x, 0));
22460 break;
22462 case CONST_DOUBLE:
22464 char fpstr[20];
22465 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22466 sizeof (fpstr), 0, 1);
22467 fprintf (stream, "#%s", fpstr);
22469 break;
22471 default:
22472 gcc_assert (GET_CODE (x) != NEG);
22473 fputc ('#', stream);
22474 if (GET_CODE (x) == HIGH)
22476 fputs (":lower16:", stream);
22477 x = XEXP (x, 0);
22480 output_addr_const (stream, x);
22481 break;
22486 /* Target hook for printing a memory address. */
22487 static void
22488 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22490 if (TARGET_32BIT)
22492 int is_minus = GET_CODE (x) == MINUS;
22494 if (REG_P (x))
22495 asm_fprintf (stream, "[%r]", REGNO (x));
22496 else if (GET_CODE (x) == PLUS || is_minus)
22498 rtx base = XEXP (x, 0);
22499 rtx index = XEXP (x, 1);
22500 HOST_WIDE_INT offset = 0;
22501 if (!REG_P (base)
22502 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22504 /* Ensure that BASE is a register. */
22505 /* (one of them must be). */
22506 /* Also ensure the SP is not used as an index register. */
22507 std::swap (base, index);
22509 switch (GET_CODE (index))
22511 case CONST_INT:
22512 offset = INTVAL (index);
22513 if (is_minus)
22514 offset = -offset;
22515 asm_fprintf (stream, "[%r, #%wd]",
22516 REGNO (base), offset);
22517 break;
22519 case REG:
22520 asm_fprintf (stream, "[%r, %s%r]",
22521 REGNO (base), is_minus ? "-" : "",
22522 REGNO (index));
22523 break;
22525 case MULT:
22526 case ASHIFTRT:
22527 case LSHIFTRT:
22528 case ASHIFT:
22529 case ROTATERT:
22531 asm_fprintf (stream, "[%r, %s%r",
22532 REGNO (base), is_minus ? "-" : "",
22533 REGNO (XEXP (index, 0)));
22534 arm_print_operand (stream, index, 'S');
22535 fputs ("]", stream);
22536 break;
22539 default:
22540 gcc_unreachable ();
22543 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22544 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22546 gcc_assert (REG_P (XEXP (x, 0)));
22548 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22549 asm_fprintf (stream, "[%r, #%s%d]!",
22550 REGNO (XEXP (x, 0)),
22551 GET_CODE (x) == PRE_DEC ? "-" : "",
22552 GET_MODE_SIZE (mode));
22553 else
22554 asm_fprintf (stream, "[%r], #%s%d",
22555 REGNO (XEXP (x, 0)),
22556 GET_CODE (x) == POST_DEC ? "-" : "",
22557 GET_MODE_SIZE (mode));
22559 else if (GET_CODE (x) == PRE_MODIFY)
22561 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22562 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22563 asm_fprintf (stream, "#%wd]!",
22564 INTVAL (XEXP (XEXP (x, 1), 1)));
22565 else
22566 asm_fprintf (stream, "%r]!",
22567 REGNO (XEXP (XEXP (x, 1), 1)));
22569 else if (GET_CODE (x) == POST_MODIFY)
22571 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22572 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22573 asm_fprintf (stream, "#%wd",
22574 INTVAL (XEXP (XEXP (x, 1), 1)));
22575 else
22576 asm_fprintf (stream, "%r",
22577 REGNO (XEXP (XEXP (x, 1), 1)));
22579 else output_addr_const (stream, x);
22581 else
22583 if (REG_P (x))
22584 asm_fprintf (stream, "[%r]", REGNO (x));
22585 else if (GET_CODE (x) == POST_INC)
22586 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22587 else if (GET_CODE (x) == PLUS)
22589 gcc_assert (REG_P (XEXP (x, 0)));
22590 if (CONST_INT_P (XEXP (x, 1)))
22591 asm_fprintf (stream, "[%r, #%wd]",
22592 REGNO (XEXP (x, 0)),
22593 INTVAL (XEXP (x, 1)));
22594 else
22595 asm_fprintf (stream, "[%r, %r]",
22596 REGNO (XEXP (x, 0)),
22597 REGNO (XEXP (x, 1)));
22599 else
22600 output_addr_const (stream, x);
22604 /* Target hook for indicating whether a punctuation character for
22605 TARGET_PRINT_OPERAND is valid. */
22606 static bool
22607 arm_print_operand_punct_valid_p (unsigned char code)
22609 return (code == '@' || code == '|' || code == '.'
22610 || code == '(' || code == ')' || code == '#'
22611 || (TARGET_32BIT && (code == '?'))
22612 || (TARGET_THUMB2 && (code == '!'))
22613 || (TARGET_THUMB && (code == '_')));
22616 /* Target hook for assembling integer objects. The ARM version needs to
22617 handle word-sized values specially. */
22618 static bool
22619 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22621 machine_mode mode;
22623 if (size == UNITS_PER_WORD && aligned_p)
22625 fputs ("\t.word\t", asm_out_file);
22626 output_addr_const (asm_out_file, x);
22628 /* Mark symbols as position independent. We only do this in the
22629 .text segment, not in the .data segment. */
22630 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22631 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22633 /* See legitimize_pic_address for an explanation of the
22634 TARGET_VXWORKS_RTP check. */
22635 /* References to weak symbols cannot be resolved locally:
22636 they may be overridden by a non-weak definition at link
22637 time. */
22638 if (!arm_pic_data_is_text_relative
22639 || (GET_CODE (x) == SYMBOL_REF
22640 && (!SYMBOL_REF_LOCAL_P (x)
22641 || (SYMBOL_REF_DECL (x)
22642 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22643 fputs ("(GOT)", asm_out_file);
22644 else
22645 fputs ("(GOTOFF)", asm_out_file);
22647 fputc ('\n', asm_out_file);
22648 return true;
22651 mode = GET_MODE (x);
22653 if (arm_vector_mode_supported_p (mode))
22655 int i, units;
22657 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22659 units = CONST_VECTOR_NUNITS (x);
22660 size = GET_MODE_UNIT_SIZE (mode);
22662 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22663 for (i = 0; i < units; i++)
22665 rtx elt = CONST_VECTOR_ELT (x, i);
22666 assemble_integer
22667 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22669 else
22670 for (i = 0; i < units; i++)
22672 rtx elt = CONST_VECTOR_ELT (x, i);
22673 assemble_real
22674 (*CONST_DOUBLE_REAL_VALUE (elt),
22675 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22676 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22679 return true;
22682 return default_assemble_integer (x, size, aligned_p);
22685 static void
22686 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22688 section *s;
22690 if (!TARGET_AAPCS_BASED)
22692 (is_ctor ?
22693 default_named_section_asm_out_constructor
22694 : default_named_section_asm_out_destructor) (symbol, priority);
22695 return;
22698 /* Put these in the .init_array section, using a special relocation. */
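  /* For example (illustrative only): a constructor with priority 101 ends up
     in a section named ".init_array.00101", and its entry is emitted below as
     "\t.word\tsymbol(target1)".  */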
22699 if (priority != DEFAULT_INIT_PRIORITY)
22701 char buf[18];
22702 sprintf (buf, "%s.%.5u",
22703 is_ctor ? ".init_array" : ".fini_array",
22704 priority);
22705 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22707 else if (is_ctor)
22708 s = ctors_section;
22709 else
22710 s = dtors_section;
22712 switch_to_section (s);
22713 assemble_align (POINTER_SIZE);
22714 fputs ("\t.word\t", asm_out_file);
22715 output_addr_const (asm_out_file, symbol);
22716 fputs ("(target1)\n", asm_out_file);
22719 /* Add a function to the list of static constructors. */
22721 static void
22722 arm_elf_asm_constructor (rtx symbol, int priority)
22724 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22727 /* Add a function to the list of static destructors. */
22729 static void
22730 arm_elf_asm_destructor (rtx symbol, int priority)
22732 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22735 /* A finite state machine takes care of noticing whether or not instructions
22736 can be conditionally executed, and thus decreases execution time and code
22737 size by deleting branch instructions. The fsm is controlled by
22738 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22740 /* The state of the fsm controlling condition codes is:
22741 0: normal, do nothing special
22742 1: make ASM_OUTPUT_OPCODE not output this instruction
22743 2: make ASM_OUTPUT_OPCODE not output this instruction
22744 3: make instructions conditional
22745 4: make instructions conditional
22747 State transitions (state->state by whom under condition):
22748 0 -> 1 final_prescan_insn if the `target' is a label
22749 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22750 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22751 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22752 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22753 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22754 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22755 (the target insn is arm_target_insn).
22757 If the jump clobbers the conditions then we use states 2 and 4.
22759 A similar thing can be done with conditional return insns.
22761 XXX In case the `target' is an unconditional branch, this conditionalising
22762 of the instructions always reduces code size, but not always execution
22763 time. But then, I want to reduce the code size to somewhere near what
22764 /bin/cc produces. */
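/* A sketch of the effect (illustrative, not from the sources): given

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   final_prescan_insn notices that the beq targets a nearby label, enters
   state 1, and ASM_OUTPUT_OPCODE then drops the branch and emits the
   addition as the conditional "addne r1, r1, #1".  */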
22766 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22767 instructions. When a COND_EXEC instruction is seen the subsequent
22768 instructions are scanned so that multiple conditional instructions can be
22769 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22770 specify the length and true/false mask for the IT block. These will be
22771 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
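/* For instance (illustrative only): two consecutive COND_EXEC insns
   predicated on EQ followed by one predicated on NE can be covered by a
   single "itte eq" block, with arm_condexec_mask recording the then/else
   pattern and arm_condexec_count the number of insns still to be emitted
   under it.  */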
22773 /* Returns the index of the ARM condition code string in
22774 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22775 COMPARISON should be an rtx like `(eq (...) (...))'. */
22777 enum arm_cond_code
22778 maybe_get_arm_condition_code (rtx comparison)
22780 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22781 enum arm_cond_code code;
22782 enum rtx_code comp_code = GET_CODE (comparison);
22784 if (GET_MODE_CLASS (mode) != MODE_CC)
22785 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22786 XEXP (comparison, 1));
22788 switch (mode)
22790 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22791 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22792 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22793 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22794 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22795 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22796 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22797 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22798 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22799 case E_CC_DLTUmode: code = ARM_CC;
22801 dominance:
22802 if (comp_code == EQ)
22803 return ARM_INVERSE_CONDITION_CODE (code);
22804 if (comp_code == NE)
22805 return code;
22806 return ARM_NV;
22808 case E_CC_NOOVmode:
22809 switch (comp_code)
22811 case NE: return ARM_NE;
22812 case EQ: return ARM_EQ;
22813 case GE: return ARM_PL;
22814 case LT: return ARM_MI;
22815 default: return ARM_NV;
22818 case E_CC_Zmode:
22819 switch (comp_code)
22821 case NE: return ARM_NE;
22822 case EQ: return ARM_EQ;
22823 default: return ARM_NV;
22826 case E_CC_Nmode:
22827 switch (comp_code)
22829 case NE: return ARM_MI;
22830 case EQ: return ARM_PL;
22831 default: return ARM_NV;
22834 case E_CCFPEmode:
22835 case E_CCFPmode:
22836 /* We can handle all cases except UNEQ and LTGT. */
22837 switch (comp_code)
22839 case GE: return ARM_GE;
22840 case GT: return ARM_GT;
22841 case LE: return ARM_LS;
22842 case LT: return ARM_MI;
22843 case NE: return ARM_NE;
22844 case EQ: return ARM_EQ;
22845 case ORDERED: return ARM_VC;
22846 case UNORDERED: return ARM_VS;
22847 case UNLT: return ARM_LT;
22848 case UNLE: return ARM_LE;
22849 case UNGT: return ARM_HI;
22850 case UNGE: return ARM_PL;
22851 /* UNEQ and LTGT do not have a representation. */
22852 case UNEQ: /* Fall through. */
22853 case LTGT: /* Fall through. */
22854 default: return ARM_NV;
22857 case E_CC_SWPmode:
22858 switch (comp_code)
22860 case NE: return ARM_NE;
22861 case EQ: return ARM_EQ;
22862 case GE: return ARM_LE;
22863 case GT: return ARM_LT;
22864 case LE: return ARM_GE;
22865 case LT: return ARM_GT;
22866 case GEU: return ARM_LS;
22867 case GTU: return ARM_CC;
22868 case LEU: return ARM_CS;
22869 case LTU: return ARM_HI;
22870 default: return ARM_NV;
22873 case E_CC_Cmode:
22874 switch (comp_code)
22876 case LTU: return ARM_CS;
22877 case GEU: return ARM_CC;
22878 case NE: return ARM_CS;
22879 case EQ: return ARM_CC;
22880 default: return ARM_NV;
22883 case E_CC_CZmode:
22884 switch (comp_code)
22886 case NE: return ARM_NE;
22887 case EQ: return ARM_EQ;
22888 case GEU: return ARM_CS;
22889 case GTU: return ARM_HI;
22890 case LEU: return ARM_LS;
22891 case LTU: return ARM_CC;
22892 default: return ARM_NV;
22895 case E_CC_NCVmode:
22896 switch (comp_code)
22898 case GE: return ARM_GE;
22899 case LT: return ARM_LT;
22900 case GEU: return ARM_CS;
22901 case LTU: return ARM_CC;
22902 default: return ARM_NV;
22905 case E_CC_Vmode:
22906 switch (comp_code)
22908 case NE: return ARM_VS;
22909 case EQ: return ARM_VC;
22910 default: return ARM_NV;
22913 case E_CCmode:
22914 switch (comp_code)
22916 case NE: return ARM_NE;
22917 case EQ: return ARM_EQ;
22918 case GE: return ARM_GE;
22919 case GT: return ARM_GT;
22920 case LE: return ARM_LE;
22921 case LT: return ARM_LT;
22922 case GEU: return ARM_CS;
22923 case GTU: return ARM_HI;
22924 case LEU: return ARM_LS;
22925 case LTU: return ARM_CC;
22926 default: return ARM_NV;
22929 default: gcc_unreachable ();
22933 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22934 static enum arm_cond_code
22935 get_arm_condition_code (rtx comparison)
22937 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22938 gcc_assert (code != ARM_NV);
22939 return code;
22942 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22943    code registers when not targeting Thumb1.  The VFP condition register
22944 only exists when generating hard-float code. */
22945 static bool
22946 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22948 if (!TARGET_32BIT)
22949 return false;
22951 *p1 = CC_REGNUM;
22952 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22953 return true;
22956 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22957 instructions. */
22958 void
22959 thumb2_final_prescan_insn (rtx_insn *insn)
22961 rtx_insn *first_insn = insn;
22962 rtx body = PATTERN (insn);
22963 rtx predicate;
22964 enum arm_cond_code code;
22965 int n;
22966 int mask;
22967 int max;
22969 /* max_insns_skipped in the tune was already taken into account in the
22970 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22971    just emit the largest IT blocks we can.  It does not make sense to split
22972 the IT blocks. */
22973 max = MAX_INSN_PER_IT_BLOCK;
22975 /* Remove the previous insn from the count of insns to be output. */
22976 if (arm_condexec_count)
22977 arm_condexec_count--;
22979 /* Nothing to do if we are already inside a conditional block. */
22980 if (arm_condexec_count)
22981 return;
22983 if (GET_CODE (body) != COND_EXEC)
22984 return;
22986 /* Conditional jumps are implemented directly. */
22987 if (JUMP_P (insn))
22988 return;
22990 predicate = COND_EXEC_TEST (body);
22991 arm_current_cc = get_arm_condition_code (predicate);
22993 n = get_attr_ce_count (insn);
22994 arm_condexec_count = 1;
22995 arm_condexec_mask = (1 << n) - 1;
22996 arm_condexec_masklen = n;
22997 /* See if subsequent instructions can be combined into the same block. */
22998 for (;;)
23000 insn = next_nonnote_insn (insn);
23002 /* Jumping into the middle of an IT block is illegal, so a label or
23003 barrier terminates the block. */
23004 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23005 break;
23007 body = PATTERN (insn);
23008 /* USE and CLOBBER aren't really insns, so just skip them. */
23009 if (GET_CODE (body) == USE
23010 || GET_CODE (body) == CLOBBER)
23011 continue;
23013 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23014 if (GET_CODE (body) != COND_EXEC)
23015 break;
23016 /* Maximum number of conditionally executed instructions in a block. */
23017 n = get_attr_ce_count (insn);
23018 if (arm_condexec_masklen + n > max)
23019 break;
23021 predicate = COND_EXEC_TEST (body);
23022 code = get_arm_condition_code (predicate);
23023 mask = (1 << n) - 1;
23024 if (arm_current_cc == code)
23025 arm_condexec_mask |= (mask << arm_condexec_masklen);
23026 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23027 break;
23029 arm_condexec_count++;
23030 arm_condexec_masklen += n;
23032 /* A jump must be the last instruction in a conditional block. */
23033 if (JUMP_P (insn))
23034 break;
23036 /* Restore recog_data (getting the attributes of other insns can
23037 destroy this array, but final.c assumes that it remains intact
23038 across this call). */
23039 extract_constrain_insn_cached (first_insn);
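/* Editorial illustration: with three single-instruction COND_EXEC insns
   whose tests are EQ, EQ and NE, the loop above leaves arm_condexec_mask
   == 0b011 and arm_condexec_masklen == 3, which thumb2_asm_output_opcode
   below prints as a single "itte eq" covering all three instructions.  */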
23042 void
23043 arm_final_prescan_insn (rtx_insn *insn)
23045 /* BODY will hold the body of INSN. */
23046 rtx body = PATTERN (insn);
23048 /* This will be 1 if trying to repeat the trick, and things need to be
23049 reversed if it appears to fail. */
23050 int reverse = 0;
23052 /* If we start with a return insn, we only succeed if we find another one. */
23053 int seeking_return = 0;
23054 enum rtx_code return_code = UNKNOWN;
23056 /* START_INSN will hold the insn from where we start looking. This is the
23057 first insn after the following code_label if REVERSE is true. */
23058 rtx_insn *start_insn = insn;
23060 /* If in state 4, check if the target branch is reached, in order to
23061 change back to state 0. */
23062 if (arm_ccfsm_state == 4)
23064 if (insn == arm_target_insn)
23066 arm_target_insn = NULL;
23067 arm_ccfsm_state = 0;
23069 return;
23072 /* If in state 3, it is possible to repeat the trick, if this insn is an
23073 unconditional branch to a label, and immediately following this branch
23074 is the previous target label which is only used once, and the label this
23075 branch jumps to is not too far off. */
23076 if (arm_ccfsm_state == 3)
23078 if (simplejump_p (insn))
23080 start_insn = next_nonnote_insn (start_insn);
23081 if (BARRIER_P (start_insn))
23083 /* XXX Isn't this always a barrier? */
23084 start_insn = next_nonnote_insn (start_insn);
23086 if (LABEL_P (start_insn)
23087 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23088 && LABEL_NUSES (start_insn) == 1)
23089 reverse = TRUE;
23090 else
23091 return;
23093 else if (ANY_RETURN_P (body))
23095 start_insn = next_nonnote_insn (start_insn);
23096 if (BARRIER_P (start_insn))
23097 start_insn = next_nonnote_insn (start_insn);
23098 if (LABEL_P (start_insn)
23099 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23100 && LABEL_NUSES (start_insn) == 1)
23102 reverse = TRUE;
23103 seeking_return = 1;
23104 return_code = GET_CODE (body);
23106 else
23107 return;
23109 else
23110 return;
23113 gcc_assert (!arm_ccfsm_state || reverse);
23114 if (!JUMP_P (insn))
23115 return;
23117   /* This jump might be paralleled with a clobber of the condition codes;
23118      the jump should always come first.  */
23119 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23120 body = XVECEXP (body, 0, 0);
23122 if (reverse
23123 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23124 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23126 int insns_skipped;
23127 int fail = FALSE, succeed = FALSE;
23128 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23129 int then_not_else = TRUE;
23130 rtx_insn *this_insn = start_insn;
23131 rtx label = 0;
23133 /* Register the insn jumped to. */
23134 if (reverse)
23136 if (!seeking_return)
23137 label = XEXP (SET_SRC (body), 0);
23139 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23140 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23141 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23143 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23144 then_not_else = FALSE;
23146 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23148 seeking_return = 1;
23149 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23151 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23153 seeking_return = 1;
23154 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23155 then_not_else = FALSE;
23157 else
23158 gcc_unreachable ();
23160 /* See how many insns this branch skips, and what kind of insns. If all
23161 insns are okay, and the label or unconditional branch to the same
23162 label is not too far away, succeed. */
23163 for (insns_skipped = 0;
23164 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23166 rtx scanbody;
23168 this_insn = next_nonnote_insn (this_insn);
23169 if (!this_insn)
23170 break;
23172 switch (GET_CODE (this_insn))
23174 case CODE_LABEL:
23175 /* Succeed if it is the target label, otherwise fail since
23176 control falls in from somewhere else. */
23177 if (this_insn == label)
23179 arm_ccfsm_state = 1;
23180 succeed = TRUE;
23182 else
23183 fail = TRUE;
23184 break;
23186 case BARRIER:
23187 /* Succeed if the following insn is the target label.
23188 Otherwise fail.
23189 If return insns are used then the last insn in a function
23190 will be a barrier. */
23191 this_insn = next_nonnote_insn (this_insn);
23192 if (this_insn && this_insn == label)
23194 arm_ccfsm_state = 1;
23195 succeed = TRUE;
23197 else
23198 fail = TRUE;
23199 break;
23201 case CALL_INSN:
23202 /* The AAPCS says that conditional calls should not be
23203 used since they make interworking inefficient (the
23204 linker can't transform BL<cond> into BLX). That's
23205 only a problem if the machine has BLX. */
23206 if (arm_arch5)
23208 fail = TRUE;
23209 break;
23212 /* Succeed if the following insn is the target label, or
23213 if the following two insns are a barrier and the
23214 target label. */
23215 this_insn = next_nonnote_insn (this_insn);
23216 if (this_insn && BARRIER_P (this_insn))
23217 this_insn = next_nonnote_insn (this_insn);
23219 if (this_insn && this_insn == label
23220 && insns_skipped < max_insns_skipped)
23222 arm_ccfsm_state = 1;
23223 succeed = TRUE;
23225 else
23226 fail = TRUE;
23227 break;
23229 case JUMP_INSN:
23230 /* If this is an unconditional branch to the same label, succeed.
23231 If it is to another label, do nothing. If it is conditional,
23232 fail. */
23233 /* XXX Probably, the tests for SET and the PC are
23234 unnecessary. */
23236 scanbody = PATTERN (this_insn);
23237 if (GET_CODE (scanbody) == SET
23238 && GET_CODE (SET_DEST (scanbody)) == PC)
23240 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23241 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23243 arm_ccfsm_state = 2;
23244 succeed = TRUE;
23246 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23247 fail = TRUE;
23249 /* Fail if a conditional return is undesirable (e.g. on a
23250 StrongARM), but still allow this if optimizing for size. */
23251 else if (GET_CODE (scanbody) == return_code
23252 && !use_return_insn (TRUE, NULL)
23253 && !optimize_size)
23254 fail = TRUE;
23255 else if (GET_CODE (scanbody) == return_code)
23257 arm_ccfsm_state = 2;
23258 succeed = TRUE;
23260 else if (GET_CODE (scanbody) == PARALLEL)
23262 switch (get_attr_conds (this_insn))
23264 case CONDS_NOCOND:
23265 break;
23266 default:
23267 fail = TRUE;
23268 break;
23271 else
23272 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23274 break;
23276 case INSN:
23277 /* Instructions using or affecting the condition codes make it
23278 fail. */
23279 scanbody = PATTERN (this_insn);
23280 if (!(GET_CODE (scanbody) == SET
23281 || GET_CODE (scanbody) == PARALLEL)
23282 || get_attr_conds (this_insn) != CONDS_NOCOND)
23283 fail = TRUE;
23284 break;
23286 default:
23287 break;
23290 if (succeed)
23292 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23293 arm_target_label = CODE_LABEL_NUMBER (label);
23294 else
23296 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23298 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23300 this_insn = next_nonnote_insn (this_insn);
23301 gcc_assert (!this_insn
23302 || (!BARRIER_P (this_insn)
23303 && !LABEL_P (this_insn)));
23305 if (!this_insn)
23307 		  /* Oh, dear!  We ran off the end... give up.  */
23308 extract_constrain_insn_cached (insn);
23309 arm_ccfsm_state = 0;
23310 arm_target_insn = NULL;
23311 return;
23313 arm_target_insn = this_insn;
23316 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23317 what it was. */
23318 if (!reverse)
23319 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23321 if (reverse || then_not_else)
23322 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23325 /* Restore recog_data (getting the attributes of other insns can
23326 destroy this array, but final.c assumes that it remains intact
23327      across this call).  */
23328 extract_constrain_insn_cached (insn);
23332 /* Output IT instructions. */
23333 void
23334 thumb2_asm_output_opcode (FILE * stream)
23336 char buff[5];
23337 int n;
23339 if (arm_condexec_mask)
23341 for (n = 0; n < arm_condexec_masklen; n++)
23342 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23343 buff[n] = 0;
23344 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23345 arm_condition_codes[arm_current_cc]);
23346 arm_condexec_mask = 0;
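/* Editorial sketch (standalone, not part of arm.c): how a condexec mask and
   length map onto an IT-block mnemonic, using the same encoding as
   arm_condexec_mask/arm_condexec_masklen above (bit N set means insn N runs
   on the "then" condition, clear means "else").  show_it_mnemonic is a
   hypothetical helper that exists only for this illustration.  */
#include <stdio.h>

static void
show_it_mnemonic (unsigned mask, int len, const char *cond)
{
  char buff[5];
  int n;

  for (n = 0; n < len && n < 4; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
  printf ("i%s\t%s\n", buff, cond);
}

int
main (void)
{
  show_it_mnemonic (0x3, 2, "eq");	/* prints "itt	eq"  */
  show_it_mnemonic (0x3, 3, "eq");	/* prints "itte	eq" */
  return 0;
}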
23350 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23351 static bool
23352 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23354 if (GET_MODE_CLASS (mode) == MODE_CC)
23355 return (regno == CC_REGNUM
23356 || (TARGET_HARD_FLOAT
23357 && regno == VFPCC_REGNUM));
23359 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23360 return false;
23362 if (TARGET_THUMB1)
23363 /* For the Thumb we only allow values bigger than SImode in
23364 registers 0 - 6, so that there is always a second low
23365 register available to hold the upper part of the value.
23366        We probably ought to ensure that the register is the
23367 start of an even numbered register pair. */
23368 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23370 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23372 if (mode == SFmode || mode == SImode)
23373 return VFP_REGNO_OK_FOR_SINGLE (regno);
23375 if (mode == DFmode)
23376 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23378 if (mode == HFmode)
23379 return VFP_REGNO_OK_FOR_SINGLE (regno);
23381 /* VFP registers can hold HImode values. */
23382 if (mode == HImode)
23383 return VFP_REGNO_OK_FOR_SINGLE (regno);
23385 if (TARGET_NEON)
23386 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23387 || (VALID_NEON_QREG_MODE (mode)
23388 && NEON_REGNO_OK_FOR_QUAD (regno))
23389 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23390 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23391 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23392 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23393 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23395 return false;
23398 if (TARGET_REALLY_IWMMXT)
23400 if (IS_IWMMXT_GR_REGNUM (regno))
23401 return mode == SImode;
23403 if (IS_IWMMXT_REGNUM (regno))
23404 return VALID_IWMMXT_REG_MODE (mode);
23407 /* We allow almost any value to be stored in the general registers.
23408 Restrict doubleword quantities to even register pairs in ARM state
23409 so that we can use ldrd. Do not allow very large Neon structure
23410 opaque modes in general registers; they would use too many. */
23411 if (regno <= LAST_ARM_REGNUM)
23413 if (ARM_NUM_REGS (mode) > 4)
23414 return false;
23416 if (TARGET_THUMB2)
23417 return true;
23419 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23422 if (regno == FRAME_POINTER_REGNUM
23423 || regno == ARG_POINTER_REGNUM)
23424 /* We only allow integers in the fake hard registers. */
23425 return GET_MODE_CLASS (mode) == MODE_INT;
23427 return false;
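/* Editorial sketch (standalone, for illustration only): the two core-register
   rules above written out with plain integers.  NUM_REGS stands in for
   ARM_NUM_REGS (mode), MODE_SIZE for GET_MODE_SIZE (mode), and 7 for
   LAST_LO_REGNUM; the helpers themselves are hypothetical.  */

/* Thumb-1: a multi-register value must start below r7 so that at least one
   further low register is free to hold the upper part.  */
static int
thumb1_low_reg_ok_p (unsigned int regno, unsigned int num_regs)
{
  return num_regs < 2 || regno < 7;
}

/* ARM state when LDRD is available: a doubleword value must start on an
   even-numbered register so that an LDRD/STRD pair can be used.  */
static int
arm_ldrd_pair_ok_p (unsigned int regno, unsigned int mode_size)
{
  return !(mode_size > 4 && (regno & 1) != 0);
}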
23430 /* Implement MODES_TIEABLE_P. */
23432 bool
23433 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23435 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23436 return true;
23438 /* We specifically want to allow elements of "structure" modes to
23439 be tieable to the structure. This more general condition allows
23440 other rarer situations too. */
23441 if (TARGET_NEON
23442 && (VALID_NEON_DREG_MODE (mode1)
23443 || VALID_NEON_QREG_MODE (mode1)
23444 || VALID_NEON_STRUCT_MODE (mode1))
23445 && (VALID_NEON_DREG_MODE (mode2)
23446 || VALID_NEON_QREG_MODE (mode2)
23447 || VALID_NEON_STRUCT_MODE (mode2)))
23448 return true;
23450 return false;
23453 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23454 not used in arm mode. */
23456 enum reg_class
23457 arm_regno_class (int regno)
23459 if (regno == PC_REGNUM)
23460 return NO_REGS;
23462 if (TARGET_THUMB1)
23464 if (regno == STACK_POINTER_REGNUM)
23465 return STACK_REG;
23466 if (regno == CC_REGNUM)
23467 return CC_REG;
23468 if (regno < 8)
23469 return LO_REGS;
23470 return HI_REGS;
23473 if (TARGET_THUMB2 && regno < 8)
23474 return LO_REGS;
23476 if ( regno <= LAST_ARM_REGNUM
23477 || regno == FRAME_POINTER_REGNUM
23478 || regno == ARG_POINTER_REGNUM)
23479 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23481 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23482 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23484 if (IS_VFP_REGNUM (regno))
23486 if (regno <= D7_VFP_REGNUM)
23487 return VFP_D0_D7_REGS;
23488 else if (regno <= LAST_LO_VFP_REGNUM)
23489 return VFP_LO_REGS;
23490 else
23491 return VFP_HI_REGS;
23494 if (IS_IWMMXT_REGNUM (regno))
23495 return IWMMXT_REGS;
23497 if (IS_IWMMXT_GR_REGNUM (regno))
23498 return IWMMXT_GR_REGS;
23500 return NO_REGS;
23503 /* Handle a special case when computing the offset
23504 of an argument from the frame pointer. */
23506 arm_debugger_arg_offset (int value, rtx addr)
23508 rtx_insn *insn;
23510 /* We are only interested if dbxout_parms() failed to compute the offset. */
23511 if (value != 0)
23512 return 0;
23514 /* We can only cope with the case where the address is held in a register. */
23515 if (!REG_P (addr))
23516 return 0;
23518 /* If we are using the frame pointer to point at the argument, then
23519 an offset of 0 is correct. */
23520 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23521 return 0;
23523 /* If we are using the stack pointer to point at the
23524 argument, then an offset of 0 is correct. */
23525 /* ??? Check this is consistent with thumb2 frame layout. */
23526 if ((TARGET_THUMB || !frame_pointer_needed)
23527 && REGNO (addr) == SP_REGNUM)
23528 return 0;
23530 /* Oh dear. The argument is pointed to by a register rather
23531 than being held in a register, or being stored at a known
23532 offset from the frame pointer. Since GDB only understands
23533 those two kinds of argument we must translate the address
23534 held in the register into an offset from the frame pointer.
23535 We do this by searching through the insns for the function
23536 looking to see where this register gets its value. If the
23537 register is initialized from the frame pointer plus an offset
23538 then we are in luck and we can continue, otherwise we give up.
23540 This code is exercised by producing debugging information
23541 for a function with arguments like this:
23543 double func (double a, double b, int c, double d) {return d;}
23545 Without this code the stab for parameter 'd' will be set to
23546 an offset of 0 from the frame pointer, rather than 8. */
23548 /* The if() statement says:
23550 If the insn is a normal instruction
23551 and if the insn is setting the value in a register
23552 and if the register being set is the register holding the address of the argument
23553         and if the address is computed by an addition
23554 that involves adding to a register
23555 which is the frame pointer
23556 a constant integer
23558 then... */
23560 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23562 if ( NONJUMP_INSN_P (insn)
23563 && GET_CODE (PATTERN (insn)) == SET
23564 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23565 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23566 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23567 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23568 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23571 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23573 break;
23577 if (value == 0)
23579 debug_rtx (addr);
23580 warning (0, "unable to compute real location of stacked parameter");
23581 value = 8; /* XXX magic hack */
23584 return value;
23587 /* Implement TARGET_PROMOTED_TYPE. */
23589 static tree
23590 arm_promoted_type (const_tree t)
23592 if (SCALAR_FLOAT_TYPE_P (t)
23593 && TYPE_PRECISION (t) == 16
23594 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23595 return float_type_node;
23596 return NULL_TREE;
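/* Editorial illustration of the promotion above from the user's point of
   view (assumes __fp16 is enabled via -mfp16-format; add_half is a
   hypothetical example function).  __fp16 values are promoted to float, so
   the addition is performed in single precision and only the stored result
   is narrowed back to half precision.  */
void
add_half (const __fp16 *a, const __fp16 *b, __fp16 *out)
{
  *out = *a + *b;	/* evaluated as (float) *a + (float) *b */
}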
23599 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23600 This simply adds HFmode as a supported mode; even though we don't
23601 implement arithmetic on this type directly, it's supported by
23602 optabs conversions, much the way the double-word arithmetic is
23603 special-cased in the default hook. */
23605 static bool
23606 arm_scalar_mode_supported_p (scalar_mode mode)
23608 if (mode == HFmode)
23609 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23610 else if (ALL_FIXED_POINT_MODE_P (mode))
23611 return true;
23612 else
23613 return default_scalar_mode_supported_p (mode);
23616 /* Set the value of FLT_EVAL_METHOD.
23617 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23619 0: evaluate all operations and constants, whose semantic type has at
23620 most the range and precision of type float, to the range and
23621 precision of float; evaluate all other operations and constants to
23622 the range and precision of the semantic type;
23624    N, where _FloatN is a supported interchange floating type:
23625 evaluate all operations and constants, whose semantic type has at
23626 most the range and precision of _FloatN type, to the range and
23627 precision of the _FloatN type; evaluate all other operations and
23628 constants to the range and precision of the semantic type;
23630 If we have the ARMv8.2-A extensions then we support _Float16 in native
23631 precision, so we should set this to 16. Otherwise, we support the type,
23632 but want to evaluate expressions in float precision, so set this to
23633 0. */
23635 static enum flt_eval_method
23636 arm_excess_precision (enum excess_precision_type type)
23638 switch (type)
23640 case EXCESS_PRECISION_TYPE_FAST:
23641 case EXCESS_PRECISION_TYPE_STANDARD:
23642 /* We can calculate either in 16-bit range and precision or
23643 32-bit range and precision. Make that decision based on whether
23644 we have native support for the ARMv8.2-A 16-bit floating-point
23645 instructions or not. */
23646 return (TARGET_VFP_FP16INST
23647 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23648 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23649 case EXCESS_PRECISION_TYPE_IMPLICIT:
23650 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23651 default:
23652 gcc_unreachable ();
23654 return FLT_EVAL_METHOD_UNPREDICTABLE;
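/* Editorial illustration (assumes the IEEE half-precision format, so that
   _Float16 exists; fma16 is a hypothetical example function).  With the
   ARMv8.2-A FP16 instructions (TARGET_VFP_FP16INST) the expression below is
   evaluated directly in _Float16; without them it is evaluated in float and
   the result converted back at the return, matching the two FLT_EVAL_METHOD
   choices made above.  */
_Float16
fma16 (_Float16 a, _Float16 b, _Float16 c)
{
  return a * b + c;
}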
23658 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23659 _Float16 if we are using anything other than ieee format for 16-bit
23660 floating point. Otherwise, punt to the default implementation. */
23661 static opt_scalar_float_mode
23662 arm_floatn_mode (int n, bool extended)
23664 if (!extended && n == 16)
23666 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23667 return HFmode;
23668 return opt_scalar_float_mode ();
23671 return default_floatn_mode (n, extended);
23675 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23676 not to early-clobber SRC registers in the process.
23678 We assume that the operands described by SRC and DEST represent a
23679 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23680 number of components into which the copy has been decomposed. */
23681 void
23682 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23684 unsigned int i;
23686 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23687 || REGNO (operands[0]) < REGNO (operands[1]))
23689 for (i = 0; i < count; i++)
23691 operands[2 * i] = dest[i];
23692 operands[2 * i + 1] = src[i];
23695 else
23697 for (i = 0; i < count; i++)
23699 operands[2 * i] = dest[count - i - 1];
23700 operands[2 * i + 1] = src[count - i - 1];
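/* Editorial sketch (standalone): why the copy direction chosen above matters
   when the source and destination register ranges overlap.  Copying
   "registers" {1,2} into {0,1} must go low-to-high, while copying {0,1}
   into {1,2} must go high-to-low; the wrong order reads a value that has
   already been overwritten.  overlap_safe_copy is a hypothetical helper.  */
static void
overlap_safe_copy (int *regs, unsigned int dest, unsigned int src,
		   unsigned int count)
{
  unsigned int i;

  if (dest < src)
    for (i = 0; i < count; i++)
      regs[dest + i] = regs[src + i];	/* forward: dest below src */
  else
    for (i = count; i-- > 0;)
      regs[dest + i] = regs[src + i];	/* backward: dest above src */
}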
23705 /* Split operands into moves from op[1] + op[2] into op[0]. */
23707 void
23708 neon_split_vcombine (rtx operands[3])
23710 unsigned int dest = REGNO (operands[0]);
23711 unsigned int src1 = REGNO (operands[1]);
23712 unsigned int src2 = REGNO (operands[2]);
23713 machine_mode halfmode = GET_MODE (operands[1]);
23714 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23715 rtx destlo, desthi;
23717 if (src1 == dest && src2 == dest + halfregs)
23719 /* No-op move. Can't split to nothing; emit something. */
23720 emit_note (NOTE_INSN_DELETED);
23721 return;
23724 /* Preserve register attributes for variable tracking. */
23725 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23726 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23727 GET_MODE_SIZE (halfmode));
23729 /* Special case of reversed high/low parts. Use VSWP. */
23730 if (src2 == dest && src1 == dest + halfregs)
23732 rtx x = gen_rtx_SET (destlo, operands[1]);
23733 rtx y = gen_rtx_SET (desthi, operands[2]);
23734 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23735 return;
23738 if (!reg_overlap_mentioned_p (operands[2], destlo))
23740 /* Try to avoid unnecessary moves if part of the result
23741 is in the right place already. */
23742 if (src1 != dest)
23743 emit_move_insn (destlo, operands[1]);
23744 if (src2 != dest + halfregs)
23745 emit_move_insn (desthi, operands[2]);
23747 else
23749 if (src2 != dest + halfregs)
23750 emit_move_insn (desthi, operands[2]);
23751 if (src1 != dest)
23752 emit_move_insn (destlo, operands[1]);
23756 /* Return the number (counting from 0) of
23757 the least significant set bit in MASK. */
23759 inline static int
23760 number_of_first_bit_set (unsigned mask)
23762 return ctz_hwi (mask);
23765 /* Like emit_multi_reg_push, but allowing for a different set of
23766 registers to be described as saved. MASK is the set of registers
23767 to be saved; REAL_REGS is the set of registers to be described as
23768 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23770 static rtx_insn *
23771 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23773 unsigned long regno;
23774 rtx par[10], tmp, reg;
23775 rtx_insn *insn;
23776 int i, j;
23778 /* Build the parallel of the registers actually being stored. */
23779 for (i = 0; mask; ++i, mask &= mask - 1)
23781 regno = ctz_hwi (mask);
23782 reg = gen_rtx_REG (SImode, regno);
23784 if (i == 0)
23785 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23786 else
23787 tmp = gen_rtx_USE (VOIDmode, reg);
23789 par[i] = tmp;
23792 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23793 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23794 tmp = gen_frame_mem (BLKmode, tmp);
23795 tmp = gen_rtx_SET (tmp, par[0]);
23796 par[0] = tmp;
23798 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23799 insn = emit_insn (tmp);
23801 /* Always build the stack adjustment note for unwind info. */
23802 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23803 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23804 par[0] = tmp;
23806 /* Build the parallel of the registers recorded as saved for unwind. */
23807 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23809 regno = ctz_hwi (real_regs);
23810 reg = gen_rtx_REG (SImode, regno);
23812 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23813 tmp = gen_frame_mem (SImode, tmp);
23814 tmp = gen_rtx_SET (tmp, reg);
23815 RTX_FRAME_RELATED_P (tmp) = 1;
23816 par[j + 1] = tmp;
23819 if (j == 0)
23820 tmp = par[0];
23821 else
23823 RTX_FRAME_RELATED_P (par[0]) = 1;
23824 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23829 return insn;
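/* Editorial note: the MASK/REAL_REGS distinction matters for sequences such
   as the high-register save quoted in thumb1_unexpanded_epilogue below:
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
   The push insn stores r6/r7 (MASK), but the unwind information must record
   that r8/r9 (the REAL_REGS) are the registers whose values were saved.  */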
23832 /* Emit code to push or pop registers to or from the stack. F is the
23833 assembly file. MASK is the registers to pop. */
23834 static void
23835 thumb_pop (FILE *f, unsigned long mask)
23837 int regno;
23838 int lo_mask = mask & 0xFF;
23840 gcc_assert (mask);
23842 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23844 /* Special case. Do not generate a POP PC statement here, do it in
23845 thumb_exit() */
23846 thumb_exit (f, -1);
23847 return;
23850 fprintf (f, "\tpop\t{");
23852 /* Look at the low registers first. */
23853 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23855 if (lo_mask & 1)
23857 asm_fprintf (f, "%r", regno);
23859 if ((lo_mask & ~1) != 0)
23860 fprintf (f, ", ");
23864 if (mask & (1 << PC_REGNUM))
23866 /* Catch popping the PC. */
23867 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23868 || IS_CMSE_ENTRY (arm_current_func_type ()))
23870 	  /* The PC is never popped directly; instead
23871 it is popped into r3 and then BX is used. */
23872 fprintf (f, "}\n");
23874 thumb_exit (f, -1);
23876 return;
23878 else
23880 if (mask & 0xFF)
23881 fprintf (f, ", ");
23883 asm_fprintf (f, "%r", PC_REGNUM);
23887 fprintf (f, "}\n");
23890 /* Generate code to return from a thumb function.
23891 If 'reg_containing_return_addr' is -1, then the return address is
23892 actually on the stack, at the stack pointer. */
23893 static void
23894 thumb_exit (FILE *f, int reg_containing_return_addr)
23896 unsigned regs_available_for_popping;
23897 unsigned regs_to_pop;
23898 int pops_needed;
23899 unsigned available;
23900 unsigned required;
23901 machine_mode mode;
23902 int size;
23903 int restore_a4 = FALSE;
23905 /* Compute the registers we need to pop. */
23906 regs_to_pop = 0;
23907 pops_needed = 0;
23909 if (reg_containing_return_addr == -1)
23911 regs_to_pop |= 1 << LR_REGNUM;
23912 ++pops_needed;
23915 if (TARGET_BACKTRACE)
23917 /* Restore the (ARM) frame pointer and stack pointer. */
23918 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23919 pops_needed += 2;
23922 /* If there is nothing to pop then just emit the BX instruction and
23923 return. */
23924 if (pops_needed == 0)
23926 if (crtl->calls_eh_return)
23927 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23929 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23931 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23932 reg_containing_return_addr);
23933 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23935 else
23936 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23937 return;
23939 /* Otherwise if we are not supporting interworking and we have not created
23940 a backtrace structure and the function was not entered in ARM mode then
23941 just pop the return address straight into the PC. */
23942 else if (!TARGET_INTERWORK
23943 && !TARGET_BACKTRACE
23944 && !is_called_in_ARM_mode (current_function_decl)
23945 && !crtl->calls_eh_return
23946 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23948 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23949 return;
23952 /* Find out how many of the (return) argument registers we can corrupt. */
23953 regs_available_for_popping = 0;
23955 /* If returning via __builtin_eh_return, the bottom three registers
23956 all contain information needed for the return. */
23957 if (crtl->calls_eh_return)
23958 size = 12;
23959 else
23961       /* Where possible, deduce the registers used from the function's
23962 	 return value.  This is more reliable than examining
23963 df_regs_ever_live_p () because that will be set if the register is
23964 ever used in the function, not just if the register is used
23965 to hold a return value. */
23967 if (crtl->return_rtx != 0)
23968 mode = GET_MODE (crtl->return_rtx);
23969 else
23970 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23972 size = GET_MODE_SIZE (mode);
23974 if (size == 0)
23976 /* In a void function we can use any argument register.
23977 In a function that returns a structure on the stack
23978 we can use the second and third argument registers. */
23979 if (mode == VOIDmode)
23980 regs_available_for_popping =
23981 (1 << ARG_REGISTER (1))
23982 | (1 << ARG_REGISTER (2))
23983 | (1 << ARG_REGISTER (3));
23984 else
23985 regs_available_for_popping =
23986 (1 << ARG_REGISTER (2))
23987 | (1 << ARG_REGISTER (3));
23989 else if (size <= 4)
23990 regs_available_for_popping =
23991 (1 << ARG_REGISTER (2))
23992 | (1 << ARG_REGISTER (3));
23993 else if (size <= 8)
23994 regs_available_for_popping =
23995 (1 << ARG_REGISTER (3));
23998 /* Match registers to be popped with registers into which we pop them. */
23999 for (available = regs_available_for_popping,
24000 required = regs_to_pop;
24001 required != 0 && available != 0;
24002 available &= ~(available & - available),
24003 required &= ~(required & - required))
24004 -- pops_needed;
24006 /* If we have any popping registers left over, remove them. */
24007 if (available > 0)
24008 regs_available_for_popping &= ~available;
24010 /* Otherwise if we need another popping register we can use
24011 the fourth argument register. */
24012 else if (pops_needed)
24014 /* If we have not found any free argument registers and
24015 reg a4 contains the return address, we must move it. */
24016 if (regs_available_for_popping == 0
24017 && reg_containing_return_addr == LAST_ARG_REGNUM)
24019 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24020 reg_containing_return_addr = LR_REGNUM;
24022 else if (size > 12)
24024 /* Register a4 is being used to hold part of the return value,
24025 but we have dire need of a free, low register. */
24026 restore_a4 = TRUE;
24028 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24031 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24033 /* The fourth argument register is available. */
24034 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24036 --pops_needed;
24040 /* Pop as many registers as we can. */
24041 thumb_pop (f, regs_available_for_popping);
24043 /* Process the registers we popped. */
24044 if (reg_containing_return_addr == -1)
24046 /* The return address was popped into the lowest numbered register. */
24047 regs_to_pop &= ~(1 << LR_REGNUM);
24049 reg_containing_return_addr =
24050 number_of_first_bit_set (regs_available_for_popping);
24052       /* Remove this register from the mask of available registers, so that
24053 the return address will not be corrupted by further pops. */
24054 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24057 /* If we popped other registers then handle them here. */
24058 if (regs_available_for_popping)
24060 int frame_pointer;
24062 /* Work out which register currently contains the frame pointer. */
24063 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24065 /* Move it into the correct place. */
24066 asm_fprintf (f, "\tmov\t%r, %r\n",
24067 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24069 /* (Temporarily) remove it from the mask of popped registers. */
24070 regs_available_for_popping &= ~(1 << frame_pointer);
24071 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24073 if (regs_available_for_popping)
24075 int stack_pointer;
24077 /* We popped the stack pointer as well,
24078 find the register that contains it. */
24079 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24081 /* Move it into the stack register. */
24082 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24084 /* At this point we have popped all necessary registers, so
24085 do not worry about restoring regs_available_for_popping
24086 to its correct value:
24088 assert (pops_needed == 0)
24089 assert (regs_available_for_popping == (1 << frame_pointer))
24090 assert (regs_to_pop == (1 << STACK_POINTER)) */
24092 else
24094 	  /* Since we have just moved the popped value into the frame
24095 pointer, the popping register is available for reuse, and
24096 we know that we still have the stack pointer left to pop. */
24097 regs_available_for_popping |= (1 << frame_pointer);
24101 /* If we still have registers left on the stack, but we no longer have
24102 any registers into which we can pop them, then we must move the return
24103 address into the link register and make available the register that
24104 contained it. */
24105 if (regs_available_for_popping == 0 && pops_needed > 0)
24107 regs_available_for_popping |= 1 << reg_containing_return_addr;
24109 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24110 reg_containing_return_addr);
24112 reg_containing_return_addr = LR_REGNUM;
24115 /* If we have registers left on the stack then pop some more.
24116 We know that at most we will want to pop FP and SP. */
24117 if (pops_needed > 0)
24119 int popped_into;
24120 int move_to;
24122 thumb_pop (f, regs_available_for_popping);
24124 /* We have popped either FP or SP.
24125 Move whichever one it is into the correct register. */
24126 popped_into = number_of_first_bit_set (regs_available_for_popping);
24127 move_to = number_of_first_bit_set (regs_to_pop);
24129 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24130 --pops_needed;
24133 /* If we still have not popped everything then we must have only
24134 had one register available to us and we are now popping the SP. */
24135 if (pops_needed > 0)
24137 int popped_into;
24139 thumb_pop (f, regs_available_for_popping);
24141 popped_into = number_of_first_bit_set (regs_available_for_popping);
24143 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24145 assert (regs_to_pop == (1 << STACK_POINTER))
24146 assert (pops_needed == 1)
24150 /* If necessary restore the a4 register. */
24151 if (restore_a4)
24153 if (reg_containing_return_addr != LR_REGNUM)
24155 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24156 reg_containing_return_addr = LR_REGNUM;
24159 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24162 if (crtl->calls_eh_return)
24163 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24165 /* Return to caller. */
24166 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24168 /* This is for the cases where LR is not being used to contain the return
24169 address. It may therefore contain information that we might not want
24170 to leak, hence it must be cleared. The value in R0 will never be a
24171 secret at this point, so it is safe to use it, see the clearing code
24172 in 'cmse_nonsecure_entry_clear_before_return'. */
24173 if (reg_containing_return_addr != LR_REGNUM)
24174 asm_fprintf (f, "\tmov\tlr, r0\n");
24176 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24177 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24179 else
24180 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24183 /* Scan INSN just before assembler is output for it.
24184 For Thumb-1, we track the status of the condition codes; this
24185 information is used in the cbranchsi4_insn pattern. */
24186 void
24187 thumb1_final_prescan_insn (rtx_insn *insn)
24189 if (flag_print_asm_name)
24190 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24191 INSN_ADDRESSES (INSN_UID (insn)));
24192 /* Don't overwrite the previous setter when we get to a cbranch. */
24193 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24195 enum attr_conds conds;
24197 if (cfun->machine->thumb1_cc_insn)
24199 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24200 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24201 CC_STATUS_INIT;
24203 conds = get_attr_conds (insn);
24204 if (conds == CONDS_SET)
24206 rtx set = single_set (insn);
24207 cfun->machine->thumb1_cc_insn = insn;
24208 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24209 cfun->machine->thumb1_cc_op1 = const0_rtx;
24210 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24211 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24213 rtx src1 = XEXP (SET_SRC (set), 1);
24214 if (src1 == const0_rtx)
24215 cfun->machine->thumb1_cc_mode = CCmode;
24217 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24219 /* Record the src register operand instead of dest because
24220 cprop_hardreg pass propagates src. */
24221 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24224 else if (conds != CONDS_NOCOND)
24225 cfun->machine->thumb1_cc_insn = NULL_RTX;
24228 /* Check if unexpected far jump is used. */
24229 if (cfun->machine->lr_save_eliminated
24230 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24231 internal_error("Unexpected thumb1 far jump");
24235 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24237 unsigned HOST_WIDE_INT mask = 0xff;
24238 int i;
24240 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24241 if (val == 0) /* XXX */
24242 return 0;
24244 for (i = 0; i < 25; i++)
24245 if ((val & (mask << i)) == val)
24246 return 1;
24248 return 0;
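/* Editorial examples for the test above: 0x000ff000 is accepted (it is 0xff
   shifted left by 12 places), while 0x00100001 is rejected because its set
   bits do not fit inside any contiguous 8-bit window.  */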
24251 /* Returns nonzero if the current function contains,
24252    or might contain, a far jump.  */
24253 static int
24254 thumb_far_jump_used_p (void)
24256 rtx_insn *insn;
24257 bool far_jump = false;
24258 unsigned int func_size = 0;
24260 /* If we have already decided that far jumps may be used,
24261 do not bother checking again, and always return true even if
24262 it turns out that they are not being used. Once we have made
24263 the decision that far jumps are present (and that hence the link
24264 register will be pushed onto the stack) we cannot go back on it. */
24265 if (cfun->machine->far_jump_used)
24266 return 1;
24268 /* If this function is not being called from the prologue/epilogue
24269 generation code then it must be being called from the
24270 INITIAL_ELIMINATION_OFFSET macro. */
24271 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24273 /* In this case we know that we are being asked about the elimination
24274 of the arg pointer register. If that register is not being used,
24275 then there are no arguments on the stack, and we do not have to
24276 worry that a far jump might force the prologue to push the link
24277 register, changing the stack offsets. In this case we can just
24278 return false, since the presence of far jumps in the function will
24279 not affect stack offsets.
24281 If the arg pointer is live (or if it was live, but has now been
24282 eliminated and so set to dead) then we do have to test to see if
24283 the function might contain a far jump. This test can lead to some
24284 	 false negatives, since before reload is completed, the length of
24285 branch instructions is not known, so gcc defaults to returning their
24286 longest length, which in turn sets the far jump attribute to true.
24288 A false negative will not result in bad code being generated, but it
24289 will result in a needless push and pop of the link register. We
24290 hope that this does not occur too often.
24292 If we need doubleword stack alignment this could affect the other
24293 elimination offsets so we can't risk getting it wrong. */
24294 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24295 cfun->machine->arg_pointer_live = 1;
24296 else if (!cfun->machine->arg_pointer_live)
24297 return 0;
24300 /* We should not change far_jump_used during or after reload, as there is
24301 no chance to change stack frame layout. */
24302 if (reload_in_progress || reload_completed)
24303 return 0;
24305 /* Check to see if the function contains a branch
24306 insn with the far jump attribute set. */
24307 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24309 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24311 far_jump = true;
24313 func_size += get_attr_length (insn);
24316   /* The far_jump attribute will always be true for thumb1 before the
24317      shorten_branch pass, so checking the far_jump attribute before
24318      shorten_branch isn't very useful.
24320      The following heuristic tries to estimate more accurately whether a far
24321      jump will actually be needed.  The heuristic is very conservative, as
24322      there is no chance to roll back a decision not to use far jumps.
24324 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24325 2-byte insn is associated with a 4 byte constant pool. Using
24326 function size 2048/3 as the threshold is conservative enough. */
24327 if (far_jump)
24329 if ((func_size * 3) >= 2048)
24331 /* Record the fact that we have decided that
24332 the function does use far jumps. */
24333 cfun->machine->far_jump_used = 1;
24334 return 1;
24338 return 0;
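/* Editorial sketch of the size heuristic above (far_jump_plausible_p is a
   hypothetical, standalone helper).  In the worst case every 2-byte Thumb-1
   insn drags in a 4-byte constant-pool entry, so the laid-out function can
   approach three times FUNC_SIZE bytes, while a single Thumb-1 branch only
   reaches roughly -2048..2046 bytes.  */
static int
far_jump_plausible_p (unsigned int func_size)
{
  return func_size * 3 >= 2048;
}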
24341 /* Return nonzero if FUNC must be entered in ARM mode. */
24342 static bool
24343 is_called_in_ARM_mode (tree func)
24345 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24347 /* Ignore the problem about functions whose address is taken. */
24348 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24349 return true;
24351 #ifdef ARM_PE
24352 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24353 #else
24354 return false;
24355 #endif
24358 /* Given the stack offsets and register mask in OFFSETS, decide how
24359 many additional registers to push instead of subtracting a constant
24360 from SP. For epilogues the principle is the same except we use pop.
24361 FOR_PROLOGUE indicates which we're generating. */
24362 static int
24363 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24365 HOST_WIDE_INT amount;
24366 unsigned long live_regs_mask = offsets->saved_regs_mask;
24367 /* Extract a mask of the ones we can give to the Thumb's push/pop
24368 instruction. */
24369 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24370 /* Then count how many other high registers will need to be pushed. */
24371 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24372 int n_free, reg_base, size;
24374 if (!for_prologue && frame_pointer_needed)
24375 amount = offsets->locals_base - offsets->saved_regs;
24376 else
24377 amount = offsets->outgoing_args - offsets->saved_regs;
24379 /* If the stack frame size is 512 exactly, we can save one load
24380 instruction, which should make this a win even when optimizing
24381 for speed. */
24382 if (!optimize_size && amount != 512)
24383 return 0;
24385 /* Can't do this if there are high registers to push. */
24386 if (high_regs_pushed != 0)
24387 return 0;
24389 /* Shouldn't do it in the prologue if no registers would normally
24390 be pushed at all. In the epilogue, also allow it if we'll have
24391 a pop insn for the PC. */
24392 if (l_mask == 0
24393 && (for_prologue
24394 || TARGET_BACKTRACE
24395 || (live_regs_mask & 1 << LR_REGNUM) == 0
24396 || TARGET_INTERWORK
24397 || crtl->args.pretend_args_size != 0))
24398 return 0;
24400 /* Don't do this if thumb_expand_prologue wants to emit instructions
24401 between the push and the stack frame allocation. */
24402 if (for_prologue
24403 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24404 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24405 return 0;
24407 reg_base = 0;
24408 n_free = 0;
24409 if (!for_prologue)
24411 size = arm_size_return_regs ();
24412 reg_base = ARM_NUM_INTS (size);
24413 live_regs_mask >>= reg_base;
24416 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24417 && (for_prologue || call_used_regs[reg_base + n_free]))
24419 live_regs_mask >>= 1;
24420 n_free++;
24423 if (n_free == 0)
24424 return 0;
24425 gcc_assert (amount / 4 * 4 == amount);
24427 if (amount >= 512 && (amount - n_free * 4) < 512)
24428 return (amount - 508) / 4;
24429 if (amount <= n_free * 4)
24430 return amount / 4;
24431 return 0;
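/* Editorial worked example for the arithmetic above: with AMOUNT == 516 and
   three free low registers, 516 - 3*4 == 504 < 512, so (516 - 508) / 4 == 2
   extra registers are pushed, leaving a 508-byte adjustment that fits the
   immediate of a single Thumb-1 "sub sp, #imm" (whose limit is 508).  */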
24434 /* The bits which aren't usefully expanded as rtl. */
24435 const char *
24436 thumb1_unexpanded_epilogue (void)
24438 arm_stack_offsets *offsets;
24439 int regno;
24440 unsigned long live_regs_mask = 0;
24441 int high_regs_pushed = 0;
24442 int extra_pop;
24443 int had_to_push_lr;
24444 int size;
24446 if (cfun->machine->return_used_this_function != 0)
24447 return "";
24449 if (IS_NAKED (arm_current_func_type ()))
24450 return "";
24452 offsets = arm_get_frame_offsets ();
24453 live_regs_mask = offsets->saved_regs_mask;
24454 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24456   /* Where possible, deduce the registers used from the function's return value.
24457      This is more reliable than examining df_regs_ever_live_p () because that
24458 will be set if the register is ever used in the function, not just if
24459 the register is used to hold a return value. */
24460 size = arm_size_return_regs ();
24462 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24463 if (extra_pop > 0)
24465 unsigned long extra_mask = (1 << extra_pop) - 1;
24466 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24469 /* The prolog may have pushed some high registers to use as
24470 work registers. e.g. the testsuite file:
24471 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24472 compiles to produce:
24473 push {r4, r5, r6, r7, lr}
24474 mov r7, r9
24475 mov r6, r8
24476 push {r6, r7}
24477 as part of the prolog. We have to undo that pushing here. */
24479 if (high_regs_pushed)
24481 unsigned long mask = live_regs_mask & 0xff;
24482 int next_hi_reg;
24484 /* The available low registers depend on the size of the value we are
24485 returning. */
24486 if (size <= 12)
24487 mask |= 1 << 3;
24488 if (size <= 8)
24489 mask |= 1 << 2;
24491 if (mask == 0)
24492 /* Oh dear! We have no low registers into which we can pop
24493 high registers! */
24494 internal_error
24495 ("no low registers available for popping high registers");
24497 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24498 if (live_regs_mask & (1 << next_hi_reg))
24499 break;
24501 while (high_regs_pushed)
24503 /* Find lo register(s) into which the high register(s) can
24504 be popped. */
24505 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24507 if (mask & (1 << regno))
24508 high_regs_pushed--;
24509 if (high_regs_pushed == 0)
24510 break;
24513 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24515 /* Pop the values into the low register(s). */
24516 thumb_pop (asm_out_file, mask);
24518 /* Move the value(s) into the high registers. */
24519 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24521 if (mask & (1 << regno))
24523 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24524 regno);
24526 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24527 if (live_regs_mask & (1 << next_hi_reg))
24528 break;
24532 live_regs_mask &= ~0x0f00;
24535 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24536 live_regs_mask &= 0xff;
24538 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24540 /* Pop the return address into the PC. */
24541 if (had_to_push_lr)
24542 live_regs_mask |= 1 << PC_REGNUM;
24544 /* Either no argument registers were pushed or a backtrace
24545 structure was created which includes an adjusted stack
24546 pointer, so just pop everything. */
24547 if (live_regs_mask)
24548 thumb_pop (asm_out_file, live_regs_mask);
24550 /* We have either just popped the return address into the
24551 	 PC or it was kept in LR for the entire function.
24552 Note that thumb_pop has already called thumb_exit if the
24553 PC was in the list. */
24554 if (!had_to_push_lr)
24555 thumb_exit (asm_out_file, LR_REGNUM);
24557 else
24559 /* Pop everything but the return address. */
24560 if (live_regs_mask)
24561 thumb_pop (asm_out_file, live_regs_mask);
24563 if (had_to_push_lr)
24565 if (size > 12)
24567 /* We have no free low regs, so save one. */
24568 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24569 LAST_ARG_REGNUM);
24572 /* Get the return address into a temporary register. */
24573 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24575 if (size > 12)
24577 /* Move the return address to lr. */
24578 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24579 LAST_ARG_REGNUM);
24580 /* Restore the low register. */
24581 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24582 IP_REGNUM);
24583 regno = LR_REGNUM;
24585 else
24586 regno = LAST_ARG_REGNUM;
24588 else
24589 regno = LR_REGNUM;
24591 /* Remove the argument registers that were pushed onto the stack. */
24592 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24593 SP_REGNUM, SP_REGNUM,
24594 crtl->args.pretend_args_size);
24596 thumb_exit (asm_out_file, regno);
24599 return "";
24602 /* Functions to save and restore machine-specific function data. */
24603 static struct machine_function *
24604 arm_init_machine_status (void)
24606 struct machine_function *machine;
24607 machine = ggc_cleared_alloc<machine_function> ();
24609 #if ARM_FT_UNKNOWN != 0
24610 machine->func_type = ARM_FT_UNKNOWN;
24611 #endif
24612 return machine;
24615 /* Return an RTX indicating where the return address to the
24616 calling function can be found. */
24618 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24620 if (count != 0)
24621 return NULL_RTX;
24623 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24626 /* Do anything needed before RTL is emitted for each function. */
24627 void
24628 arm_init_expanders (void)
24630 /* Arrange to initialize and mark the machine per-function status. */
24631 init_machine_status = arm_init_machine_status;
24633 /* This is to stop the combine pass optimizing away the alignment
24634 adjustment of va_arg. */
24635 /* ??? It is claimed that this should not be necessary. */
24636 if (cfun)
24637 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24640 /* Check whether FUNC is compiled in a different (ARM/Thumb) mode than the
      current one.  */
24642 bool
24643 arm_change_mode_p (tree func)
24645 if (TREE_CODE (func) != FUNCTION_DECL)
24646 return false;
24648 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24650 if (!callee_tree)
24651 callee_tree = target_option_default_node;
24653 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24654 int flags = callee_opts->x_target_flags;
24656 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24659 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24660 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24661 to point at the base of the local variables after static stack
24662 space for a function has been allocated. */
24664 HOST_WIDE_INT
24665 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24667 arm_stack_offsets *offsets;
24669 offsets = arm_get_frame_offsets ();
24671 switch (from)
24673 case ARG_POINTER_REGNUM:
24674 switch (to)
24676 case STACK_POINTER_REGNUM:
24677 return offsets->outgoing_args - offsets->saved_args;
24679 case FRAME_POINTER_REGNUM:
24680 return offsets->soft_frame - offsets->saved_args;
24682 case ARM_HARD_FRAME_POINTER_REGNUM:
24683 return offsets->saved_regs - offsets->saved_args;
24685 case THUMB_HARD_FRAME_POINTER_REGNUM:
24686 return offsets->locals_base - offsets->saved_args;
24688 default:
24689 gcc_unreachable ();
24691 break;
24693 case FRAME_POINTER_REGNUM:
24694 switch (to)
24696 case STACK_POINTER_REGNUM:
24697 return offsets->outgoing_args - offsets->soft_frame;
24699 case ARM_HARD_FRAME_POINTER_REGNUM:
24700 return offsets->saved_regs - offsets->soft_frame;
24702 case THUMB_HARD_FRAME_POINTER_REGNUM:
24703 return offsets->locals_base - offsets->soft_frame;
24705 default:
24706 gcc_unreachable ();
24708 break;
24710 default:
24711 gcc_unreachable ();
24715 /* Generate the function's prologue. */
24717 void
24718 thumb1_expand_prologue (void)
24720 rtx_insn *insn;
24722 HOST_WIDE_INT amount;
24723 HOST_WIDE_INT size;
24724 arm_stack_offsets *offsets;
24725 unsigned long func_type;
24726 int regno;
24727 unsigned long live_regs_mask;
24728 unsigned long l_mask;
24729 unsigned high_regs_pushed = 0;
24730 bool lr_needs_saving;
24732 func_type = arm_current_func_type ();
24734 /* Naked functions don't have prologues. */
24735 if (IS_NAKED (func_type))
24737 if (flag_stack_usage_info)
24738 current_function_static_stack_size = 0;
24739 return;
24742 if (IS_INTERRUPT (func_type))
24744 error ("interrupt Service Routines cannot be coded in Thumb mode");
24745 return;
24748 if (is_called_in_ARM_mode (current_function_decl))
24749 emit_insn (gen_prologue_thumb1_interwork ());
24751 offsets = arm_get_frame_offsets ();
24752 live_regs_mask = offsets->saved_regs_mask;
24753 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24755 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24756 l_mask = live_regs_mask & 0x40ff;
24757 /* Then count how many other high registers will need to be pushed. */
24758 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24760 if (crtl->args.pretend_args_size)
24762 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24764 if (cfun->machine->uses_anonymous_args)
24766 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24767 unsigned long mask;
24769 mask = 1ul << (LAST_ARG_REGNUM + 1);
24770 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
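	  /* For example, with LAST_ARG_REGNUM == 3 and num_pushes == 2 this
	     gives (1 << 4) - (1 << 2) == 0xc, i.e. a push of r2 and r3
	     (editorial note).  */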
24772 insn = thumb1_emit_multi_reg_push (mask, 0);
24774 else
24776 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24777 stack_pointer_rtx, x));
24779 RTX_FRAME_RELATED_P (insn) = 1;
24782 if (TARGET_BACKTRACE)
24784 HOST_WIDE_INT offset = 0;
24785 unsigned work_register;
24786 rtx work_reg, x, arm_hfp_rtx;
24788 /* We have been asked to create a stack backtrace structure.
24789 The code looks like this:
24791 0 .align 2
24792 0 func:
24793 0 sub SP, #16 Reserve space for 4 registers.
24794 2 push {R7} Push low registers.
24795 4 add R7, SP, #20 Get the stack pointer before the push.
24796 6 str R7, [SP, #8] Store the stack pointer
24797 (before reserving the space).
24798 8 mov R7, PC Get hold of the start of this code + 12.
24799 10 str R7, [SP, #16] Store it.
24800 12 mov R7, FP Get hold of the current frame pointer.
24801 14 str R7, [SP, #4] Store it.
24802 16 mov R7, LR Get hold of the current return address.
24803 18 str R7, [SP, #12] Store it.
24804 20 add R7, SP, #16 Point at the start of the
24805 backtrace structure.
24806 22 mov FP, R7 Put this value into the frame pointer. */
24808 work_register = thumb_find_work_register (live_regs_mask);
24809 work_reg = gen_rtx_REG (SImode, work_register);
24810 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24812 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24813 stack_pointer_rtx, GEN_INT (-16)));
24814 RTX_FRAME_RELATED_P (insn) = 1;
24816 if (l_mask)
24818 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24819 RTX_FRAME_RELATED_P (insn) = 1;
24820 lr_needs_saving = false;
24822 offset = bit_count (l_mask) * UNITS_PER_WORD;
24825 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24826 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24828 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24829 x = gen_frame_mem (SImode, x);
24830 emit_move_insn (x, work_reg);
24832 /* Make sure that the instruction fetching the PC is in the right place
24833 to calculate "start of backtrace creation code + 12". */
24834 /* ??? The stores using the common WORK_REG ought to be enough to
24835 prevent the scheduler from doing anything weird. Failing that
24836 we could always move all of the following into an UNSPEC_VOLATILE. */
24837 if (l_mask)
24839 x = gen_rtx_REG (SImode, PC_REGNUM);
24840 emit_move_insn (work_reg, x);
24842 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24843 x = gen_frame_mem (SImode, x);
24844 emit_move_insn (x, work_reg);
24846 emit_move_insn (work_reg, arm_hfp_rtx);
24848 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24849 x = gen_frame_mem (SImode, x);
24850 emit_move_insn (x, work_reg);
24852 else
24854 emit_move_insn (work_reg, arm_hfp_rtx);
24856 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24857 x = gen_frame_mem (SImode, x);
24858 emit_move_insn (x, work_reg);
24860 x = gen_rtx_REG (SImode, PC_REGNUM);
24861 emit_move_insn (work_reg, x);
24863 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24864 x = gen_frame_mem (SImode, x);
24865 emit_move_insn (x, work_reg);
24868 x = gen_rtx_REG (SImode, LR_REGNUM);
24869 emit_move_insn (work_reg, x);
24871 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24872 x = gen_frame_mem (SImode, x);
24873 emit_move_insn (x, work_reg);
24875 x = GEN_INT (offset + 12);
24876 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24878 emit_move_insn (arm_hfp_rtx, work_reg);
24880 /* Optimization: If we are not pushing any low registers but we are going
24881 to push some high registers then delay our first push. This will just
24882 be a push of LR and we can combine it with the push of the first high
24883 register. */
24884 else if ((l_mask & 0xff) != 0
24885 || (high_regs_pushed == 0 && lr_needs_saving))
24887 unsigned long mask = l_mask;
24888 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24889 insn = thumb1_emit_multi_reg_push (mask, mask);
24890 RTX_FRAME_RELATED_P (insn) = 1;
24891 lr_needs_saving = false;
24894 if (high_regs_pushed)
24896 unsigned pushable_regs;
24897 unsigned next_hi_reg;
24898 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24899 : crtl->args.info.nregs;
24900 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24902 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24903 if (live_regs_mask & (1 << next_hi_reg))
24904 break;
24906 /* Here we need to mask out registers used for passing arguments, even
24907 if they can be pushed, to avoid using them to stash the high registers;
24908 such a stash could clobber arguments that the function still needs. */
24909 pushable_regs = l_mask & (~arg_regs_mask);
24910 if (lr_needs_saving)
24911 pushable_regs &= ~(1 << LR_REGNUM);
24913 if (pushable_regs == 0)
24914 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24916 while (high_regs_pushed > 0)
24918 unsigned long real_regs_mask = 0;
24919 unsigned long push_mask = 0;
24921 for (regno = LR_REGNUM; regno >= 0; regno --)
24923 if (pushable_regs & (1 << regno))
24925 emit_move_insn (gen_rtx_REG (SImode, regno),
24926 gen_rtx_REG (SImode, next_hi_reg));
24928 high_regs_pushed --;
24929 real_regs_mask |= (1 << next_hi_reg);
24930 push_mask |= (1 << regno);
24932 if (high_regs_pushed)
24934 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24935 next_hi_reg --)
24936 if (live_regs_mask & (1 << next_hi_reg))
24937 break;
24939 else
24940 break;
24944 /* If we had to find a work register and we have not yet
24945 saved the LR then add it to the list of regs to push. */
24946 if (lr_needs_saving)
24948 push_mask |= 1 << LR_REGNUM;
24949 real_regs_mask |= 1 << LR_REGNUM;
24950 lr_needs_saving = false;
24953 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24954 RTX_FRAME_RELATED_P (insn) = 1;
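/* Illustrative example of one iteration of the loop above: if r8 and r9 are
   the live high registers and only r4 and r5 are pushable, the code emits
     mov r5, r9
     mov r4, r8
     push {r4, r5}
   while real_regs_mask records r8 and r9, so the unwind information
   describes the registers whose values were actually saved rather than the
   low scratch registers named in the PUSH.  */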
24958 /* Load the pic register before setting the frame pointer,
24959 so we can use r7 as a temporary work register. */
24960 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24961 arm_load_pic_register (live_regs_mask);
24963 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24964 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24965 stack_pointer_rtx);
24967 size = offsets->outgoing_args - offsets->saved_args;
24968 if (flag_stack_usage_info)
24969 current_function_static_stack_size = size;
24971 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24972 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24973 sorry ("-fstack-check=specific for Thumb-1");
24975 amount = offsets->outgoing_args - offsets->saved_regs;
24976 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24977 if (amount)
24979 if (amount < 512)
24981 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24982 GEN_INT (- amount)));
24983 RTX_FRAME_RELATED_P (insn) = 1;
24985 else
24987 rtx reg, dwarf;
24989 /* The stack decrement is too big for an immediate value in a single
24990 insn. In theory we could issue multiple subtracts, but after
24991 three of them it becomes more space efficient to place the full
24992 value in the constant pool and load into a register. (Also the
24993 ARM debugger really likes to see only one stack decrement per
24994 function). So instead we look for a scratch register into which
24995 we can load the decrement, and then we subtract this from the
24996 stack pointer. Unfortunately on the thumb the only available
24997 scratch registers are the argument registers, and we cannot use
24998 these as they may hold arguments to the function. Instead we
24999 attempt to locate a call preserved register which is used by this
25000 function. If we can find one, then we know that it will have
25001 been pushed at the start of the prologue and so we can corrupt
25002 it now. */
25003 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25004 if (live_regs_mask & (1 << regno))
25005 break;
25007 gcc_assert (regno <= LAST_LO_REGNUM);
25009 reg = gen_rtx_REG (SImode, regno);
25011 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25013 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25014 stack_pointer_rtx, reg));
25016 dwarf = gen_rtx_SET (stack_pointer_rtx,
25017 plus_constant (Pmode, stack_pointer_rtx,
25018 -amount));
25019 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25020 RTX_FRAME_RELATED_P (insn) = 1;
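/* Illustrative result for amount == 1024, assuming r4 was found to be a
   call-preserved register already saved by the prologue: something like
     ldr r4, .LCn      @ .LCn holds -1024 in the constant pool
     add sp, sp, r4
   with the REG_FRAME_RELATED_EXPR note describing sp = sp - 1024, so the
   unwinder still sees a plain constant stack adjustment.  */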
25024 if (frame_pointer_needed)
25025 thumb_set_frame_pointer (offsets);
25027 /* If we are profiling, make sure no instructions are scheduled before
25028 the call to mcount. Similarly if the user has requested no
25029 scheduling in the prolog. Similarly if we want non-call exceptions
25030 using the EABI unwinder, to prevent faulting instructions from being
25031 swapped with a stack adjustment. */
25032 if (crtl->profile || !TARGET_SCHED_PROLOG
25033 || (arm_except_unwind_info (&global_options) == UI_TARGET
25034 && cfun->can_throw_non_call_exceptions))
25035 emit_insn (gen_blockage ());
25037 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25038 if (live_regs_mask & 0xff)
25039 cfun->machine->lr_save_eliminated = 0;
25042 /* Clear caller saved registers not used to pass return values and leaked
25043 condition flags before exiting a cmse_nonsecure_entry function. */
25045 void
25046 cmse_nonsecure_entry_clear_before_return (void)
25048 uint64_t to_clear_mask[2];
25049 uint32_t padding_bits_to_clear = 0;
25050 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25051 int regno, maxregno = IP_REGNUM;
25052 tree result_type;
25053 rtx result_rtl;
25055 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25056 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25058 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25059 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25060 to make sure the instructions used to clear them are present. */
25061 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25063 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25064 maxregno = LAST_VFP_REGNUM;
25066 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25067 to_clear_mask[0] |= float_mask;
25069 float_mask = (1ULL << (maxregno - 63)) - 1;
25070 to_clear_mask[1] = float_mask;
25072 /* Make sure we don't clear the two scratch registers used to clear the
25073 relevant FPSCR bits in output_return_instruction. */
25074 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25075 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25076 emit_use (gen_rtx_REG (SImode, 4));
25077 to_clear_mask[0] &= ~(1ULL << 4);
25080 /* If the user has defined registers to be caller saved, these are no longer
25081 restored by the function before returning and must thus be cleared for
25082 security purposes. */
25083 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25085 /* We do not touch registers that can be used to pass arguments as per
25086 the AAPCS, since these should never be made callee-saved by user
25087 options. */
25088 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25089 continue;
25090 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25091 continue;
25092 if (call_used_regs[regno])
25093 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25096 /* Make sure we do not clear the registers used to return the result in. */
25097 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25098 if (!VOID_TYPE_P (result_type))
25100 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25102 /* No need to check that we return in registers, because we don't
25103 support returning on stack yet. */
25104 to_clear_mask[0]
25105 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25106 padding_bits_to_clear_ptr);
25109 if (padding_bits_to_clear != 0)
25111 rtx reg_rtx;
25112 /* Padding bits to clear is not 0 so we know we are dealing with
25113 returning a composite type, which only uses r0. Let's make sure that
25114 r1-r3 is cleared too, we will use r1 as a scratch register. */
25115 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25117 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25119 /* Fill the lower half of the negated padding_bits_to_clear. */
25120 emit_move_insn (reg_rtx,
25121 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25123 /* Also fill the top half of the negated padding_bits_to_clear. */
25124 if (((~padding_bits_to_clear) >> 16) > 0)
25125 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25126 GEN_INT (16),
25127 GEN_INT (16)),
25128 GEN_INT ((~padding_bits_to_clear) >> 16)));
25130 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25131 gen_rtx_REG (SImode, R0_REGNUM),
25132 reg_rtx));
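/* Worked example (illustrative): if padding_bits_to_clear is 0x0000ff00,
   then ~padding_bits_to_clear is 0xffff00ff; the move above loads its low
   half (0x00ff) into r1, the ZERO_EXTRACT fills the high half (0xffff), and
   the AND leaves every bit of r0 intact except bits 8-15, which are exactly
   the padding bits being scrubbed.  */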
25135 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25137 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25138 continue;
25140 if (IS_VFP_REGNUM (regno))
25142 /* If regno is an even vfp register and its successor is also to
25143 be cleared, use vmov. */
25144 if (TARGET_VFP_DOUBLE
25145 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25146 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25148 emit_move_insn (gen_rtx_REG (DFmode, regno),
25149 CONST1_RTX (DFmode));
25150 emit_use (gen_rtx_REG (DFmode, regno));
25151 regno++;
25153 else
25155 emit_move_insn (gen_rtx_REG (SFmode, regno),
25156 CONST1_RTX (SFmode));
25157 emit_use (gen_rtx_REG (SFmode, regno));
25160 else
25162 if (TARGET_THUMB1)
25164 if (regno == R0_REGNUM)
25165 emit_move_insn (gen_rtx_REG (SImode, regno),
25166 const0_rtx);
25167 else
25168 /* R0 has either been cleared before (see the code above) or it
25169 holds a return value; either way it is not secret
25170 information. */
25171 emit_move_insn (gen_rtx_REG (SImode, regno),
25172 gen_rtx_REG (SImode, R0_REGNUM));
25173 emit_use (gen_rtx_REG (SImode, regno));
25175 else
25177 emit_move_insn (gen_rtx_REG (SImode, regno),
25178 gen_rtx_REG (SImode, LR_REGNUM));
25179 emit_use (gen_rtx_REG (SImode, regno));
25185 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25186 POP instruction can be generated. LR should be replaced by PC. All
25187 the checks required are already done by USE_RETURN_INSN (). Hence,
25188 all we really need to check here is if single register is to be
25189 returned, or multiple register return. */
25190 void
25191 thumb2_expand_return (bool simple_return)
25193 int i, num_regs;
25194 unsigned long saved_regs_mask;
25195 arm_stack_offsets *offsets;
25197 offsets = arm_get_frame_offsets ();
25198 saved_regs_mask = offsets->saved_regs_mask;
25200 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25201 if (saved_regs_mask & (1 << i))
25202 num_regs++;
25204 if (!simple_return && saved_regs_mask)
25206 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25207 functions or adapt code to handle according to ACLE. This path should
25208 not be reachable for cmse_nonsecure_entry functions though we prefer
25209 to assert it for now to ensure that future code changes do not silently
25210 change this behavior. */
25211 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25212 if (num_regs == 1)
25214 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25215 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25216 rtx addr = gen_rtx_MEM (SImode,
25217 gen_rtx_POST_INC (SImode,
25218 stack_pointer_rtx));
25219 set_mem_alias_set (addr, get_frame_alias_set ());
25220 XVECEXP (par, 0, 0) = ret_rtx;
25221 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25222 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25223 emit_jump_insn (par);
25225 else
25227 saved_regs_mask &= ~ (1 << LR_REGNUM);
25228 saved_regs_mask |= (1 << PC_REGNUM);
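/* For example (illustrative), if the prologue pushed {r4, r5, lr}, the mask
   manipulation above turns the epilogue pop into pop {r4, r5, pc}, restoring
   the callee-saved registers and returning in one instruction.  */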
25229 arm_emit_multi_reg_pop (saved_regs_mask);
25232 else
25234 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25235 cmse_nonsecure_entry_clear_before_return ();
25236 emit_jump_insn (simple_return_rtx);
25240 void
25241 thumb1_expand_epilogue (void)
25243 HOST_WIDE_INT amount;
25244 arm_stack_offsets *offsets;
25245 int regno;
25247 /* Naked functions don't have epilogues. */
25248 if (IS_NAKED (arm_current_func_type ()))
25249 return;
25251 offsets = arm_get_frame_offsets ();
25252 amount = offsets->outgoing_args - offsets->saved_regs;
25254 if (frame_pointer_needed)
25256 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25257 amount = offsets->locals_base - offsets->saved_regs;
25259 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25261 gcc_assert (amount >= 0);
25262 if (amount)
25264 emit_insn (gen_blockage ());
25266 if (amount < 512)
25267 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25268 GEN_INT (amount)));
25269 else
25271 /* r3 is always free in the epilogue. */
25272 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25274 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25275 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25279 /* Emit a USE (stack_pointer_rtx), so that
25280 the stack adjustment will not be deleted. */
25281 emit_insn (gen_force_register_use (stack_pointer_rtx));
25283 if (crtl->profile || !TARGET_SCHED_PROLOG)
25284 emit_insn (gen_blockage ());
25286 /* Emit a clobber for each insn that will be restored in the epilogue,
25287 so that flow2 will get register lifetimes correct. */
25288 for (regno = 0; regno < 13; regno++)
25289 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25290 emit_clobber (gen_rtx_REG (SImode, regno));
25292 if (! df_regs_ever_live_p (LR_REGNUM))
25293 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25295 /* Clear all caller-saved regs that are not used to return. */
25296 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25297 cmse_nonsecure_entry_clear_before_return ();
25300 /* Epilogue code for APCS frame. */
25301 static void
25302 arm_expand_epilogue_apcs_frame (bool really_return)
25304 unsigned long func_type;
25305 unsigned long saved_regs_mask;
25306 int num_regs = 0;
25307 int i;
25308 int floats_from_frame = 0;
25309 arm_stack_offsets *offsets;
25311 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25312 func_type = arm_current_func_type ();
25314 /* Get frame offsets for ARM. */
25315 offsets = arm_get_frame_offsets ();
25316 saved_regs_mask = offsets->saved_regs_mask;
25318 /* Find the offset of the floating-point save area in the frame. */
25319 floats_from_frame
25320 = (offsets->saved_args
25321 + arm_compute_static_chain_stack_bytes ()
25322 - offsets->frame);
25324 /* Compute how many core registers saved and how far away the floats are. */
25325 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25326 if (saved_regs_mask & (1 << i))
25328 num_regs++;
25329 floats_from_frame += 4;
25332 if (TARGET_HARD_FLOAT)
25334 int start_reg;
25335 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25337 /* The offset is from IP_REGNUM. */
25338 int saved_size = arm_get_vfp_saved_size ();
25339 if (saved_size > 0)
25341 rtx_insn *insn;
25342 floats_from_frame += saved_size;
25343 insn = emit_insn (gen_addsi3 (ip_rtx,
25344 hard_frame_pointer_rtx,
25345 GEN_INT (-floats_from_frame)));
25346 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25347 ip_rtx, hard_frame_pointer_rtx);
25350 /* Generate VFP register multi-pop. */
25351 start_reg = FIRST_VFP_REGNUM;
25353 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25354 /* Look for a case where a reg does not need restoring. */
25355 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25356 && (!df_regs_ever_live_p (i + 1)
25357 || call_used_regs[i + 1]))
25359 if (start_reg != i)
25360 arm_emit_vfp_multi_reg_pop (start_reg,
25361 (i - start_reg) / 2,
25362 gen_rtx_REG (SImode,
25363 IP_REGNUM));
25364 start_reg = i + 2;
25367 /* Restore the remaining regs that we have discovered (or possibly
25368 even all of them, if the conditional in the for loop never
25369 fired). */
25370 if (start_reg != i)
25371 arm_emit_vfp_multi_reg_pop (start_reg,
25372 (i - start_reg) / 2,
25373 gen_rtx_REG (SImode, IP_REGNUM));
25376 if (TARGET_IWMMXT)
25378 /* The frame pointer is guaranteed to be non-double-word aligned, as
25379 it is set to double-word-aligned old_stack_pointer - 4. */
25380 rtx_insn *insn;
25381 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25383 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25384 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25386 rtx addr = gen_frame_mem (V2SImode,
25387 plus_constant (Pmode, hard_frame_pointer_rtx,
25388 - lrm_count * 4));
25389 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25390 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25391 gen_rtx_REG (V2SImode, i),
25392 NULL_RTX);
25393 lrm_count += 2;
25397 /* saved_regs_mask should contain IP, which holds the old stack pointer
25398 captured when the frame was created. Since SP and IP are adjacent
25399 registers, we can restore the value directly into SP. */
25400 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25401 saved_regs_mask &= ~(1 << IP_REGNUM);
25402 saved_regs_mask |= (1 << SP_REGNUM);
25404 /* There are two registers left in saved_regs_mask - LR and PC. We
25405 only need to restore LR (the return address), but to
25406 save time we can load it directly into PC, unless we need a
25407 special function exit sequence, or we are not really returning. */
25408 if (really_return
25409 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25410 && !crtl->calls_eh_return)
25411 /* Delete LR from the register mask, so that LR on
25412 the stack is loaded into the PC in the register mask. */
25413 saved_regs_mask &= ~(1 << LR_REGNUM);
25414 else
25415 saved_regs_mask &= ~(1 << PC_REGNUM);
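/* Illustrative effect for a normal return: with LR removed and PC kept in
   the mask, the multi-register pop below loads the stacked IP slot (the
   caller's stack pointer) into SP and the stacked LR slot into PC, so the
   frame teardown and the return happen in a single load-multiple.  */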
25417 num_regs = bit_count (saved_regs_mask);
25418 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25420 rtx_insn *insn;
25421 emit_insn (gen_blockage ());
25422 /* Unwind the stack to just below the saved registers. */
25423 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25424 hard_frame_pointer_rtx,
25425 GEN_INT (- 4 * num_regs)));
25427 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25428 stack_pointer_rtx, hard_frame_pointer_rtx);
25431 arm_emit_multi_reg_pop (saved_regs_mask);
25433 if (IS_INTERRUPT (func_type))
25435 /* Interrupt handlers will have pushed the
25436 IP onto the stack, so restore it now. */
25437 rtx_insn *insn;
25438 rtx addr = gen_rtx_MEM (SImode,
25439 gen_rtx_POST_INC (SImode,
25440 stack_pointer_rtx));
25441 set_mem_alias_set (addr, get_frame_alias_set ());
25442 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25443 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25444 gen_rtx_REG (SImode, IP_REGNUM),
25445 NULL_RTX);
25448 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25449 return;
25451 if (crtl->calls_eh_return)
25452 emit_insn (gen_addsi3 (stack_pointer_rtx,
25453 stack_pointer_rtx,
25454 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25456 if (IS_STACKALIGN (func_type))
25457 /* Restore the original stack pointer. Before prologue, the stack was
25458 realigned and the original stack pointer saved in r0. For details,
25459 see comment in arm_expand_prologue. */
25460 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25462 emit_jump_insn (simple_return_rtx);
25465 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25466 function is not a sibcall. */
25467 void
25468 arm_expand_epilogue (bool really_return)
25470 unsigned long func_type;
25471 unsigned long saved_regs_mask;
25472 int num_regs = 0;
25473 int i;
25474 int amount;
25475 arm_stack_offsets *offsets;
25477 func_type = arm_current_func_type ();
25479 /* Naked functions don't have epilogues. Hence, generate the return pattern
25480 and let output_return_instruction take care of any instruction emission. */
25481 if (IS_NAKED (func_type)
25482 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25484 if (really_return)
25485 emit_jump_insn (simple_return_rtx);
25486 return;
25489 /* If we are throwing an exception, then we really must be doing a
25490 return, so we can't tail-call. */
25491 gcc_assert (!crtl->calls_eh_return || really_return);
25493 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25495 arm_expand_epilogue_apcs_frame (really_return);
25496 return;
25499 /* Get frame offsets for ARM. */
25500 offsets = arm_get_frame_offsets ();
25501 saved_regs_mask = offsets->saved_regs_mask;
25502 num_regs = bit_count (saved_regs_mask);
25504 if (frame_pointer_needed)
25506 rtx_insn *insn;
25507 /* Restore stack pointer if necessary. */
25508 if (TARGET_ARM)
25510 /* In ARM mode, frame pointer points to first saved register.
25511 Restore stack pointer to last saved register. */
25512 amount = offsets->frame - offsets->saved_regs;
25514 /* Force out any pending memory operations that reference stacked data
25515 before stack de-allocation occurs. */
25516 emit_insn (gen_blockage ());
25517 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25518 hard_frame_pointer_rtx,
25519 GEN_INT (amount)));
25520 arm_add_cfa_adjust_cfa_note (insn, amount,
25521 stack_pointer_rtx,
25522 hard_frame_pointer_rtx);
25524 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25525 deleted. */
25526 emit_insn (gen_force_register_use (stack_pointer_rtx));
25528 else
25530 /* In Thumb-2 mode, the frame pointer points to the last saved
25531 register. */
25532 amount = offsets->locals_base - offsets->saved_regs;
25533 if (amount)
25535 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25536 hard_frame_pointer_rtx,
25537 GEN_INT (amount)));
25538 arm_add_cfa_adjust_cfa_note (insn, amount,
25539 hard_frame_pointer_rtx,
25540 hard_frame_pointer_rtx);
25543 /* Force out any pending memory operations that reference stacked data
25544 before stack de-allocation occurs. */
25545 emit_insn (gen_blockage ());
25546 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25547 hard_frame_pointer_rtx));
25548 arm_add_cfa_adjust_cfa_note (insn, 0,
25549 stack_pointer_rtx,
25550 hard_frame_pointer_rtx);
25551 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25552 deleted. */
25553 emit_insn (gen_force_register_use (stack_pointer_rtx));
25556 else
25558 /* Pop off outgoing args and local frame to adjust stack pointer to
25559 last saved register. */
25560 amount = offsets->outgoing_args - offsets->saved_regs;
25561 if (amount)
25563 rtx_insn *tmp;
25564 /* Force out any pending memory operations that reference stacked data
25565 before stack de-allocation occurs. */
25566 emit_insn (gen_blockage ());
25567 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25568 stack_pointer_rtx,
25569 GEN_INT (amount)));
25570 arm_add_cfa_adjust_cfa_note (tmp, amount,
25571 stack_pointer_rtx, stack_pointer_rtx);
25572 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25573 not deleted. */
25574 emit_insn (gen_force_register_use (stack_pointer_rtx));
25578 if (TARGET_HARD_FLOAT)
25580 /* Generate VFP register multi-pop. */
25581 int end_reg = LAST_VFP_REGNUM + 1;
25583 /* Scan the registers in reverse order. We need to match
25584 any groupings made in the prologue and generate matching
25585 vldm operations. The need to match groups is because,
25586 unlike pop, vldm can only do consecutive regs. */
25587 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25588 /* Look for a case where a reg does not need restoring. */
25589 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25590 && (!df_regs_ever_live_p (i + 1)
25591 || call_used_regs[i + 1]))
25593 /* Restore the regs discovered so far (from reg+2 to
25594 end_reg). */
25595 if (end_reg > i + 2)
25596 arm_emit_vfp_multi_reg_pop (i + 2,
25597 (end_reg - (i + 2)) / 2,
25598 stack_pointer_rtx);
25599 end_reg = i;
25602 /* Restore the remaining regs that we have discovered (or possibly
25603 even all of them, if the conditional in the for loop never
25604 fired). */
25605 if (end_reg > i + 2)
25606 arm_emit_vfp_multi_reg_pop (i + 2,
25607 (end_reg - (i + 2)) / 2,
25608 stack_pointer_rtx);
25611 if (TARGET_IWMMXT)
25612 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25613 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25615 rtx_insn *insn;
25616 rtx addr = gen_rtx_MEM (V2SImode,
25617 gen_rtx_POST_INC (SImode,
25618 stack_pointer_rtx));
25619 set_mem_alias_set (addr, get_frame_alias_set ());
25620 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25621 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25622 gen_rtx_REG (V2SImode, i),
25623 NULL_RTX);
25624 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25625 stack_pointer_rtx, stack_pointer_rtx);
25628 if (saved_regs_mask)
25630 rtx insn;
25631 bool return_in_pc = false;
25633 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25634 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25635 && !IS_CMSE_ENTRY (func_type)
25636 && !IS_STACKALIGN (func_type)
25637 && really_return
25638 && crtl->args.pretend_args_size == 0
25639 && saved_regs_mask & (1 << LR_REGNUM)
25640 && !crtl->calls_eh_return)
25642 saved_regs_mask &= ~(1 << LR_REGNUM);
25643 saved_regs_mask |= (1 << PC_REGNUM);
25644 return_in_pc = true;
25647 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25649 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25650 if (saved_regs_mask & (1 << i))
25652 rtx addr = gen_rtx_MEM (SImode,
25653 gen_rtx_POST_INC (SImode,
25654 stack_pointer_rtx));
25655 set_mem_alias_set (addr, get_frame_alias_set ());
25657 if (i == PC_REGNUM)
25659 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25660 XVECEXP (insn, 0, 0) = ret_rtx;
25661 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25662 addr);
25663 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25664 insn = emit_jump_insn (insn);
25666 else
25668 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25669 addr));
25670 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25671 gen_rtx_REG (SImode, i),
25672 NULL_RTX);
25673 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25674 stack_pointer_rtx,
25675 stack_pointer_rtx);
25679 else
25681 if (TARGET_LDRD
25682 && current_tune->prefer_ldrd_strd
25683 && !optimize_function_for_size_p (cfun))
25685 if (TARGET_THUMB2)
25686 thumb2_emit_ldrd_pop (saved_regs_mask);
25687 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25688 arm_emit_ldrd_pop (saved_regs_mask);
25689 else
25690 arm_emit_multi_reg_pop (saved_regs_mask);
25692 else
25693 arm_emit_multi_reg_pop (saved_regs_mask);
25696 if (return_in_pc)
25697 return;
25700 amount
25701 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25702 if (amount)
25704 int i, j;
25705 rtx dwarf = NULL_RTX;
25706 rtx_insn *tmp =
25707 emit_insn (gen_addsi3 (stack_pointer_rtx,
25708 stack_pointer_rtx,
25709 GEN_INT (amount)));
25711 RTX_FRAME_RELATED_P (tmp) = 1;
25713 if (cfun->machine->uses_anonymous_args)
25715 /* Restore pretend args. Refer to arm_expand_prologue for how the
25716 pretend args are saved on the stack. */
25717 int num_regs = crtl->args.pretend_args_size / 4;
25718 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25719 for (j = 0, i = 0; j < num_regs; i++)
25720 if (saved_regs_mask & (1 << i))
25722 rtx reg = gen_rtx_REG (SImode, i);
25723 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25724 j++;
25726 REG_NOTES (tmp) = dwarf;
25728 arm_add_cfa_adjust_cfa_note (tmp, amount,
25729 stack_pointer_rtx, stack_pointer_rtx);
25732 /* Clear all caller-saved regs that are not used to return. */
25733 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25735 /* CMSE_ENTRY always returns. */
25736 gcc_assert (really_return);
25737 cmse_nonsecure_entry_clear_before_return ();
25740 if (!really_return)
25741 return;
25743 if (crtl->calls_eh_return)
25744 emit_insn (gen_addsi3 (stack_pointer_rtx,
25745 stack_pointer_rtx,
25746 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25748 if (IS_STACKALIGN (func_type))
25749 /* Restore the original stack pointer. Before prologue, the stack was
25750 realigned and the original stack pointer saved in r0. For details,
25751 see comment in arm_expand_prologue. */
25752 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25754 emit_jump_insn (simple_return_rtx);
25757 /* Implementation of insn prologue_thumb1_interwork. This is the first
25758 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25760 const char *
25761 thumb1_output_interwork (void)
25763 const char * name;
25764 FILE *f = asm_out_file;
25766 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25767 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25768 == SYMBOL_REF);
25769 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25771 /* Generate code sequence to switch us into Thumb mode. */
25772 /* The .code 32 directive has already been emitted by
25773 ASM_DECLARE_FUNCTION_NAME. */
25774 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25775 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25777 /* Generate a label, so that the debugger will notice the
25778 change in instruction sets. This label is also used by
25779 the assembler to bypass the ARM code when this function
25780 is called from a Thumb encoded function elsewhere in the
25781 same file. Hence the definition of STUB_NAME here must
25782 agree with the definition in gas/config/tc-arm.c. */
25784 #define STUB_NAME ".real_start_of"
25786 fprintf (f, "\t.code\t16\n");
25787 #ifdef ARM_PE
25788 if (arm_dllexport_name_p (name))
25789 name = arm_strip_name_encoding (name);
25790 #endif
25791 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25792 fprintf (f, "\t.thumb_func\n");
25793 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25795 return "";
25798 /* Handle the case of a double word load into a low register from
25799 a computed memory address. The computed address may involve a
25800 register which is overwritten by the load. */
25801 const char *
25802 thumb_load_double_from_address (rtx *operands)
25804 rtx addr;
25805 rtx base;
25806 rtx offset;
25807 rtx arg1;
25808 rtx arg2;
25810 gcc_assert (REG_P (operands[0]));
25811 gcc_assert (MEM_P (operands[1]));
25813 /* Get the memory address. */
25814 addr = XEXP (operands[1], 0);
25816 /* Work out how the memory address is computed. */
25817 switch (GET_CODE (addr))
25819 case REG:
25820 operands[2] = adjust_address (operands[1], SImode, 4);
25822 if (REGNO (operands[0]) == REGNO (addr))
25824 output_asm_insn ("ldr\t%H0, %2", operands);
25825 output_asm_insn ("ldr\t%0, %1", operands);
25827 else
25829 output_asm_insn ("ldr\t%0, %1", operands);
25830 output_asm_insn ("ldr\t%H0, %2", operands);
25832 break;
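/* Example (illustrative): for a load of the pair r2/r3 from the address held
   in r2, the destination low register overlaps the base, so the high word is
   fetched first:
     ldr r3, [r2, #4]
     ldr r2, [r2]
   Loading in the other order would clobber the base before the second load.  */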
25834 case CONST:
25835 /* Compute <address> + 4 for the high order load. */
25836 operands[2] = adjust_address (operands[1], SImode, 4);
25838 output_asm_insn ("ldr\t%0, %1", operands);
25839 output_asm_insn ("ldr\t%H0, %2", operands);
25840 break;
25842 case PLUS:
25843 arg1 = XEXP (addr, 0);
25844 arg2 = XEXP (addr, 1);
25846 if (CONSTANT_P (arg1))
25847 base = arg2, offset = arg1;
25848 else
25849 base = arg1, offset = arg2;
25851 gcc_assert (REG_P (base));
25853 /* Catch the case of <address> = <reg> + <reg> */
25854 if (REG_P (offset))
25856 int reg_offset = REGNO (offset);
25857 int reg_base = REGNO (base);
25858 int reg_dest = REGNO (operands[0]);
25860 /* Add the base and offset registers together into the
25861 higher destination register. */
25862 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25863 reg_dest + 1, reg_base, reg_offset);
25865 /* Load the lower destination register from the address in
25866 the higher destination register. */
25867 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25868 reg_dest, reg_dest + 1);
25870 /* Load the higher destination register from its own address
25871 plus 4. */
25872 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25873 reg_dest + 1, reg_dest + 1);
25875 else
25877 /* Compute <address> + 4 for the high order load. */
25878 operands[2] = adjust_address (operands[1], SImode, 4);
25880 /* If the computed address is held in the low order register
25881 then load the high order register first, otherwise always
25882 load the low order register first. */
25883 if (REGNO (operands[0]) == REGNO (base))
25885 output_asm_insn ("ldr\t%H0, %2", operands);
25886 output_asm_insn ("ldr\t%0, %1", operands);
25888 else
25890 output_asm_insn ("ldr\t%0, %1", operands);
25891 output_asm_insn ("ldr\t%H0, %2", operands);
25894 break;
25896 case LABEL_REF:
25897 /* With no registers to worry about we can just load the value
25898 directly. */
25899 operands[2] = adjust_address (operands[1], SImode, 4);
25901 output_asm_insn ("ldr\t%H0, %2", operands);
25902 output_asm_insn ("ldr\t%0, %1", operands);
25903 break;
25905 default:
25906 gcc_unreachable ();
25909 return "";
25912 const char *
25913 thumb_output_move_mem_multiple (int n, rtx *operands)
25915 switch (n)
25917 case 2:
25918 if (REGNO (operands[4]) > REGNO (operands[5]))
25919 std::swap (operands[4], operands[5]);
25921 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25922 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25923 break;
25925 case 3:
25926 if (REGNO (operands[4]) > REGNO (operands[5]))
25927 std::swap (operands[4], operands[5]);
25928 if (REGNO (operands[5]) > REGNO (operands[6]))
25929 std::swap (operands[5], operands[6]);
25930 if (REGNO (operands[4]) > REGNO (operands[5]))
25931 std::swap (operands[4], operands[5]);
25933 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25934 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25935 break;
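/* Illustrative example for n == 3: if operands[4..6] arrive as r5, r3, r4,
   the three conditional swaps above sort them so the emitted instructions
   read roughly
     ldmia r1!, {r3, r4, r5}
     stmia r0!, {r3, r4, r5}
   since LDM/STM register lists must be in ascending register order.  */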
25937 default:
25938 gcc_unreachable ();
25941 return "";
25944 /* Output a call-via instruction for thumb state. */
25945 const char *
25946 thumb_call_via_reg (rtx reg)
25948 int regno = REGNO (reg);
25949 rtx *labelp;
25951 gcc_assert (regno < LR_REGNUM);
25953 /* If we are in the normal text section we can use a single instance
25954 per compilation unit. If we are doing function sections, then we need
25955 an entry per section, since we can't rely on reachability. */
25956 if (in_section == text_section)
25958 thumb_call_reg_needed = 1;
25960 if (thumb_call_via_label[regno] == NULL)
25961 thumb_call_via_label[regno] = gen_label_rtx ();
25962 labelp = thumb_call_via_label + regno;
25964 else
25966 if (cfun->machine->call_via[regno] == NULL)
25967 cfun->machine->call_via[regno] = gen_label_rtx ();
25968 labelp = cfun->machine->call_via + regno;
25971 output_asm_insn ("bl\t%a0", labelp);
25972 return "";
25975 /* Routines for generating rtl. */
25976 void
25977 thumb_expand_movmemqi (rtx *operands)
25979 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25980 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25981 HOST_WIDE_INT len = INTVAL (operands[2]);
25982 HOST_WIDE_INT offset = 0;
25984 while (len >= 12)
25986 emit_insn (gen_movmem12b (out, in, out, in));
25987 len -= 12;
25990 if (len >= 8)
25992 emit_insn (gen_movmem8b (out, in, out, in));
25993 len -= 8;
25996 if (len >= 4)
25998 rtx reg = gen_reg_rtx (SImode);
25999 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26000 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26001 len -= 4;
26002 offset += 4;
26005 if (len >= 2)
26007 rtx reg = gen_reg_rtx (HImode);
26008 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26009 plus_constant (Pmode, in,
26010 offset))));
26011 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26012 offset)),
26013 reg));
26014 len -= 2;
26015 offset += 2;
26018 if (len)
26020 rtx reg = gen_reg_rtx (QImode);
26021 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26022 plus_constant (Pmode, in,
26023 offset))));
26024 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26025 offset)),
26026 reg));
26030 void
26031 thumb_reload_out_hi (rtx *operands)
26033 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26036 /* Return the length of a function name prefix
26037 that starts with the character 'c'. */
26038 static int
26039 arm_get_strip_length (int c)
26041 switch (c)
26043 ARM_NAME_ENCODING_LENGTHS
26044 default: return 0;
26048 /* Return a pointer to a function's name with any
26049 and all prefix encodings stripped from it. */
26050 const char *
26051 arm_strip_name_encoding (const char *name)
26053 int skip;
26055 while ((skip = arm_get_strip_length (* name)))
26056 name += skip;
26058 return name;
26061 /* If there is a '*' anywhere in the name's prefix, then
26062 emit the stripped name verbatim, otherwise prepend an
26063 underscore if leading underscores are being used. */
26064 void
26065 arm_asm_output_labelref (FILE *stream, const char *name)
26067 int skip;
26068 int verbatim = 0;
26070 while ((skip = arm_get_strip_length (* name)))
26072 verbatim |= (*name == '*');
26073 name += skip;
26076 if (verbatim)
26077 fputs (name, stream);
26078 else
26079 asm_fprintf (stream, "%U%s", name);
26082 /* This function is used to emit an EABI tag and its associated value.
26083 We emit the numerical value of the tag in case the assembler does not
26084 support textual tags. (Eg gas prior to 2.20). If requested we include
26085 the tag name in a comment so that anyone reading the assembler output
26086 will know which tag is being set.
26088 This function is not static because arm-c.c needs it too. */
26090 void
26091 arm_emit_eabi_attribute (const char *name, int num, int val)
26093 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26094 if (flag_verbose_asm || flag_debug_asm)
26095 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26096 asm_fprintf (asm_out_file, "\n");
26099 /* This function is used to print CPU tuning information as comment
26100 in assembler file. Pointers are not printed for now. */
26102 void
26103 arm_print_tune_info (void)
26105 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26106 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26107 current_tune->constant_limit);
26108 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26109 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26110 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26111 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26112 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26113 "prefetch.l1_cache_size:\t%d\n",
26114 current_tune->prefetch.l1_cache_size);
26115 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26116 "prefetch.l1_cache_line_size:\t%d\n",
26117 current_tune->prefetch.l1_cache_line_size);
26118 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26119 "prefer_constant_pool:\t%d\n",
26120 (int) current_tune->prefer_constant_pool);
26121 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26122 "branch_cost:\t(s:speed, p:predictable)\n");
26123 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26124 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26125 current_tune->branch_cost (false, false));
26126 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26127 current_tune->branch_cost (false, true));
26128 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26129 current_tune->branch_cost (true, false));
26130 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26131 current_tune->branch_cost (true, true));
26132 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26133 "prefer_ldrd_strd:\t%d\n",
26134 (int) current_tune->prefer_ldrd_strd);
26135 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26136 "logical_op_non_short_circuit:\t[%d,%d]\n",
26137 (int) current_tune->logical_op_non_short_circuit_thumb,
26138 (int) current_tune->logical_op_non_short_circuit_arm);
26139 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26140 "prefer_neon_for_64bits:\t%d\n",
26141 (int) current_tune->prefer_neon_for_64bits);
26142 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26143 "disparage_flag_setting_t16_encodings:\t%d\n",
26144 (int) current_tune->disparage_flag_setting_t16_encodings);
26145 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26146 "string_ops_prefer_neon:\t%d\n",
26147 (int) current_tune->string_ops_prefer_neon);
26148 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26149 "max_insns_inline_memset:\t%d\n",
26150 current_tune->max_insns_inline_memset);
26151 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26152 current_tune->fusible_ops);
26153 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26154 (int) current_tune->sched_autopref);
26157 /* Print .arch and .arch_extension directives corresponding to the
26158 current architecture configuration. */
26159 static void
26160 arm_print_asm_arch_directives ()
26162 const arch_option *arch
26163 = arm_parse_arch_option_name (all_architectures, "-march",
26164 arm_active_target.arch_name);
26165 auto_sbitmap opt_bits (isa_num_bits);
26167 gcc_assert (arch);
26169 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26170 if (!arch->common.extensions)
26171 return;
26173 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26174 opt->name != NULL;
26175 opt++)
26177 if (!opt->remove)
26179 arm_initialize_isa (opt_bits, opt->isa_bits);
26181 /* If every feature bit of this option is set in the target
26182 ISA specification, print out the option name. However,
26183 don't print anything if all the bits are part of the
26184 FPU specification. */
26185 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26186 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26187 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26192 static void
26193 arm_file_start (void)
26195 int val;
26197 if (TARGET_BPABI)
26199 /* We don't have a specified CPU. Use the architecture to
26200 generate the tags.
26202 Note: it might be better to do this unconditionally, then the
26203 assembler would not need to know about all new CPU names as
26204 they are added. */
26205 if (!arm_active_target.core_name)
26207 /* armv7ve doesn't support any extensions. */
26208 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26210 /* Keep backward compatibility for assemblers
26211 which don't support armv7ve. */
26212 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26213 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26214 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26215 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26216 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26218 else
26219 arm_print_asm_arch_directives ();
26221 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26222 asm_fprintf (asm_out_file, "\t.arch %s\n",
26223 arm_active_target.core_name + 8);
26224 else
26226 const char* truncated_name
26227 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26228 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26231 if (print_tune_info)
26232 arm_print_tune_info ();
26234 if (! TARGET_SOFT_FLOAT)
26236 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26237 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26239 if (TARGET_HARD_FLOAT_ABI)
26240 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26243 /* Some of these attributes only apply when the corresponding features
26244 are used. However we don't have any easy way of figuring this out.
26245 Conservatively record the setting that would have been used. */
26247 if (flag_rounding_math)
26248 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26250 if (!flag_unsafe_math_optimizations)
26252 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26253 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26255 if (flag_signaling_nans)
26256 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26258 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26259 flag_finite_math_only ? 1 : 3);
26261 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26262 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26263 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26264 flag_short_enums ? 1 : 2);
26266 /* Tag_ABI_optimization_goals. */
26267 if (optimize_size)
26268 val = 4;
26269 else if (optimize >= 2)
26270 val = 2;
26271 else if (optimize)
26272 val = 1;
26273 else
26274 val = 6;
26275 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26277 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26278 unaligned_access);
26280 if (arm_fp16_format)
26281 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26282 (int) arm_fp16_format);
26284 if (arm_lang_output_object_attributes_hook)
26285 arm_lang_output_object_attributes_hook();
26288 default_file_start ();
26291 static void
26292 arm_file_end (void)
26294 int regno;
26296 if (NEED_INDICATE_EXEC_STACK)
26297 /* Add .note.GNU-stack. */
26298 file_end_indicate_exec_stack ();
26300 if (! thumb_call_reg_needed)
26301 return;
26303 switch_to_section (text_section);
26304 asm_fprintf (asm_out_file, "\t.code 16\n");
26305 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26307 for (regno = 0; regno < LR_REGNUM; regno++)
26309 rtx label = thumb_call_via_label[regno];
26311 if (label != 0)
26313 targetm.asm_out.internal_label (asm_out_file, "L",
26314 CODE_LABEL_NUMBER (label));
26315 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26320 #ifndef ARM_PE
26321 /* Symbols in the text segment can be accessed without indirecting via the
26322 constant pool; it may take an extra binary operation, but this is still
26323 faster than indirecting via memory. Don't do this when not optimizing,
26324 since we won't be calculating al of the offsets necessary to do this
26325 simplification. */
26327 static void
26328 arm_encode_section_info (tree decl, rtx rtl, int first)
26330 if (optimize > 0 && TREE_CONSTANT (decl))
26331 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26333 default_encode_section_info (decl, rtl, first);
26335 #endif /* !ARM_PE */
26337 static void
26338 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26340 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26341 && !strcmp (prefix, "L"))
26343 arm_ccfsm_state = 0;
26344 arm_target_insn = NULL;
26346 default_internal_label (stream, prefix, labelno);
26349 /* Output code to add DELTA to the first argument, and then jump
26350 to FUNCTION. Used for C++ multiple inheritance. */
26352 static void
26353 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26354 HOST_WIDE_INT, tree function)
26356 static int thunk_label = 0;
26357 char label[256];
26358 char labelpc[256];
26359 int mi_delta = delta;
26360 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26361 int shift = 0;
26362 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26363 ? 1 : 0);
26364 if (mi_delta < 0)
26365 mi_delta = - mi_delta;
26367 final_start_function (emit_barrier (), file, 1);
26369 if (TARGET_THUMB1)
26371 int labelno = thunk_label++;
26372 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26373 /* Thunks are entered in arm mode when available. */
26374 if (TARGET_THUMB1_ONLY)
26376 /* push r3 so we can use it as a temporary. */
26377 /* TODO: Omit this save if r3 is not used. */
26378 fputs ("\tpush {r3}\n", file);
26379 fputs ("\tldr\tr3, ", file);
26381 else
26383 fputs ("\tldr\tr12, ", file);
26385 assemble_name (file, label);
26386 fputc ('\n', file);
26387 if (flag_pic)
26389 /* If we are generating PIC, the ldr instruction below loads
26390 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26391 the address of the add + 8, so we have:
26393 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26394 = target + 1.
26396 Note that we have "+ 1" because some versions of GNU ld
26397 don't set the low bit of the result for R_ARM_REL32
26398 relocations against thumb function symbols.
26399 On ARMv6M this is +4, not +8. */
26400 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26401 assemble_name (file, labelpc);
26402 fputs (":\n", file);
26403 if (TARGET_THUMB1_ONLY)
26405 /* This is 2 insns after the start of the thunk, so we know it
26406 is 4-byte aligned. */
26407 fputs ("\tadd\tr3, pc, r3\n", file);
26408 fputs ("\tmov r12, r3\n", file);
26410 else
26411 fputs ("\tadd\tr12, pc, r12\n", file);
26413 else if (TARGET_THUMB1_ONLY)
26414 fputs ("\tmov r12, r3\n", file);
26416 if (TARGET_THUMB1_ONLY)
26418 if (mi_delta > 255)
26420 fputs ("\tldr\tr3, ", file);
26421 assemble_name (file, label);
26422 fputs ("+4\n", file);
26423 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26424 mi_op, this_regno, this_regno);
26426 else if (mi_delta != 0)
26428 /* Thumb1 unified syntax requires s suffix in instruction name when
26429 one of the operands is immediate. */
26430 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26431 mi_op, this_regno, this_regno,
26432 mi_delta);
26435 else
26437 /* TODO: Use movw/movt for large constants when available. */
26438 while (mi_delta != 0)
26440 if ((mi_delta & (3 << shift)) == 0)
26441 shift += 2;
26442 else
26444 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26445 mi_op, this_regno, this_regno,
26446 mi_delta & (0xff << shift));
26447 mi_delta &= ~(0xff << shift);
26448 shift += 8;
26452 if (TARGET_THUMB1)
26454 if (TARGET_THUMB1_ONLY)
26455 fputs ("\tpop\t{r3}\n", file);
26457 fprintf (file, "\tbx\tr12\n");
26458 ASM_OUTPUT_ALIGN (file, 2);
26459 assemble_name (file, label);
26460 fputs (":\n", file);
26461 if (flag_pic)
26463 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26464 rtx tem = XEXP (DECL_RTL (function), 0);
26465 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26466 pipeline offset is four rather than eight. Adjust the offset
26467 accordingly. */
26468 tem = plus_constant (GET_MODE (tem), tem,
26469 TARGET_THUMB1_ONLY ? -3 : -7);
26470 tem = gen_rtx_MINUS (GET_MODE (tem),
26471 tem,
26472 gen_rtx_SYMBOL_REF (Pmode,
26473 ggc_strdup (labelpc)));
26474 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26476 else
26477 /* Output ".word .LTHUNKn". */
26478 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26480 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26481 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26483 else
26485 fputs ("\tb\t", file);
26486 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26487 if (NEED_PLT_RELOC)
26488 fputs ("(PLT)", file);
26489 fputc ('\n', file);
26492 final_end_function ();
26495 /* MI thunk handling for TARGET_32BIT. */
26497 static void
26498 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26499 HOST_WIDE_INT vcall_offset, tree function)
26501 /* On ARM, this_regno is R0 or R1 depending on
26502 whether the function returns an aggregate or not.
26504 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26505 function)
26506 ? R1_REGNUM : R0_REGNUM);
26508 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26509 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26510 reload_completed = 1;
26511 emit_note (NOTE_INSN_PROLOGUE_END);
26513 /* Add DELTA to THIS_RTX. */
26514 if (delta != 0)
26515 arm_split_constant (PLUS, Pmode, NULL_RTX,
26516 delta, this_rtx, this_rtx, false);
26518 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26519 if (vcall_offset != 0)
26521 /* Load *THIS_RTX. */
26522 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26523 /* Compute *THIS_RTX + VCALL_OFFSET. */
26524 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26525 false);
26526 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26527 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26528 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26531 /* Generate a tail call to the target function. */
26532 if (!TREE_USED (function))
26534 assemble_external (function);
26535 TREE_USED (function) = 1;
26537 rtx funexp = XEXP (DECL_RTL (function), 0);
26538 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26539 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26540 SIBLING_CALL_P (insn) = 1;
26542 insn = get_insns ();
26543 shorten_branches (insn);
26544 final_start_function (insn, file, 1);
26545 final (insn, file, 1);
26546 final_end_function ();
26548 /* Stop pretending this is a post-reload pass. */
26549 reload_completed = 0;
26552 /* Output code to add DELTA to the first argument, and then jump
26553 to FUNCTION. Used for C++ multiple inheritance. */
26555 static void
26556 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26557 HOST_WIDE_INT vcall_offset, tree function)
26559 if (TARGET_32BIT)
26560 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26561 else
26562 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26566 arm_emit_vector_const (FILE *file, rtx x)
26568 int i;
26569 const char * pattern;
26571 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26573 switch (GET_MODE (x))
26575 case E_V2SImode: pattern = "%08x"; break;
26576 case E_V4HImode: pattern = "%04x"; break;
26577 case E_V8QImode: pattern = "%02x"; break;
26578 default: gcc_unreachable ();
26581 fprintf (file, "0x");
26582 for (i = CONST_VECTOR_NUNITS (x); i--;)
26584 rtx element;
26586 element = CONST_VECTOR_ELT (x, i);
26587 fprintf (file, pattern, INTVAL (element));
26590 return 1;
26593 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26594 HFmode constant pool entries are actually loaded with ldr. */
26595 void
26596 arm_emit_fp16_const (rtx c)
26598 long bits;
26600 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26601 if (WORDS_BIG_ENDIAN)
26602 assemble_zeros (2);
26603 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26604 if (!WORDS_BIG_ENDIAN)
26605 assemble_zeros (2);
26608 const char *
26609 arm_output_load_gr (rtx *operands)
26611 rtx reg;
26612 rtx offset;
26613 rtx wcgr;
26614 rtx sum;
26616 if (!MEM_P (operands [1])
26617 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26618 || !REG_P (reg = XEXP (sum, 0))
26619 || !CONST_INT_P (offset = XEXP (sum, 1))
26620 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26621 return "wldrw%?\t%0, %1";
26623 /* Fix up an out-of-range load of a GR register. */
26624 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26625 wcgr = operands[0];
26626 operands[0] = reg;
26627 output_asm_insn ("ldr%?\t%0, %1", operands);
26629 operands[0] = wcgr;
26630 operands[1] = reg;
26631 output_asm_insn ("tmcr%?\t%0, %1", operands);
26632 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26634 return "";
26637 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26639 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26640 named arg and all anonymous args onto the stack.
26641 XXX I know the prologue shouldn't be pushing registers, but it is faster
26642 that way. */
26644 static void
26645 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26646 machine_mode mode,
26647 tree type,
26648 int *pretend_size,
26649 int second_time ATTRIBUTE_UNUSED)
26651 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26652 int nregs;
26654 cfun->machine->uses_anonymous_args = 1;
26655 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26657 nregs = pcum->aapcs_ncrn;
26658 if (nregs & 1)
26660 int res = arm_needs_doubleword_align (mode, type);
26661 if (res < 0 && warn_psabi)
26662 inform (input_location, "parameter passing for argument of "
26663 "type %qT changed in GCC 7.1", type);
26664 else if (res > 0)
26665 nregs++;
26668 else
26669 nregs = pcum->nregs;
26671 if (nregs < NUM_ARG_REGS)
26672 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
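/* As an illustration, for a variadic AAPCS function whose named arguments
   occupy only r0 and r1, the remaining argument registers r2 and r3 must be
   spilled, so *pretend_size becomes 2 * UNITS_PER_WORD (8 bytes).  */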
26675 /* We can't rely on the caller doing the proper promotion when
26676 using APCS or ATPCS. */
26678 static bool
26679 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26681 return !TARGET_AAPCS_BASED;
26684 static machine_mode
26685 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26686 machine_mode mode,
26687 int *punsignedp ATTRIBUTE_UNUSED,
26688 const_tree fntype ATTRIBUTE_UNUSED,
26689 int for_return ATTRIBUTE_UNUSED)
26691 if (GET_MODE_CLASS (mode) == MODE_INT
26692 && GET_MODE_SIZE (mode) < 4)
26693 return SImode;
26695 return mode;
26699 static bool
26700 arm_default_short_enums (void)
26702 return ARM_DEFAULT_SHORT_ENUMS;
26706 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26708 static bool
26709 arm_align_anon_bitfield (void)
26711 return TARGET_AAPCS_BASED;
26715 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26717 static tree
26718 arm_cxx_guard_type (void)
26720 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26724 /* The EABI says test the least significant bit of a guard variable. */
26726 static bool
26727 arm_cxx_guard_mask_bit (void)
26729 return TARGET_AAPCS_BASED;
26733 /* The EABI specifies that all array cookies are 8 bytes long. */
26735 static tree
26736 arm_get_cookie_size (tree type)
26738 tree size;
26740 if (!TARGET_AAPCS_BASED)
26741 return default_cxx_get_cookie_size (type);
26743 size = build_int_cst (sizetype, 8);
26744 return size;
26748 /* The EABI says that array cookies should also contain the element size. */
26750 static bool
26751 arm_cookie_has_size (void)
26753 return TARGET_AAPCS_BASED;
26757 /* The EABI says constructors and destructors should return a pointer to
26758 the object constructed/destroyed. */
26760 static bool
26761 arm_cxx_cdtor_returns_this (void)
26763 return TARGET_AAPCS_BASED;
26766 /* The EABI says that an inline function may never be the key
26767 method. */
26769 static bool
26770 arm_cxx_key_method_may_be_inline (void)
26772 return !TARGET_AAPCS_BASED;
26775 static void
26776 arm_cxx_determine_class_data_visibility (tree decl)
26778 if (!TARGET_AAPCS_BASED
26779 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26780 return;
26782 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26783 is exported. However, on systems without dynamic vague linkage,
26784 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26785 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26786 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26787 else
26788 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26789 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26792 static bool
26793 arm_cxx_class_data_always_comdat (void)
26795 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26796 vague linkage if the class has no key function. */
26797 return !TARGET_AAPCS_BASED;
26801 /* The EABI says __aeabi_atexit should be used to register static
26802 destructors. */
26804 static bool
26805 arm_cxx_use_aeabi_atexit (void)
26807 return TARGET_AAPCS_BASED;
26811 void
26812 arm_set_return_address (rtx source, rtx scratch)
26814 arm_stack_offsets *offsets;
26815 HOST_WIDE_INT delta;
26816 rtx addr;
26817 unsigned long saved_regs;
26819 offsets = arm_get_frame_offsets ();
26820 saved_regs = offsets->saved_regs_mask;
26822 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26823 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26824 else
26826 if (frame_pointer_needed)
26827 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26828 else
26830 /* LR will be the first saved register. */
26831 delta = offsets->outgoing_args - (offsets->frame + 4);
26834 if (delta >= 4096)
26836 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26837 GEN_INT (delta & ~4095)));
26838 addr = scratch;
26839 delta &= 4095;
26841 else
26842 addr = stack_pointer_rtx;
26844 addr = plus_constant (Pmode, addr, delta);
26846 /* The store needs to be marked as frame related in order to prevent
26847 DSE from deleting it as dead if it is based on fp. */
26848 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26849 RTX_FRAME_RELATED_P (insn) = 1;
26850 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26855 void
26856 thumb_set_return_address (rtx source, rtx scratch)
26858 arm_stack_offsets *offsets;
26859 HOST_WIDE_INT delta;
26860 HOST_WIDE_INT limit;
26861 int reg;
26862 rtx addr;
26863 unsigned long mask;
26865 emit_use (source);
26867 offsets = arm_get_frame_offsets ();
26868 mask = offsets->saved_regs_mask;
26869 if (mask & (1 << LR_REGNUM))
26871 limit = 1024;
26872 /* Find the saved regs. */
26873 if (frame_pointer_needed)
26875 delta = offsets->soft_frame - offsets->saved_args;
26876 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26877 if (TARGET_THUMB1)
26878 limit = 128;
26880 else
26882 delta = offsets->outgoing_args - offsets->saved_args;
26883 reg = SP_REGNUM;
26885 /* Allow for the stack frame. */
26886 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26887 delta -= 16;
26888 /* The link register is always the first saved register. */
26889 delta -= 4;
26891 /* Construct the address. */
26892 addr = gen_rtx_REG (SImode, reg);
26893 if (delta > limit)
26895 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26896 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26897 addr = scratch;
26899 else
26900 addr = plus_constant (Pmode, addr, delta);
26902 /* The store needs to be marked as frame related in order to prevent
26903 DSE from deleting it as dead if it is based on fp. */
26904 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26905 RTX_FRAME_RELATED_P (insn) = 1;
26906 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26908 else
26909 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26912 /* Implements target hook vector_mode_supported_p. */
26913 bool
26914 arm_vector_mode_supported_p (machine_mode mode)
26916 /* Neon also supports V2SImode, etc. listed in the clause below. */
26917 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26918 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26919 || mode == V2DImode || mode == V8HFmode))
26920 return true;
26922 if ((TARGET_NEON || TARGET_IWMMXT)
26923 && ((mode == V2SImode)
26924 || (mode == V4HImode)
26925 || (mode == V8QImode)))
26926 return true;
26928 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26929 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26930 || mode == V2HAmode))
26931 return true;
26933 return false;
26936 /* Implements target hook array_mode_supported_p. */
26938 static bool
26939 arm_array_mode_supported_p (machine_mode mode,
26940 unsigned HOST_WIDE_INT nelems)
26942 if (TARGET_NEON
26943 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26944 && (nelems >= 2 && nelems <= 4))
26945 return true;
26947 return false;
26950 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26951 registers when autovectorizing for Neon, at least until multiple vector
26952 widths are supported properly by the middle-end. */
26954 static machine_mode
26955 arm_preferred_simd_mode (scalar_mode mode)
26957 if (TARGET_NEON)
26958 switch (mode)
26960 case E_SFmode:
26961 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26962 case E_SImode:
26963 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26964 case E_HImode:
26965 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26966 case E_QImode:
26967 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26968 case E_DImode:
26969 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26970 return V2DImode;
26971 break;
26973 default:;
26976 if (TARGET_REALLY_IWMMXT)
26977 switch (mode)
26979 case E_SImode:
26980 return V2SImode;
26981 case E_HImode:
26982 return V4HImode;
26983 case E_QImode:
26984 return V8QImode;
26986 default:;
26989 return word_mode;
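/* As an illustration, with Neon enabled and quad-word vectorization (the
   default), SFmode data is vectorized as V4SFmode; under
   -mvectorize-with-neon-double it would be V2SFmode instead.  */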
26992 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26994 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26995 using r0-r4 for function arguments, r7 for the stack frame and don't have
26996 enough left over to do doubleword arithmetic. For Thumb-2 all the
26997 potentially problematic instructions accept high registers so this is not
26998 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26999 that require many low registers. */
27000 static bool
27001 arm_class_likely_spilled_p (reg_class_t rclass)
27003 if ((TARGET_THUMB1 && rclass == LO_REGS)
27004 || rclass == CC_REG)
27005 return true;
27007 return false;
27010 /* Implements target hook small_register_classes_for_mode_p. */
27011 bool
27012 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27014 return TARGET_THUMB1;
27017 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27018 ARM insns and therefore guarantee that the shift count is modulo 256.
27019 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27020 guarantee no particular behavior for out-of-range counts. */
27022 static unsigned HOST_WIDE_INT
27023 arm_shift_truncation_mask (machine_mode mode)
27025 return mode == SImode ? 255 : 0;
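/* As an illustration, an SImode shift by a register value of 257 therefore
   behaves like a shift by 1, whereas nothing is guaranteed for out-of-range
   DImode shift counts.  */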
27029 /* Map internal gcc register numbers to DWARF2 register numbers. */
27031 unsigned int
27032 arm_dbx_register_number (unsigned int regno)
27034 if (regno < 16)
27035 return regno;
27037 if (IS_VFP_REGNUM (regno))
27039 /* See comment in arm_dwarf_register_span. */
27040 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27041 return 64 + regno - FIRST_VFP_REGNUM;
27042 else
27043 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27046 if (IS_IWMMXT_GR_REGNUM (regno))
27047 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27049 if (IS_IWMMXT_REGNUM (regno))
27050 return 112 + regno - FIRST_IWMMXT_REGNUM;
27052 return DWARF_FRAME_REGISTERS;
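/* As an illustration, core register r7 keeps DWARF number 7, the
   single-precision register s5 maps to 64 + 5 = 69, and a double-only
   register such as d16 falls in the 256-based range (256 + 16 = 272).  */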
27055 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27056 GCC models them as 64 32-bit registers, so we need to describe this to
27057 the DWARF generation code. Other registers can use the default. */
27058 static rtx
27059 arm_dwarf_register_span (rtx rtl)
27061 machine_mode mode;
27062 unsigned regno;
27063 rtx parts[16];
27064 int nregs;
27065 int i;
27067 regno = REGNO (rtl);
27068 if (!IS_VFP_REGNUM (regno))
27069 return NULL_RTX;
27071 /* XXX FIXME: The EABI defines two VFP register ranges:
27072 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27073 256-287: D0-D31
27074 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27075 corresponding D register. Until GDB supports this, we shall use the
27076 legacy encodings. We also use these encodings for D0-D15 for
27077 compatibility with older debuggers. */
27078 mode = GET_MODE (rtl);
27079 if (GET_MODE_SIZE (mode) < 8)
27080 return NULL_RTX;
27082 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27084 nregs = GET_MODE_SIZE (mode) / 4;
27085 for (i = 0; i < nregs; i += 2)
27086 if (TARGET_BIG_END)
27088 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27089 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27091 else
27093 parts[i] = gen_rtx_REG (SImode, regno + i);
27094 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27097 else
27099 nregs = GET_MODE_SIZE (mode) / 8;
27100 for (i = 0; i < nregs; i++)
27101 parts[i] = gen_rtx_REG (DImode, regno + i);
27104 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27107 #if ARM_UNWIND_INFO
27108 /* Emit unwind directives for a store-multiple instruction or stack pointer
27109 push during alignment.
27110 These should only ever be generated by the function prologue code, so
27111 expect them to have a particular form.
27112 The store-multiple instruction sometimes pushes pc as the last register,
27113 although it should not be tracked in the unwind information; for -Os it
27114 may also push some dummy registers before the first register that needs
27115 to be tracked in the unwind information. Such dummy registers are there
27116 just to avoid a separate stack adjustment, and will not be restored in the
27117 epilogue. */
27119 static void
27120 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27122 int i;
27123 HOST_WIDE_INT offset;
27124 HOST_WIDE_INT nregs;
27125 int reg_size;
27126 unsigned reg;
27127 unsigned lastreg;
27128 unsigned padfirst = 0, padlast = 0;
27129 rtx e;
27131 e = XVECEXP (p, 0, 0);
27132 gcc_assert (GET_CODE (e) == SET);
27134 /* First insn will adjust the stack pointer. */
27135 gcc_assert (GET_CODE (e) == SET
27136 && REG_P (SET_DEST (e))
27137 && REGNO (SET_DEST (e)) == SP_REGNUM
27138 && GET_CODE (SET_SRC (e)) == PLUS);
27140 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27141 nregs = XVECLEN (p, 0) - 1;
27142 gcc_assert (nregs);
27144 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27145 if (reg < 16)
27147 /* For -Os dummy registers can be pushed at the beginning to
27148 avoid separate stack pointer adjustment. */
27149 e = XVECEXP (p, 0, 1);
27150 e = XEXP (SET_DEST (e), 0);
27151 if (GET_CODE (e) == PLUS)
27152 padfirst = INTVAL (XEXP (e, 1));
27153 gcc_assert (padfirst == 0 || optimize_size);
27154 /* The function prologue may also push pc, but not annotate it as it is
27155 never restored. We turn this into a stack pointer adjustment. */
27156 e = XVECEXP (p, 0, nregs);
27157 e = XEXP (SET_DEST (e), 0);
27158 if (GET_CODE (e) == PLUS)
27159 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27160 else
27161 padlast = offset - 4;
27162 gcc_assert (padlast == 0 || padlast == 4);
27163 if (padlast == 4)
27164 fprintf (asm_out_file, "\t.pad #4\n");
27165 reg_size = 4;
27166 fprintf (asm_out_file, "\t.save {");
27168 else if (IS_VFP_REGNUM (reg))
27170 reg_size = 8;
27171 fprintf (asm_out_file, "\t.vsave {");
27173 else
27174 /* Unknown register type. */
27175 gcc_unreachable ();
27177 /* If the stack increment doesn't match the size of the saved registers,
27178 something has gone horribly wrong. */
27179 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27181 offset = padfirst;
27182 lastreg = 0;
27183 /* The remaining insns will describe the stores. */
27184 for (i = 1; i <= nregs; i++)
27186 /* Expect (set (mem <addr>) (reg)).
27187 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27188 e = XVECEXP (p, 0, i);
27189 gcc_assert (GET_CODE (e) == SET
27190 && MEM_P (SET_DEST (e))
27191 && REG_P (SET_SRC (e)));
27193 reg = REGNO (SET_SRC (e));
27194 gcc_assert (reg >= lastreg);
27196 if (i != 1)
27197 fprintf (asm_out_file, ", ");
27198 /* We can't use %r for vfp because we need to use the
27199 double precision register names. */
27200 if (IS_VFP_REGNUM (reg))
27201 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27202 else
27203 asm_fprintf (asm_out_file, "%r", reg);
27205 if (flag_checking)
27207 /* Check that the addresses are consecutive. */
27208 e = XEXP (SET_DEST (e), 0);
27209 if (GET_CODE (e) == PLUS)
27210 gcc_assert (REG_P (XEXP (e, 0))
27211 && REGNO (XEXP (e, 0)) == SP_REGNUM
27212 && CONST_INT_P (XEXP (e, 1))
27213 && offset == INTVAL (XEXP (e, 1)));
27214 else
27215 gcc_assert (i == 1
27216 && REG_P (e)
27217 && REGNO (e) == SP_REGNUM);
27218 offset += reg_size;
27221 fprintf (asm_out_file, "}\n");
27222 if (padfirst)
27223 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
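/* As an illustration, a prologue store-multiple such as
       push  {r4, r5, lr}
   is annotated here with
       .save {r4, r5, lr}
   while a VFP save of d8/d9 would instead produce ".vsave {d8, d9}".  */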
27226 /* Emit unwind directives for a SET. */
27228 static void
27229 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27231 rtx e0;
27232 rtx e1;
27233 unsigned reg;
27235 e0 = XEXP (p, 0);
27236 e1 = XEXP (p, 1);
27237 switch (GET_CODE (e0))
27239 case MEM:
27240 /* Pushing a single register. */
27241 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27242 || !REG_P (XEXP (XEXP (e0, 0), 0))
27243 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27244 abort ();
27246 asm_fprintf (asm_out_file, "\t.save ");
27247 if (IS_VFP_REGNUM (REGNO (e1)))
27248 asm_fprintf(asm_out_file, "{d%d}\n",
27249 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27250 else
27251 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27252 break;
27254 case REG:
27255 if (REGNO (e0) == SP_REGNUM)
27257 /* A stack increment. */
27258 if (GET_CODE (e1) != PLUS
27259 || !REG_P (XEXP (e1, 0))
27260 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27261 || !CONST_INT_P (XEXP (e1, 1)))
27262 abort ();
27264 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27265 -INTVAL (XEXP (e1, 1)));
27267 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27269 HOST_WIDE_INT offset;
27271 if (GET_CODE (e1) == PLUS)
27273 if (!REG_P (XEXP (e1, 0))
27274 || !CONST_INT_P (XEXP (e1, 1)))
27275 abort ();
27276 reg = REGNO (XEXP (e1, 0));
27277 offset = INTVAL (XEXP (e1, 1));
27278 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27279 HARD_FRAME_POINTER_REGNUM, reg,
27280 offset);
27282 else if (REG_P (e1))
27284 reg = REGNO (e1);
27285 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27286 HARD_FRAME_POINTER_REGNUM, reg);
27288 else
27289 abort ();
27291 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27293 /* Move from sp to reg. */
27294 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27296 else if (GET_CODE (e1) == PLUS
27297 && REG_P (XEXP (e1, 0))
27298 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27299 && CONST_INT_P (XEXP (e1, 1)))
27301 /* Set reg to offset from sp. */
27302 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27303 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27305 else
27306 abort ();
27307 break;
27309 default:
27310 abort ();
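/* As an illustration, a stack adjustment "sub sp, sp, #16" is annotated as
   ".pad #16", and establishing the frame pointer with "add r7, sp, #8" is
   annotated as ".setfp r7, sp, #8" (r7 here assumes the Thumb frame pointer;
   on ARM it would be r11).  */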
27315 /* Emit unwind directives for the given insn. */
27317 static void
27318 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27320 rtx note, pat;
27321 bool handled_one = false;
27323 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27324 return;
27326 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27327 && (TREE_NOTHROW (current_function_decl)
27328 || crtl->all_throwers_are_sibcalls))
27329 return;
27331 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27332 return;
27334 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27336 switch (REG_NOTE_KIND (note))
27338 case REG_FRAME_RELATED_EXPR:
27339 pat = XEXP (note, 0);
27340 goto found;
27342 case REG_CFA_REGISTER:
27343 pat = XEXP (note, 0);
27344 if (pat == NULL)
27346 pat = PATTERN (insn);
27347 if (GET_CODE (pat) == PARALLEL)
27348 pat = XVECEXP (pat, 0, 0);
27351 /* Only emitted for IS_STACKALIGN re-alignment. */
27353 rtx dest, src;
27354 unsigned reg;
27356 src = SET_SRC (pat);
27357 dest = SET_DEST (pat);
27359 gcc_assert (src == stack_pointer_rtx);
27360 reg = REGNO (dest);
27361 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27362 reg + 0x90, reg);
27364 handled_one = true;
27365 break;
27367 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27368 to get correct DWARF information for shrink-wrapping. We should not
27369 emit unwind information for it because these notes are used either for
27370 pretend arguments or to adjust sp and restore registers from the
27371 stack. */
27372 case REG_CFA_DEF_CFA:
27373 case REG_CFA_ADJUST_CFA:
27374 case REG_CFA_RESTORE:
27375 return;
27377 case REG_CFA_EXPRESSION:
27378 case REG_CFA_OFFSET:
27379 /* ??? Only handling here what we actually emit. */
27380 gcc_unreachable ();
27382 default:
27383 break;
27386 if (handled_one)
27387 return;
27388 pat = PATTERN (insn);
27389 found:
27391 switch (GET_CODE (pat))
27393 case SET:
27394 arm_unwind_emit_set (asm_out_file, pat);
27395 break;
27397 case SEQUENCE:
27398 /* Store multiple. */
27399 arm_unwind_emit_sequence (asm_out_file, pat);
27400 break;
27402 default:
27403 abort();
27408 /* Output a reference from a function exception table to the type_info
27409 object X. The EABI specifies that the symbol should be relocated by
27410 an R_ARM_TARGET2 relocation. */
27412 static bool
27413 arm_output_ttype (rtx x)
27415 fputs ("\t.word\t", asm_out_file);
27416 output_addr_const (asm_out_file, x);
27417 /* Use special relocations for symbol references. */
27418 if (!CONST_INT_P (x))
27419 fputs ("(TARGET2)", asm_out_file);
27420 fputc ('\n', asm_out_file);
27422 return TRUE;
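/* As an illustration, a reference to the type_info object for "int" would be
   emitted roughly as
       .word   _ZTIi(TARGET2)
   whereas an integer filler value is emitted without the relocation.  */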
27425 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27427 static void
27428 arm_asm_emit_except_personality (rtx personality)
27430 fputs ("\t.personality\t", asm_out_file);
27431 output_addr_const (asm_out_file, personality);
27432 fputc ('\n', asm_out_file);
27434 #endif /* ARM_UNWIND_INFO */
27436 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27438 static void
27439 arm_asm_init_sections (void)
27441 #if ARM_UNWIND_INFO
27442 exception_section = get_unnamed_section (0, output_section_asm_op,
27443 "\t.handlerdata");
27444 #endif /* ARM_UNWIND_INFO */
27446 #ifdef OBJECT_FORMAT_ELF
27447 if (target_pure_code)
27448 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27449 #endif
27452 /* Output unwind directives for the start/end of a function. */
27454 void
27455 arm_output_fn_unwind (FILE * f, bool prologue)
27457 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27458 return;
27460 if (prologue)
27461 fputs ("\t.fnstart\n", f);
27462 else
27464 /* If this function will never be unwound, then mark it as such.
27465 The same condition is used in arm_unwind_emit to suppress
27466 the frame annotations. */
27467 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27468 && (TREE_NOTHROW (current_function_decl)
27469 || crtl->all_throwers_are_sibcalls))
27470 fputs("\t.cantunwind\n", f);
27472 fputs ("\t.fnend\n", f);
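/* As an illustration, a function that may be unwound is bracketed by
   ".fnstart" and ".fnend"; one that provably never unwinds additionally gets
   ".cantunwind" just before ".fnend".  */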
27476 static bool
27477 arm_emit_tls_decoration (FILE *fp, rtx x)
27479 enum tls_reloc reloc;
27480 rtx val;
27482 val = XVECEXP (x, 0, 0);
27483 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27485 output_addr_const (fp, val);
27487 switch (reloc)
27489 case TLS_GD32:
27490 fputs ("(tlsgd)", fp);
27491 break;
27492 case TLS_LDM32:
27493 fputs ("(tlsldm)", fp);
27494 break;
27495 case TLS_LDO32:
27496 fputs ("(tlsldo)", fp);
27497 break;
27498 case TLS_IE32:
27499 fputs ("(gottpoff)", fp);
27500 break;
27501 case TLS_LE32:
27502 fputs ("(tpoff)", fp);
27503 break;
27504 case TLS_DESCSEQ:
27505 fputs ("(tlsdesc)", fp);
27506 break;
27507 default:
27508 gcc_unreachable ();
27511 switch (reloc)
27513 case TLS_GD32:
27514 case TLS_LDM32:
27515 case TLS_IE32:
27516 case TLS_DESCSEQ:
27517 fputs (" + (. - ", fp);
27518 output_addr_const (fp, XVECEXP (x, 0, 2));
27519 /* For TLS_DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27520 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27521 output_addr_const (fp, XVECEXP (x, 0, 3));
27522 fputc (')', fp);
27523 break;
27524 default:
27525 break;
27528 return TRUE;
27531 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27533 static void
27534 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27536 gcc_assert (size == 4);
27537 fputs ("\t.word\t", file);
27538 output_addr_const (file, x);
27539 fputs ("(tlsldo)", file);
27542 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27544 static bool
27545 arm_output_addr_const_extra (FILE *fp, rtx x)
27547 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27548 return arm_emit_tls_decoration (fp, x);
27549 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27551 char label[256];
27552 int labelno = INTVAL (XVECEXP (x, 0, 0));
27554 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27555 assemble_name_raw (fp, label);
27557 return TRUE;
27559 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27561 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27562 if (GOT_PCREL)
27563 fputs ("+.", fp);
27564 fputs ("-(", fp);
27565 output_addr_const (fp, XVECEXP (x, 0, 0));
27566 fputc (')', fp);
27567 return TRUE;
27569 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27571 output_addr_const (fp, XVECEXP (x, 0, 0));
27572 if (GOT_PCREL)
27573 fputs ("+.", fp);
27574 fputs ("-(", fp);
27575 output_addr_const (fp, XVECEXP (x, 0, 1));
27576 fputc (')', fp);
27577 return TRUE;
27579 else if (GET_CODE (x) == CONST_VECTOR)
27580 return arm_emit_vector_const (fp, x);
27582 return FALSE;
27585 /* Output assembly for a shift instruction.
27586 SET_FLAGS determines how the instruction modifies the condition codes.
27587 0 - Do not set condition codes.
27588 1 - Set condition codes.
27589 2 - Use smallest instruction. */
27590 const char *
27591 arm_output_shift(rtx * operands, int set_flags)
27593 char pattern[100];
27594 static const char flag_chars[3] = {'?', '.', '!'};
27595 const char *shift;
27596 HOST_WIDE_INT val;
27597 char c;
27599 c = flag_chars[set_flags];
27600 shift = shift_op(operands[3], &val);
27601 if (shift)
27603 if (val != -1)
27604 operands[2] = GEN_INT(val);
27605 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27607 else
27608 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27610 output_asm_insn (pattern, operands);
27611 return "";
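/* As an illustration, a left shift by a constant 3 with SET_FLAGS == 1 goes
   through a pattern such as "lsl%.\t%0, %1, %2", which typically prints as
   "lsls r0, r1, #3" (register numbers depend on the operands).  */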
27614 /* Output assembly for a WMMX immediate shift instruction. */
27615 const char *
27616 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27618 int shift = INTVAL (operands[2]);
27619 char templ[50];
27620 machine_mode opmode = GET_MODE (operands[0]);
27622 gcc_assert (shift >= 0);
27624 /* Handle the case where the shift value exceeds the maximum for the mode:
27625 > 63 (for the D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27626 if (((opmode == V4HImode) && (shift > 15))
27627 || ((opmode == V2SImode) && (shift > 31))
27628 || ((opmode == DImode) && (shift > 63)))
27630 if (wror_or_wsra)
27632 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27633 output_asm_insn (templ, operands);
27634 if (opmode == DImode)
27636 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27637 output_asm_insn (templ, operands);
27640 else
27642 /* The destination register will contain all zeros. */
27643 sprintf (templ, "wzero\t%%0");
27644 output_asm_insn (templ, operands);
27646 return "";
27649 if ((opmode == DImode) && (shift > 32))
27651 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27652 output_asm_insn (templ, operands);
27653 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27654 output_asm_insn (templ, operands);
27656 else
27658 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27659 output_asm_insn (templ, operands);
27661 return "";
27664 /* Output assembly for a WMMX tinsr instruction. */
27665 const char *
27666 arm_output_iwmmxt_tinsr (rtx *operands)
27668 int mask = INTVAL (operands[3]);
27669 int i;
27670 char templ[50];
27671 int units = mode_nunits[GET_MODE (operands[0])];
27672 gcc_assert ((mask & (mask - 1)) == 0);
27673 for (i = 0; i < units; ++i)
27675 if ((mask & 0x01) == 1)
27677 break;
27679 mask >>= 1;
27681 gcc_assert (i < units);
27683 switch (GET_MODE (operands[0]))
27685 case E_V8QImode:
27686 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27687 break;
27688 case E_V4HImode:
27689 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27690 break;
27691 case E_V2SImode:
27692 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27693 break;
27694 default:
27695 gcc_unreachable ();
27696 break;
27698 output_asm_insn (templ, operands);
27700 return "";
27703 /* Output a Thumb-1 casesi dispatch sequence. */
27704 const char *
27705 thumb1_output_casesi (rtx *operands)
27707 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27709 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27711 switch (GET_MODE(diff_vec))
27713 case E_QImode:
27714 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27715 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27716 case E_HImode:
27717 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27718 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27719 case E_SImode:
27720 return "bl\t%___gnu_thumb1_case_si";
27721 default:
27722 gcc_unreachable ();
27726 /* Output a Thumb-2 casesi instruction. */
27727 const char *
27728 thumb2_output_casesi (rtx *operands)
27730 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27732 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27734 output_asm_insn ("cmp\t%0, %1", operands);
27735 output_asm_insn ("bhi\t%l3", operands);
27736 switch (GET_MODE(diff_vec))
27738 case E_QImode:
27739 return "tbb\t[%|pc, %0]";
27740 case E_HImode:
27741 return "tbh\t[%|pc, %0, lsl #1]";
27742 case E_SImode:
27743 if (flag_pic)
27745 output_asm_insn ("adr\t%4, %l2", operands);
27746 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27747 output_asm_insn ("add\t%4, %4, %5", operands);
27748 return "bx\t%4";
27750 else
27752 output_asm_insn ("adr\t%4, %l2", operands);
27753 return "ldr\t%|pc, [%4, %0, lsl #2]";
27755 default:
27756 gcc_unreachable ();
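/* As an illustration, for a QImode dispatch table the emitted sequence is
       cmp   r0, #<max>
       bhi   .Ldefault
       tbb   [pc, r0]
   (register and label names depend on the operands).  */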
27760 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27761 per-core tuning structs. */
27762 static int
27763 arm_issue_rate (void)
27765 return current_tune->issue_rate;
27768 /* Return how many instructions the scheduler should look ahead to choose
27769 the best one. */
27770 static int
27771 arm_first_cycle_multipass_dfa_lookahead (void)
27773 int issue_rate = arm_issue_rate ();
27775 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27778 /* Enable modeling of L2 auto-prefetcher. */
27779 static int
27780 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27782 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27785 const char *
27786 arm_mangle_type (const_tree type)
27788 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27789 has to be mangled as if it is in the "std" namespace. */
27790 if (TARGET_AAPCS_BASED
27791 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27792 return "St9__va_list";
27794 /* Half-precision float. */
27795 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27796 return "Dh";
27798 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27799 builtin type. */
27800 if (TYPE_NAME (type) != NULL)
27801 return arm_mangle_builtin_type (type);
27803 /* Use the default mangling. */
27804 return NULL;
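/* As an illustration, on AAPCS targets the builtin va_list type mangles as
   "St9__va_list" and __fp16 mangles as "Dh"; other types fall back to the
   default mangling.  */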
27807 /* Order of allocation of core registers for Thumb: this allocation is
27808 written over the corresponding initial entries of the array
27809 initialized with REG_ALLOC_ORDER. We allocate all low registers
27810 first. Saving and restoring a low register is usually cheaper than
27811 using a call-clobbered high register. */
27813 static const int thumb_core_reg_alloc_order[] =
27815 3, 2, 1, 0, 4, 5, 6, 7,
27816 12, 14, 8, 9, 10, 11
27819 /* Adjust register allocation order when compiling for Thumb. */
27821 void
27822 arm_order_regs_for_local_alloc (void)
27824 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27825 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27826 if (TARGET_THUMB)
27827 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27828 sizeof (thumb_core_reg_alloc_order));
27831 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27833 bool
27834 arm_frame_pointer_required (void)
27836 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27837 return true;
27839 /* If the function receives nonlocal gotos, it needs to save the frame
27840 pointer in the nonlocal_goto_save_area object. */
27841 if (cfun->has_nonlocal_label)
27842 return true;
27844 /* The frame pointer is required for non-leaf APCS frames. */
27845 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27846 return true;
27848 /* If we are probing the stack in the prologue, we will have a faulting
27849 instruction prior to the stack adjustment and this requires a frame
27850 pointer if we want to catch the exception using the EABI unwinder. */
27851 if (!IS_INTERRUPT (arm_current_func_type ())
27852 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27853 && arm_except_unwind_info (&global_options) == UI_TARGET
27854 && cfun->can_throw_non_call_exceptions)
27856 HOST_WIDE_INT size = get_frame_size ();
27858 /* That's irrelevant if there is no stack adjustment. */
27859 if (size <= 0)
27860 return false;
27862 /* That's relevant only if there is a stack probe. */
27863 if (crtl->is_leaf && !cfun->calls_alloca)
27865 /* We don't have the final size of the frame so adjust. */
27866 size += 32 * UNITS_PER_WORD;
27867 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27868 return true;
27870 else
27871 return true;
27874 return false;
27877 /* Thumb-1 is the only target that cannot support conditional execution, so
27878 return true unless the target is Thumb-1. */
27879 static bool
27880 arm_have_conditional_execution (void)
27882 return !TARGET_THUMB1;
27885 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27886 static HOST_WIDE_INT
27887 arm_vector_alignment (const_tree type)
27889 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27891 if (TARGET_AAPCS_BASED)
27892 align = MIN (align, 64);
27894 return align;
27897 static unsigned int
27898 arm_autovectorize_vector_sizes (void)
27900 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27903 static bool
27904 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27906 /* Vectors which aren't in packed structures will not be less aligned than
27907 the natural alignment of their element type, so this is safe. */
27908 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27909 return !is_packed;
27911 return default_builtin_vector_alignment_reachable (type, is_packed);
27914 static bool
27915 arm_builtin_support_vector_misalignment (machine_mode mode,
27916 const_tree type, int misalignment,
27917 bool is_packed)
27919 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27921 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27923 if (is_packed)
27924 return align == 1;
27926 /* If the misalignment is unknown, we should be able to handle the access
27927 so long as it is not to a member of a packed data structure. */
27928 if (misalignment == -1)
27929 return true;
27931 /* Return true if the misalignment is a multiple of the natural alignment
27932 of the vector's element type. This is probably always going to be
27933 true in practice, since we've already established that this isn't a
27934 packed access. */
27935 return ((misalignment % align) == 0);
27938 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27939 is_packed);
27942 static void
27943 arm_conditional_register_usage (void)
27945 int regno;
27947 if (TARGET_THUMB1 && optimize_size)
27949 /* When optimizing for size on Thumb-1, it's better not
27950 to use the HI regs, because of the overhead of
27951 stacking them. */
27952 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27953 fixed_regs[regno] = call_used_regs[regno] = 1;
27956 /* The link register can be clobbered by any branch insn,
27957 but we have no way to track that at present, so mark
27958 it as unavailable. */
27959 if (TARGET_THUMB1)
27960 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27962 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27964 /* VFPv3 registers are disabled when earlier VFP
27965 versions are selected due to the definition of
27966 LAST_VFP_REGNUM. */
27967 for (regno = FIRST_VFP_REGNUM;
27968 regno <= LAST_VFP_REGNUM; ++ regno)
27970 fixed_regs[regno] = 0;
27971 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27972 || regno >= FIRST_VFP_REGNUM + 32;
27976 if (TARGET_REALLY_IWMMXT)
27978 regno = FIRST_IWMMXT_GR_REGNUM;
27979 /* The 2002/10/09 revision of the XScale ABI has wCG0
27980 and wCG1 as call-preserved registers. The 2002/11/21
27981 revision changed this so that all wCG registers are
27982 scratch registers. */
27983 for (regno = FIRST_IWMMXT_GR_REGNUM;
27984 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27985 fixed_regs[regno] = 0;
27986 /* The XScale ABI has wR0 - wR9 as scratch registers,
27987 the rest as call-preserved registers. */
27988 for (regno = FIRST_IWMMXT_REGNUM;
27989 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27991 fixed_regs[regno] = 0;
27992 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27996 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27998 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27999 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28001 else if (TARGET_APCS_STACK)
28003 fixed_regs[10] = 1;
28004 call_used_regs[10] = 1;
28006 /* -mcaller-super-interworking reserves r11 for calls to
28007 _interwork_r11_call_via_rN(). Making the register global
28008 is an easy way of ensuring that it remains valid for all
28009 calls. */
28010 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28011 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28013 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28014 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28015 if (TARGET_CALLER_INTERWORKING)
28016 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28018 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28021 static reg_class_t
28022 arm_preferred_rename_class (reg_class_t rclass)
28024 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28025 using GENERAL_REGS. During the register renaming pass, we prefer LO_REGS,
28026 since this can reduce code size. */
28027 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28028 return LO_REGS;
28029 else
28030 return NO_REGS;
28033 /* Compute the attribute "length" of insn "*push_multi".
28034 So this function MUST be kept in sync with that insn pattern. */
28036 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28038 int i, regno, hi_reg;
28039 int num_saves = XVECLEN (parallel_op, 0);
28041 /* ARM mode. */
28042 if (TARGET_ARM)
28043 return 4;
28044 /* Thumb1 mode. */
28045 if (TARGET_THUMB1)
28046 return 2;
28048 /* Thumb2 mode. */
28049 regno = REGNO (first_op);
28050 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
28051 list fits in 8 bits. Normally this means all registers in the list must be
28052 LO_REGS, that is R0-R7. If any HI_REGS are used, then we must use 32-bit
28053 encodings. The one exception is PUSH: LR, although in HI_REGS, can be used
28054 with the 16-bit encoding. */
28055 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28056 for (i = 1; i < num_saves && !hi_reg; i++)
28058 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28059 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28062 if (!hi_reg)
28063 return 2;
28064 return 4;
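/* As an illustration, in Thumb-2 "push {r0-r7}" and "push {r4, lr}" use the
   16-bit encoding (length 2), while "push {r4, r8}" needs the 32-bit
   encoding (length 4).  */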
28067 /* Compute the attribute "length" of an insn. Currently, this function is used
28068 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28069 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28070 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28071 is true if OPERANDS contains an insn that explicitly updates the base register. */
28074 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28076 /* ARM mode. */
28077 if (TARGET_ARM)
28078 return 4;
28079 /* Thumb1 mode. */
28080 if (TARGET_THUMB1)
28081 return 2;
28083 rtx parallel_op = operands[0];
28084 /* Initialize to elements number of PARALLEL. */
28085 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28086 /* Initialize the value to base register. */
28087 unsigned regno = REGNO (operands[1]);
28088 /* Skip return and write back pattern.
28089 We only need register pop pattern for later analysis. */
28090 unsigned first_indx = 0;
28091 first_indx += return_pc ? 1 : 0;
28092 first_indx += write_back_p ? 1 : 0;
28094 /* A pop operation can be done through LDM or POP. If the base register is SP
28095 and write-back is used, then LDM is an alias of POP. */
28096 bool pop_p = (regno == SP_REGNUM && write_back_p);
28097 bool ldm_p = !pop_p;
28099 /* Check base register for LDM. */
28100 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28101 return 4;
28103 /* Check each register in the list. */
28104 for (; indx >= first_indx; indx--)
28106 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28107 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28108 comment in arm_attr_length_push_multi. */
28109 if (REGNO_REG_CLASS (regno) == HI_REGS
28110 && (regno != PC_REGNUM || ldm_p))
28111 return 4;
28114 return 2;
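/* As an illustration, in Thumb-2 "pop {r4, pc}" is length 2 because PC is
   permitted in the 16-bit POP encoding, while an LDM whose base register is
   a high register is always length 4.  */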
28117 /* Compute the number of instructions emitted by output_move_double. */
28119 arm_count_output_move_double_insns (rtx *operands)
28121 int count;
28122 rtx ops[2];
28123 /* output_move_double may modify the operands array, so call it
28124 here on a copy of the array. */
28125 ops[0] = operands[0];
28126 ops[1] = operands[1];
28127 output_move_double (ops, false, &count);
28128 return count;
28132 vfp3_const_double_for_fract_bits (rtx operand)
28134 REAL_VALUE_TYPE r0;
28136 if (!CONST_DOUBLE_P (operand))
28137 return 0;
28139 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28140 if (exact_real_inverse (DFmode, &r0)
28141 && !REAL_VALUE_NEGATIVE (r0))
28143 if (exact_real_truncate (DFmode, &r0))
28145 HOST_WIDE_INT value = real_to_integer (&r0);
28146 value = value & 0xffffffff;
28147 if ((value != 0) && ( (value & (value - 1)) == 0))
28149 int ret = exact_log2 (value);
28150 gcc_assert (IN_RANGE (ret, 0, 31));
28151 return ret;
28155 return 0;
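/* As an illustration, the DFmode constant 0.25 yields 2 here, since its
   exact reciprocal is 4.0 == 2^2; a constant such as 0.3, whose reciprocal
   is not a power of two, yields 0.  */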
28158 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28159 log2 is in [1, 32], return that log2. Otherwise return -1.
28160 This is used in the patterns for vcvt.s32.f32 floating-point to
28161 fixed-point conversions. */
28164 vfp3_const_double_for_bits (rtx x)
28166 const REAL_VALUE_TYPE *r;
28168 if (!CONST_DOUBLE_P (x))
28169 return -1;
28171 r = CONST_DOUBLE_REAL_VALUE (x);
28173 if (REAL_VALUE_NEGATIVE (*r)
28174 || REAL_VALUE_ISNAN (*r)
28175 || REAL_VALUE_ISINF (*r)
28176 || !real_isinteger (r, SFmode))
28177 return -1;
28179 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28181 /* The exact_log2 above will have returned -1 if this is
28182 not an exact log2. */
28183 if (!IN_RANGE (hwint, 1, 32))
28184 return -1;
28186 return hwint;
28190 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28192 static void
28193 arm_pre_atomic_barrier (enum memmodel model)
28195 if (need_atomic_barrier_p (model, true))
28196 emit_insn (gen_memory_barrier ());
28199 static void
28200 arm_post_atomic_barrier (enum memmodel model)
28202 if (need_atomic_barrier_p (model, false))
28203 emit_insn (gen_memory_barrier ());
28206 /* Emit the load-exclusive and store-exclusive instructions.
28207 Use acquire and release versions if necessary. */
28209 static void
28210 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28212 rtx (*gen) (rtx, rtx);
28214 if (acq)
28216 switch (mode)
28218 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28219 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28220 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28221 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28222 default:
28223 gcc_unreachable ();
28226 else
28228 switch (mode)
28230 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28231 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28232 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28233 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28234 default:
28235 gcc_unreachable ();
28239 emit_insn (gen (rval, mem));
28242 static void
28243 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28244 rtx mem, bool rel)
28246 rtx (*gen) (rtx, rtx, rtx);
28248 if (rel)
28250 switch (mode)
28252 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28253 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28254 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28255 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28256 default:
28257 gcc_unreachable ();
28260 else
28262 switch (mode)
28264 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28265 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28266 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28267 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28268 default:
28269 gcc_unreachable ();
28273 emit_insn (gen (bval, rval, mem));
28276 /* Mark the previous jump instruction as unlikely. */
28278 static void
28279 emit_unlikely_jump (rtx insn)
28281 rtx_insn *jump = emit_jump_insn (insn);
28282 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28285 /* Expand a compare and swap pattern. */
28287 void
28288 arm_expand_compare_and_swap (rtx operands[])
28290 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28291 machine_mode mode;
28292 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28294 bval = operands[0];
28295 rval = operands[1];
28296 mem = operands[2];
28297 oldval = operands[3];
28298 newval = operands[4];
28299 is_weak = operands[5];
28300 mod_s = operands[6];
28301 mod_f = operands[7];
28302 mode = GET_MODE (mem);
28304 /* Normally the succ memory model must be stronger than fail, but in the
28305 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28306 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28308 if (TARGET_HAVE_LDACQ
28309 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28310 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28311 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28313 switch (mode)
28315 case E_QImode:
28316 case E_HImode:
28317 /* For narrow modes, we're going to perform the comparison in SImode,
28318 so do the zero-extension now. */
28319 rval = gen_reg_rtx (SImode);
28320 oldval = convert_modes (SImode, mode, oldval, true);
28321 /* FALLTHRU */
28323 case E_SImode:
28324 /* Force the value into a register if needed. We waited until after
28325 the zero-extension above to do this properly. */
28326 if (!arm_add_operand (oldval, SImode))
28327 oldval = force_reg (SImode, oldval);
28328 break;
28330 case E_DImode:
28331 if (!cmpdi_operand (oldval, mode))
28332 oldval = force_reg (mode, oldval);
28333 break;
28335 default:
28336 gcc_unreachable ();
28339 if (TARGET_THUMB1)
28341 switch (mode)
28343 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28344 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28345 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28346 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28347 default:
28348 gcc_unreachable ();
28351 else
28353 switch (mode)
28355 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28356 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28357 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28358 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28359 default:
28360 gcc_unreachable ();
28364 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28365 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28367 if (mode == QImode || mode == HImode)
28368 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28370 /* In all cases, we arrange for success to be signaled by the Z flag being set.
28371 This arrangement allows the boolean result to be used directly
28372 in a subsequent branch, post optimization. For Thumb-1 targets, the
28373 boolean negation of the result is also stored in bval because the Thumb-1
28374 backend lacks dependency tracking for the CC flag, as flag-setting is not
28375 represented at the RTL level. */
28376 if (TARGET_THUMB1)
28377 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28378 else
28380 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28381 emit_insn (gen_rtx_SET (bval, x));
28385 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28386 another memory store between the load-exclusive and store-exclusive can
28387 reset the monitor from Exclusive to Open state. This means we must wait
28388 until after reload to split the pattern, lest we get a register spill in
28389 the middle of the atomic sequence. Success of the compare and swap is
28390 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28391 for Thumb-1 targets (i.e. negation of the boolean value returned by
28392 atomic_compare_and_swapmode standard pattern in operand 0). */
28394 void
28395 arm_split_compare_and_swap (rtx operands[])
28397 rtx rval, mem, oldval, newval, neg_bval;
28398 machine_mode mode;
28399 enum memmodel mod_s, mod_f;
28400 bool is_weak;
28401 rtx_code_label *label1, *label2;
28402 rtx x, cond;
28404 rval = operands[1];
28405 mem = operands[2];
28406 oldval = operands[3];
28407 newval = operands[4];
28408 is_weak = (operands[5] != const0_rtx);
28409 mod_s = memmodel_from_int (INTVAL (operands[6]));
28410 mod_f = memmodel_from_int (INTVAL (operands[7]));
28411 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28412 mode = GET_MODE (mem);
28414 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28416 bool use_acquire = TARGET_HAVE_LDACQ
28417 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28418 || is_mm_release (mod_s));
28420 bool use_release = TARGET_HAVE_LDACQ
28421 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28422 || is_mm_acquire (mod_s));
28424 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28425 a full barrier is emitted after the store-release. */
28426 if (is_armv8_sync)
28427 use_acquire = false;
28429 /* Checks whether a barrier is needed and emits one accordingly. */
28430 if (!(use_acquire || use_release))
28431 arm_pre_atomic_barrier (mod_s);
28433 label1 = NULL;
28434 if (!is_weak)
28436 label1 = gen_label_rtx ();
28437 emit_label (label1);
28439 label2 = gen_label_rtx ();
28441 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28443 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28444 as required to communicate with arm_expand_compare_and_swap. */
28445 if (TARGET_32BIT)
28447 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28448 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28449 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28450 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28451 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28453 else
28455 emit_move_insn (neg_bval, const1_rtx);
28456 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28457 if (thumb1_cmpneg_operand (oldval, SImode))
28458 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28459 label2, cond));
28460 else
28461 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28464 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28466 /* Weak or strong, we want EQ to be true for success, so that we
28467 match the flags that we got from the compare above. */
28468 if (TARGET_32BIT)
28470 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28471 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28472 emit_insn (gen_rtx_SET (cond, x));
28475 if (!is_weak)
28477 /* Z is set to boolean value of !neg_bval, as required to communicate
28478 with arm_expand_compare_and_swap. */
28479 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28480 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28483 if (!is_mm_relaxed (mod_f))
28484 emit_label (label2);
28486 /* Checks whether a barrier is needed and emits one accordingly. */
28487 if (is_armv8_sync
28488 || !(use_acquire || use_release))
28489 arm_post_atomic_barrier (mod_s);
28491 if (is_mm_relaxed (mod_f))
28492 emit_label (label2);
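/* As an illustration, a strong SImode compare-and-swap on a 32-bit target is
   split into a loop of roughly this shape (register numbers are arbitrary):

       .L1: ldrex   r3, [r1]
            cmp     r3, r4          @ oldval
            bne     .L2
            strex   r2, r5, [r1]    @ newval
            cmp     r2, #0
            bne     .L1
       .L2:                                                                   */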
28495 /* Split an atomic operation pattern. Operation is given by CODE and is one
28496 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28497 operation). Operation is performed on the content at MEM and on VALUE
28498 following the memory model MODEL_RTX. The content at MEM before and after
28499 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28500 success of the operation is returned in COND. Using a scratch register or
28501 an operand register for these determines what result is returned for that
28502 pattern. */
28504 void
28505 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28506 rtx value, rtx model_rtx, rtx cond)
28508 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28509 machine_mode mode = GET_MODE (mem);
28510 machine_mode wmode = (mode == DImode ? DImode : SImode);
28511 rtx_code_label *label;
28512 bool all_low_regs, bind_old_new;
28513 rtx x;
28515 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28517 bool use_acquire = TARGET_HAVE_LDACQ
28518 && !(is_mm_relaxed (model) || is_mm_consume (model)
28519 || is_mm_release (model));
28521 bool use_release = TARGET_HAVE_LDACQ
28522 && !(is_mm_relaxed (model) || is_mm_consume (model)
28523 || is_mm_acquire (model));
28525 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28526 a full barrier is emitted after the store-release. */
28527 if (is_armv8_sync)
28528 use_acquire = false;
28530 /* Checks whether a barrier is needed and emits one accordingly. */
28531 if (!(use_acquire || use_release))
28532 arm_pre_atomic_barrier (model);
28534 label = gen_label_rtx ();
28535 emit_label (label);
28537 if (new_out)
28538 new_out = gen_lowpart (wmode, new_out);
28539 if (old_out)
28540 old_out = gen_lowpart (wmode, old_out);
28541 else
28542 old_out = new_out;
28543 value = simplify_gen_subreg (wmode, value, mode, 0);
28545 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28547 /* Does the operation require destination and first operand to use the same
28548 register? This is decided by register constraints of relevant insn
28549 patterns in thumb1.md. */
28550 gcc_assert (!new_out || REG_P (new_out));
28551 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28552 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28553 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28554 bind_old_new =
28555 (TARGET_THUMB1
28556 && code != SET
28557 && code != MINUS
28558 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28560 /* We want to return the old value while putting the result of the operation
28561 in the same register as the old value so copy the old value over to the
28562 destination register and use that register for the operation. */
28563 if (old_out && bind_old_new)
28565 emit_move_insn (new_out, old_out);
28566 old_out = new_out;
28569 switch (code)
28571 case SET:
28572 new_out = value;
28573 break;
28575 case NOT:
28576 x = gen_rtx_AND (wmode, old_out, value);
28577 emit_insn (gen_rtx_SET (new_out, x));
28578 x = gen_rtx_NOT (wmode, new_out);
28579 emit_insn (gen_rtx_SET (new_out, x));
28580 break;
28582 case MINUS:
28583 if (CONST_INT_P (value))
28585 value = GEN_INT (-INTVAL (value));
28586 code = PLUS;
28588 /* FALLTHRU */
28590 case PLUS:
28591 if (mode == DImode)
28593 /* DImode plus/minus need to clobber flags. */
28594 /* The adddi3 and subdi3 patterns are incorrectly written so that
28595 they require matching operands, even when we could easily support
28596 three operands. Thankfully, this can be fixed up post-splitting,
28597 as the individual add+adc patterns do accept three operands and
28598 post-reload cprop can make these moves go away. */
28599 emit_move_insn (new_out, old_out);
28600 if (code == PLUS)
28601 x = gen_adddi3 (new_out, new_out, value);
28602 else
28603 x = gen_subdi3 (new_out, new_out, value);
28604 emit_insn (x);
28605 break;
28607 /* FALLTHRU */
28609 default:
28610 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28611 emit_insn (gen_rtx_SET (new_out, x));
28612 break;
28615 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28616 use_release);
28618 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28619 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28621 /* Checks whether a barrier is needed and emits one accordingly. */
28622 if (is_armv8_sync
28623 || !(use_acquire || use_release))
28624 arm_post_atomic_barrier (model);
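/* As an illustration, an SImode atomic fetch-and-add on a 32-bit target is
   split into a loop of roughly this shape (register numbers are arbitrary):

       .L1: ldrex   r0, [r2]
            add     r1, r0, r3
            strex   ip, r1, [r2]
            cmp     ip, #0
            bne     .L1                                                       */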
28627 #define MAX_VECT_LEN 16
28629 struct expand_vec_perm_d
28631 rtx target, op0, op1;
28632 unsigned char perm[MAX_VECT_LEN];
28633 machine_mode vmode;
28634 unsigned char nelt;
28635 bool one_vector_p;
28636 bool testing_p;
28639 /* Generate a variable permutation. */
28641 static void
28642 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28644 machine_mode vmode = GET_MODE (target);
28645 bool one_vector_p = rtx_equal_p (op0, op1);
28647 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28648 gcc_checking_assert (GET_MODE (op0) == vmode);
28649 gcc_checking_assert (GET_MODE (op1) == vmode);
28650 gcc_checking_assert (GET_MODE (sel) == vmode);
28651 gcc_checking_assert (TARGET_NEON);
28653 if (one_vector_p)
28655 if (vmode == V8QImode)
28656 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28657 else
28658 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28660 else
28662 rtx pair;
28664 if (vmode == V8QImode)
28666 pair = gen_reg_rtx (V16QImode);
28667 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28668 pair = gen_lowpart (TImode, pair);
28669 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28671 else
28673 pair = gen_reg_rtx (OImode);
28674 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28675 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28680 void
28681 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28683 machine_mode vmode = GET_MODE (target);
28684 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28685 bool one_vector_p = rtx_equal_p (op0, op1);
28686 rtx rmask[MAX_VECT_LEN], mask;
28688 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28689 numbering of elements for big-endian, we must reverse the order. */
28690 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28692 /* The VTBL instruction does not use a modulo index, so we must take care
28693 of that ourselves. */
28694 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28695 for (i = 0; i < nelt; ++i)
28696 rmask[i] = mask;
28697 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28698 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28700 arm_expand_vec_perm_1 (target, op0, op1, sel);
28703 /* Map lane ordering between architectural lane order, and GCC lane order,
28704 taking into account ABI. See comment above output_move_neon for details. */
28706 static int
28707 neon_endian_lane_map (machine_mode mode, int lane)
28709 if (BYTES_BIG_ENDIAN)
28711 int nelems = GET_MODE_NUNITS (mode);
28712 /* Reverse lane order. */
28713 lane = (nelems - 1 - lane);
28714 /* Reverse D register order, to match ABI. */
28715 if (GET_MODE_SIZE (mode) == 16)
28716 lane = lane ^ (nelems / 2);
28718 return lane;
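/* For illustration: on a big-endian target with V4SImode (a 16-byte vector of
   four lanes), the mapping above is 0->1, 1->0, 2->3, 3->2: lanes are first
   reversed within the vector and then the two D registers are swapped back to
   match the ABI.  On little-endian targets the function is the identity.  */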
28721 /* Some permutations index into pairs of vectors, this is a helper function
28722 to map indexes into those pairs of vectors. */
28724 static int
28725 neon_pair_endian_lane_map (machine_mode mode, int lane)
28727 int nelem = GET_MODE_NUNITS (mode);
28728 if (BYTES_BIG_ENDIAN)
28729 lane =
28730 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28731 return lane;
28734 /* Generate or test for an insn that supports a constant permutation. */
28736 /* Recognize patterns for the VUZP insns. */
28738 static bool
28739 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28741 unsigned int i, odd, mask, nelt = d->nelt;
28742 rtx out0, out1, in0, in1;
28743 rtx (*gen)(rtx, rtx, rtx, rtx);
28744 int first_elem;
28745 int swap_nelt;
28747 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28748 return false;
28750 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28751 big endian pattern on 64 bit vectors, so we correct for that. */
28752 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28753 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28755 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28757 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28758 odd = 0;
28759 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28760 odd = 1;
28761 else
28762 return false;
28763 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28765 for (i = 0; i < nelt; i++)
28767 unsigned elt =
28768 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28769 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28770 return false;
28773 /* Success! */
28774 if (d->testing_p)
28775 return true;
28777 switch (d->vmode)
28779 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28780 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28781 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28782 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28783 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28784 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28785 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28786 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28787 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28788 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28789 default:
28790 gcc_unreachable ();
28793 in0 = d->op0;
28794 in1 = d->op1;
28795 if (swap_nelt != 0)
28796 std::swap (in0, in1);
28798 out0 = d->target;
28799 out1 = gen_reg_rtx (d->vmode);
28800 if (odd)
28801 std::swap (out0, out1);
28803 emit_insn (gen (out0, in0, in1, out1));
28804 return true;
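/* For illustration: with two V4SImode operands on a little-endian target, the
   selector {0, 2, 4, 6} (even elements, odd == 0) or {1, 3, 5, 7} (odd
   elements, odd == 1) is matched above and expanded to a single VUZP, with
   the unwanted half of the result going to a scratch register.  */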
28807 /* Recognize patterns for the VZIP insns. */
28809 static bool
28810 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28812 unsigned int i, high, mask, nelt = d->nelt;
28813 rtx out0, out1, in0, in1;
28814 rtx (*gen)(rtx, rtx, rtx, rtx);
28815 int first_elem;
28816 bool is_swapped;
28818 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28819 return false;
28821 is_swapped = BYTES_BIG_ENDIAN;
28823 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28825 high = nelt / 2;
28826 if (first_elem == neon_endian_lane_map (d->vmode, high))
28827 ;
28828 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28829 high = 0;
28830 else
28831 return false;
28832 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28834 for (i = 0; i < nelt / 2; i++)
28836 unsigned elt =
28837 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28838 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28839 != elt)
28840 return false;
28841 elt =
28842 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28843 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28844 != elt)
28845 return false;
28848 /* Success! */
28849 if (d->testing_p)
28850 return true;
28852 switch (d->vmode)
28854 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28855 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28856 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28857 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28858 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28859 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28860 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28861 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28862 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28863 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28864 default:
28865 gcc_unreachable ();
28868 in0 = d->op0;
28869 in1 = d->op1;
28870 if (is_swapped)
28871 std::swap (in0, in1);
28873 out0 = d->target;
28874 out1 = gen_reg_rtx (d->vmode);
28875 if (high)
28876 std::swap (out0, out1);
28878 emit_insn (gen (out0, in0, in1, out1));
28879 return true;
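/* For illustration: with two V4SImode operands on a little-endian target, the
   selector {0, 4, 1, 5} (high == 0) or {2, 6, 3, 7} (high == nelt / 2) is
   matched above and expanded to a single VZIP.  */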
28882 /* Recognize patterns for the VREV insns. */
28884 static bool
28885 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28887 unsigned int i, j, diff, nelt = d->nelt;
28888 rtx (*gen)(rtx, rtx);
28890 if (!d->one_vector_p)
28891 return false;
28893 diff = d->perm[0];
28894 switch (diff)
28896 case 7:
28897 switch (d->vmode)
28899 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28900 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28901 default:
28902 return false;
28904 break;
28905 case 3:
28906 switch (d->vmode)
28908 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28909 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28910 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28911 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28912 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28913 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28914 default:
28915 return false;
28917 break;
28918 case 1:
28919 switch (d->vmode)
28921 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28922 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28923 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28924 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28925 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28926 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28927 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28928 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28929 default:
28930 return false;
28932 break;
28933 default:
28934 return false;
28937 for (i = 0; i < nelt ; i += diff + 1)
28938 for (j = 0; j <= diff; j += 1)
28940 /* This is guaranteed to be true as the value of diff
28941 is 7, 3, 1 and we should have enough elements in the
28942 queue to generate this. Getting a vector mask with a
28943 value of diff other than these values implies that
28944 something is wrong by the time we get here. */
28945 gcc_assert (i + j < nelt);
28946 if (d->perm[i + j] != i + diff - j)
28947 return false;
28950 /* Success! */
28951 if (d->testing_p)
28952 return true;
28954 emit_insn (gen (d->target, d->op0));
28955 return true;
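/* For illustration: for a single V8HImode operand the selector
   {1, 0, 3, 2, 5, 4, 7, 6} has diff == 1 and is expanded to VREV32.16,
   which swaps adjacent half-word pairs within each word.  */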
28958 /* Recognize patterns for the VTRN insns. */
28960 static bool
28961 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28963 unsigned int i, odd, mask, nelt = d->nelt;
28964 rtx out0, out1, in0, in1;
28965 rtx (*gen)(rtx, rtx, rtx, rtx);
28967 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28968 return false;
28970 /* Note that these are little-endian tests. Adjust for big-endian later. */
28971 if (d->perm[0] == 0)
28972 odd = 0;
28973 else if (d->perm[0] == 1)
28974 odd = 1;
28975 else
28976 return false;
28977 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28979 for (i = 0; i < nelt; i += 2)
28981 if (d->perm[i] != i + odd)
28982 return false;
28983 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28984 return false;
28987 /* Success! */
28988 if (d->testing_p)
28989 return true;
28991 switch (d->vmode)
28993 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28994 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28995 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28996 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28997 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28998 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28999 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29000 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29001 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29002 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29003 default:
29004 gcc_unreachable ();
29007 in0 = d->op0;
29008 in1 = d->op1;
29009 if (BYTES_BIG_ENDIAN)
29011 std::swap (in0, in1);
29012 odd = !odd;
29015 out0 = d->target;
29016 out1 = gen_reg_rtx (d->vmode);
29017 if (odd)
29018 std::swap (out0, out1);
29020 emit_insn (gen (out0, in0, in1, out1));
29021 return true;
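/* For illustration: with two V4SImode operands on a little-endian target, the
   selector {0, 4, 2, 6} (odd == 0) or {1, 5, 3, 7} (odd == 1) is matched
   above and expanded to a single VTRN.  */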
29024 /* Recognize patterns for the VEXT insns. */
29026 static bool
29027 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29029 unsigned int i, nelt = d->nelt;
29030 rtx (*gen) (rtx, rtx, rtx, rtx);
29031 rtx offset;
29033 unsigned int location;
29035 unsigned int next = d->perm[0] + 1;
29037 /* TODO: Handle GCC's numbering of elements for big-endian. */
29038 if (BYTES_BIG_ENDIAN)
29039 return false;
29041 /* Check if the extracted indexes are increasing by one. */
29042 for (i = 1; i < nelt; next++, i++)
29044 /* If we hit the most significant element of the 2nd vector in
29045 the previous iteration, no need to test further. */
29046 if (next == 2 * nelt)
29047 return false;
29049 /* If we are operating on only one vector: it could be a
29050 rotation. If there are only two elements of size < 64, let
29051 arm_evpc_neon_vrev catch it. */
29052 if (d->one_vector_p && (next == nelt))
29054 if ((nelt == 2) && (d->vmode != V2DImode))
29055 return false;
29056 else
29057 next = 0;
29060 if (d->perm[i] != next)
29061 return false;
29064 location = d->perm[0];
29066 switch (d->vmode)
29068 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29069 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29070 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29071 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29072 case E_V2SImode: gen = gen_neon_vextv2si; break;
29073 case E_V4SImode: gen = gen_neon_vextv4si; break;
29074 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29075 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29076 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29077 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29078 case E_V2DImode: gen = gen_neon_vextv2di; break;
29079 default:
29080 return false;
29083 /* Success! */
29084 if (d->testing_p)
29085 return true;
29087 offset = GEN_INT (location);
29088 emit_insn (gen (d->target, d->op0, d->op1, offset));
29089 return true;
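/* For illustration: with two V8QImode operands on a little-endian target, the
   selector {3, 4, 5, 6, 7, 8, 9, 10} is a sliding window starting at element
   3 of the concatenated operands, so it is matched above and expanded to
   VEXT.8 with an offset of 3.  */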
29092 /* The NEON VTBL instruction is a fully variable permutation that's even
29093 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29094 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29095 can do slightly better by expanding this as a constant where we don't
29096 have to apply a mask. */
29098 static bool
29099 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29101 rtx rperm[MAX_VECT_LEN], sel;
29102 machine_mode vmode = d->vmode;
29103 unsigned int i, nelt = d->nelt;
29105 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29106 numbering of elements for big-endian, we must reverse the order. */
29107 if (BYTES_BIG_ENDIAN)
29108 return false;
29110 if (d->testing_p)
29111 return true;
29113 /* Generic code will try constant permutation twice. Once with the
29114 original mode and again with the elements lowered to QImode.
29115 So wait and don't do the selector expansion ourselves. */
29116 if (vmode != V8QImode && vmode != V16QImode)
29117 return false;
29119 for (i = 0; i < nelt; ++i)
29120 rperm[i] = GEN_INT (d->perm[i]);
29121 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29122 sel = force_reg (vmode, sel);
29124 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29125 return true;
29128 static bool
29129 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29131 /* Check if the input mask matches vext before reordering the
29132 operands. */
29133 if (TARGET_NEON)
29134 if (arm_evpc_neon_vext (d))
29135 return true;
29137 /* The pattern matching functions above are written to look for a small
29138 number to begin the sequence (0, 1, N/2). If we begin with an index
29139 from the second operand, we can swap the operands. */
29140 if (d->perm[0] >= d->nelt)
29142 unsigned i, nelt = d->nelt;
29144 for (i = 0; i < nelt; ++i)
29145 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29147 std::swap (d->op0, d->op1);
29150 if (TARGET_NEON)
29152 if (arm_evpc_neon_vuzp (d))
29153 return true;
29154 if (arm_evpc_neon_vzip (d))
29155 return true;
29156 if (arm_evpc_neon_vrev (d))
29157 return true;
29158 if (arm_evpc_neon_vtrn (d))
29159 return true;
29160 return arm_evpc_neon_vtbl (d);
29162 return false;
29165 /* Expand a vec_perm_const pattern. */
29167 bool
29168 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29170 struct expand_vec_perm_d d;
29171 int i, nelt, which;
29173 d.target = target;
29174 d.op0 = op0;
29175 d.op1 = op1;
29177 d.vmode = GET_MODE (target);
29178 gcc_assert (VECTOR_MODE_P (d.vmode));
29179 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29180 d.testing_p = false;
29182 for (i = which = 0; i < nelt; ++i)
29184 rtx e = XVECEXP (sel, 0, i);
29185 int ei = INTVAL (e) & (2 * nelt - 1);
29186 which |= (ei < nelt ? 1 : 2);
29187 d.perm[i] = ei;
29190 switch (which)
29192 default:
29193 gcc_unreachable();
29195 case 3:
29196 d.one_vector_p = false;
29197 if (!rtx_equal_p (op0, op1))
29198 break;
29200 /* The elements of PERM do not suggest that only the first operand
29201 is used, but both operands are identical. Allow easier matching
29202 of the permutation by folding the permutation into the single
29203 input vector. */
29204 /* FALLTHRU */
29205 case 2:
29206 for (i = 0; i < nelt; ++i)
29207 d.perm[i] &= nelt - 1;
29208 d.op0 = op1;
29209 d.one_vector_p = true;
29210 break;
29212 case 1:
29213 d.op1 = op0;
29214 d.one_vector_p = true;
29215 break;
29218 return arm_expand_vec_perm_const_1 (&d);
29221 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29223 static bool
29224 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29225 const unsigned char *sel)
29227 struct expand_vec_perm_d d;
29228 unsigned int i, nelt, which;
29229 bool ret;
29231 d.vmode = vmode;
29232 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29233 d.testing_p = true;
29234 memcpy (d.perm, sel, nelt);
29236 /* Categorize the set of elements in the selector. */
29237 for (i = which = 0; i < nelt; ++i)
29239 unsigned char e = d.perm[i];
29240 gcc_assert (e < 2 * nelt);
29241 which |= (e < nelt ? 1 : 2);
29244 /* If all elements are from the second vector, fold them onto the first. */
29245 if (which == 2)
29246 for (i = 0; i < nelt; ++i)
29247 d.perm[i] -= nelt;
29249 /* Check whether the mask can be applied to the vector type. */
29250 d.one_vector_p = (which != 3);
29252 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29253 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29254 if (!d.one_vector_p)
29255 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29257 start_sequence ();
29258 ret = arm_expand_vec_perm_const_1 (&d);
29259 end_sequence ();
29261 return ret;
29264 bool
29265 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29267 /* If we are soft float and we do not have ldrd
29268 then all auto increment forms are ok. */
29269 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29270 return true;
29272 switch (code)
29274 /* Post increment and Pre Decrement are supported for all
29275 instruction forms except for vector forms. */
29276 case ARM_POST_INC:
29277 case ARM_PRE_DEC:
29278 if (VECTOR_MODE_P (mode))
29280 if (code != ARM_PRE_DEC)
29281 return true;
29282 else
29283 return false;
29286 return true;
29288 case ARM_POST_DEC:
29289 case ARM_PRE_INC:
29290 /* Without LDRD and mode size greater than
29291 word size, there is no point in auto-incrementing
29292 because ldm and stm will not have these forms. */
29293 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29294 return false;
29296 /* Vector and floating point modes do not support
29297 these auto increment forms. */
29298 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29299 return false;
29301 return true;
29303 default:
29304 return false;
29308 return false;
29311 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29312 on ARM, since we know that shifts by negative amounts are no-ops.
29313 Additionally, the default expansion code is not available or suitable
29314 for post-reload insn splits (this can occur when the register allocator
29315 chooses not to do a shift in NEON).
29317 This function is used in both initial expand and post-reload splits, and
29318 handles all kinds of 64-bit shifts.
29320 Input requirements:
29321 - It is safe for the input and output to be the same register, but
29322 early-clobber rules apply for the shift amount and scratch registers.
29323 - Shift by register requires both scratch registers. In all other cases
29324 the scratch registers may be NULL.
29325 - Ashiftrt by a register also clobbers the CC register. */
29326 void
29327 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29328 rtx amount, rtx scratch1, rtx scratch2)
29330 rtx out_high = gen_highpart (SImode, out);
29331 rtx out_low = gen_lowpart (SImode, out);
29332 rtx in_high = gen_highpart (SImode, in);
29333 rtx in_low = gen_lowpart (SImode, in);
29335 /* Terminology:
29336 in = the register pair containing the input value.
29337 out = the destination register pair.
29338 up = the high- or low-part of each pair.
29339 down = the opposite part to "up".
29340 In a shift, we can consider bits to shift from "up"-stream to
29341 "down"-stream, so in a left-shift "up" is the low-part and "down"
29342 is the high-part of each register pair. */
29344 rtx out_up = code == ASHIFT ? out_low : out_high;
29345 rtx out_down = code == ASHIFT ? out_high : out_low;
29346 rtx in_up = code == ASHIFT ? in_low : in_high;
29347 rtx in_down = code == ASHIFT ? in_high : in_low;
29349 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29350 gcc_assert (out
29351 && (REG_P (out) || GET_CODE (out) == SUBREG)
29352 && GET_MODE (out) == DImode);
29353 gcc_assert (in
29354 && (REG_P (in) || GET_CODE (in) == SUBREG)
29355 && GET_MODE (in) == DImode);
29356 gcc_assert (amount
29357 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29358 && GET_MODE (amount) == SImode)
29359 || CONST_INT_P (amount)));
29360 gcc_assert (scratch1 == NULL
29361 || (GET_CODE (scratch1) == SCRATCH)
29362 || (GET_MODE (scratch1) == SImode
29363 && REG_P (scratch1)));
29364 gcc_assert (scratch2 == NULL
29365 || (GET_CODE (scratch2) == SCRATCH)
29366 || (GET_MODE (scratch2) == SImode
29367 && REG_P (scratch2)));
29368 gcc_assert (!REG_P (out) || !REG_P (amount)
29369 || !HARD_REGISTER_P (out)
29370 || (REGNO (out) != REGNO (amount)
29371 && REGNO (out) + 1 != REGNO (amount)));
29373 /* Macros to make following code more readable. */
29374 #define SUB_32(DEST,SRC) \
29375 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29376 #define RSB_32(DEST,SRC) \
29377 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29378 #define SUB_S_32(DEST,SRC) \
29379 gen_addsi3_compare0 ((DEST), (SRC), \
29380 GEN_INT (-32))
29381 #define SET(DEST,SRC) \
29382 gen_rtx_SET ((DEST), (SRC))
29383 #define SHIFT(CODE,SRC,AMOUNT) \
29384 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29385 #define LSHIFT(CODE,SRC,AMOUNT) \
29386 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29387 SImode, (SRC), (AMOUNT))
29388 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29389 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29390 SImode, (SRC), (AMOUNT))
29391 #define ORR(A,B) \
29392 gen_rtx_IOR (SImode, (A), (B))
29393 #define BRANCH(COND,LABEL) \
29394 gen_arm_cond_branch ((LABEL), \
29395 gen_rtx_ ## COND (CCmode, cc_reg, \
29396 const0_rtx), \
29397 cc_reg)
29399 /* Shifts by register and shifts by constant are handled separately. */
29400 if (CONST_INT_P (amount))
29402 /* We have a shift-by-constant. */
29404 /* First, handle out-of-range shift amounts.
29405 In both cases we try to match the result an ARM instruction in a
29406 shift-by-register would give. This helps reduce execution
29407 differences between optimization levels, but it won't stop other
29408 parts of the compiler doing different things. This is "undefined
29409 behavior", in any case. */
29410 if (INTVAL (amount) <= 0)
29411 emit_insn (gen_movdi (out, in));
29412 else if (INTVAL (amount) >= 64)
29414 if (code == ASHIFTRT)
29416 rtx const31_rtx = GEN_INT (31);
29417 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29418 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29420 else
29421 emit_insn (gen_movdi (out, const0_rtx));
29424 /* Now handle valid shifts. */
29425 else if (INTVAL (amount) < 32)
29427 /* Shifts by a constant less than 32. */
29428 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29430 /* Clearing the out register in DImode first avoids lots
29431 of spilling and results in less stack usage.
29432 Later this redundant insn is completely removed.
29433 Do that only if "in" and "out" are different registers. */
29434 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29435 emit_insn (SET (out, const0_rtx));
29436 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29437 emit_insn (SET (out_down,
29438 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29439 out_down)));
29440 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29442 else
29444 /* Shifts by a constant greater than 31. */
29445 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29447 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29448 emit_insn (SET (out, const0_rtx));
29449 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29450 if (code == ASHIFTRT)
29451 emit_insn (gen_ashrsi3 (out_up, in_up,
29452 GEN_INT (31)));
29453 else
29454 emit_insn (SET (out_up, const0_rtx));
29457 else
29459 /* We have a shift-by-register. */
29460 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29462 /* This alternative requires the scratch registers. */
29463 gcc_assert (scratch1 && REG_P (scratch1));
29464 gcc_assert (scratch2 && REG_P (scratch2));
29466 /* We will need the values "amount-32" and "32-amount" later.
29467 Swapping them around now allows the later code to be more general. */
29468 switch (code)
29470 case ASHIFT:
29471 emit_insn (SUB_32 (scratch1, amount));
29472 emit_insn (RSB_32 (scratch2, amount));
29473 break;
29474 case ASHIFTRT:
29475 emit_insn (RSB_32 (scratch1, amount));
29476 /* Also set CC = amount > 32. */
29477 emit_insn (SUB_S_32 (scratch2, amount));
29478 break;
29479 case LSHIFTRT:
29480 emit_insn (RSB_32 (scratch1, amount));
29481 emit_insn (SUB_32 (scratch2, amount));
29482 break;
29483 default:
29484 gcc_unreachable ();
29487 /* Emit code like this:
29489 arithmetic-left:
29490 out_down = in_down << amount;
29491 out_down = (in_up << (amount - 32)) | out_down;
29492 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29493 out_up = in_up << amount;
29495 arithmetic-right:
29496 out_down = in_down >> amount;
29497 out_down = (in_up << (32 - amount)) | out_down;
29498 if (amount < 32)
29499 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29500 out_up = in_up << amount;
29502 logical-right:
29503 out_down = in_down >> amount;
29504 out_down = (in_up << (32 - amount)) | out_down;
29505 if (amount < 32)
29506 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29507 out_up = in_up << amount;
29509 The ARM and Thumb2 variants are the same but implemented slightly
29510 differently. If this were only called during expand we could just
29511 use the Thumb2 case and let combine do the right thing, but this
29512 can also be called from post-reload splitters. */
29514 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29516 if (!TARGET_THUMB2)
29518 /* Emit code for ARM mode. */
29519 emit_insn (SET (out_down,
29520 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29521 if (code == ASHIFTRT)
29523 rtx_code_label *done_label = gen_label_rtx ();
29524 emit_jump_insn (BRANCH (LT, done_label));
29525 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29526 out_down)));
29527 emit_label (done_label);
29529 else
29530 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29531 out_down)));
29533 else
29535 /* Emit code for Thumb2 mode.
29536 Thumb2 can't do shift and or in one insn. */
29537 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29538 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29540 if (code == ASHIFTRT)
29542 rtx_code_label *done_label = gen_label_rtx ();
29543 emit_jump_insn (BRANCH (LT, done_label));
29544 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29545 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29546 emit_label (done_label);
29548 else
29550 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29551 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29555 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29558 #undef SUB_32
29559 #undef RSB_32
29560 #undef SUB_S_32
29561 #undef SET
29562 #undef SHIFT
29563 #undef LSHIFT
29564 #undef REV_LSHIFT
29565 #undef ORR
29566 #undef BRANCH
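/* For illustration: in ARM mode a 64-bit logical right shift by a register
   AMOUNT produces roughly

       rsb   scratch1, amount, #32
       sub   scratch2, amount, #32
       lsr   out_lo, in_lo, amount
       orr   out_lo, out_lo, in_hi, lsl scratch1
       orr   out_lo, out_lo, in_hi, lsr scratch2
       lsr   out_hi, in_hi, amount

   This relies on register-specified shifts using the bottom byte of the
   shift register, so a scratch value that is negative or 32 or more shifts
   its ORR term out entirely and contributes nothing.  */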
29569 /* Returns true if the pattern is a valid symbolic address, which is either a
29570 symbol_ref or (symbol_ref + addend).
29572 According to the ARM ELF ABI, the initial addend of REL-type relocations
29573 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29574 literal field of the instruction as a 16-bit signed value in the range
29575 -32768 <= A < 32768. */
29577 bool
29578 arm_valid_symbolic_address_p (rtx addr)
29580 rtx xop0, xop1 = NULL_RTX;
29581 rtx tmp = addr;
29583 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29584 return true;
29586 /* (const (plus: symbol_ref const_int)) */
29587 if (GET_CODE (addr) == CONST)
29588 tmp = XEXP (addr, 0);
29590 if (GET_CODE (tmp) == PLUS)
29592 xop0 = XEXP (tmp, 0);
29593 xop1 = XEXP (tmp, 1);
29595 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29596 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29599 return false;
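/* For illustration: (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 64))) are accepted above, while
   an addend of 0x8000 or beyond -0x8000 is rejected because it cannot be
   encoded in the 16-bit signed addend of a MOVW/MOVT REL relocation.  */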
29602 /* Returns true if *COMPARISON is a valid comparison operation and puts
29603 the operands into a form that is valid. */
29604 bool
29605 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29607 enum rtx_code code = GET_CODE (*comparison);
29608 int code_int;
29609 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29610 ? GET_MODE (*op2) : GET_MODE (*op1);
29612 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29614 if (code == UNEQ || code == LTGT)
29615 return false;
29617 code_int = (int)code;
29618 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29619 PUT_CODE (*comparison, (enum rtx_code)code_int);
29621 switch (mode)
29623 case E_SImode:
29624 if (!arm_add_operand (*op1, mode))
29625 *op1 = force_reg (mode, *op1);
29626 if (!arm_add_operand (*op2, mode))
29627 *op2 = force_reg (mode, *op2);
29628 return true;
29630 case E_DImode:
29631 if (!cmpdi_operand (*op1, mode))
29632 *op1 = force_reg (mode, *op1);
29633 if (!cmpdi_operand (*op2, mode))
29634 *op2 = force_reg (mode, *op2);
29635 return true;
29637 case E_HFmode:
29638 if (!TARGET_VFP_FP16INST)
29639 break;
29640 /* FP16 comparisons are done in SF mode. */
29641 mode = SFmode;
29642 *op1 = convert_to_mode (mode, *op1, 1);
29643 *op2 = convert_to_mode (mode, *op2, 1);
29644 /* Fall through. */
29645 case E_SFmode:
29646 case E_DFmode:
29647 if (!vfp_compare_operand (*op1, mode))
29648 *op1 = force_reg (mode, *op1);
29649 if (!vfp_compare_operand (*op2, mode))
29650 *op2 = force_reg (mode, *op2);
29651 return true;
29652 default:
29653 break;
29656 return false;
29660 /* Maximum number of instructions to set block of memory. */
29661 static int
29662 arm_block_set_max_insns (void)
29664 if (optimize_function_for_size_p (cfun))
29665 return 4;
29666 else
29667 return current_tune->max_insns_inline_memset;
29670 /* Return TRUE if it's profitable to set block of memory for
29671 non-vectorized case. VAL is the value to set the memory
29672 with. LENGTH is the number of bytes to set. ALIGN is the
29673 alignment of the destination memory in bytes. UNALIGNED_P
29674 is TRUE if we can only set the memory with instructions
29675 meeting alignment requirements. USE_STRD_P is TRUE if we
29676 can use strd to set the memory. */
29677 static bool
29678 arm_block_set_non_vect_profit_p (rtx val,
29679 unsigned HOST_WIDE_INT length,
29680 unsigned HOST_WIDE_INT align,
29681 bool unaligned_p, bool use_strd_p)
29683 int num = 0;
29684 /* For leftovers of 0-7 bytes, we can set the memory block using the
29685 minimum number of strb/strh/str instructions. */
29686 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29688 if (unaligned_p)
29690 num = arm_const_inline_cost (SET, val);
29691 num += length / align + length % align;
29693 else if (use_strd_p)
29695 num = arm_const_double_inline_cost (val);
29696 num += (length >> 3) + leftover[length & 7];
29698 else
29700 num = arm_const_inline_cost (SET, val);
29701 num += (length >> 2) + leftover[length & 3];
29704 /* We may be able to combine last pair STRH/STRB into a single STR
29705 by shifting one byte back. */
29706 if (unaligned_access && length > 3 && (length & 3) == 3)
29707 num--;
29709 return (num <= arm_block_set_max_insns ());
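/* For illustration: assuming the constant itself costs one instruction,
   setting 15 bytes with word stores (use_strd_p and unaligned_p both false)
   needs 1 + (15 >> 2) + leftover[15 & 3] = 1 + 3 + 2 = 6 instructions,
   reduced to 5 when unaligned access lets the final STRH/STRB pair be merged
   into a single STR.  */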
29712 /* Return TRUE if it's profitable to set block of memory for
29713 vectorized case. LENGTH is the number of bytes to set.
29714 ALIGN is the alignment of destination memory in bytes.
29715 MODE is the vector mode used to set the memory. */
29716 static bool
29717 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29718 unsigned HOST_WIDE_INT align,
29719 machine_mode mode)
29721 int num;
29722 bool unaligned_p = ((align & 3) != 0);
29723 unsigned int nelt = GET_MODE_NUNITS (mode);
29725 /* Instruction loading constant value. */
29726 num = 1;
29727 /* Instructions storing the memory. */
29728 num += (length + nelt - 1) / nelt;
29729 /* Instructions adjusting the address expression. We only need to
29730 adjust the address expression if it's 4-byte aligned and the leftover
29731 bytes can only be stored by a misaligned store instruction. */
29732 if (!unaligned_p && (length & 3) != 0)
29733 num++;
29735 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29736 if (!unaligned_p && mode == V16QImode)
29737 num--;
29739 return (num <= arm_block_set_max_insns ());
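/* For illustration: setting 17 bytes with V16QImode on a 4-byte-aligned
   destination costs 1 (load the constant) + 2 (stores) + 1 (address
   adjustment for the leftover byte) - 1 (the first vst1 needs no
   adjustment) = 3 instructions, so it is considered profitable.  */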
29742 /* Set a block of memory using vectorization instructions for the
29743 unaligned case. We fill the first LENGTH bytes of the memory
29744 area starting from DSTBASE with byte constant VALUE. ALIGN is
29745 the alignment requirement of memory. Return TRUE if succeeded. */
29746 static bool
29747 arm_block_set_unaligned_vect (rtx dstbase,
29748 unsigned HOST_WIDE_INT length,
29749 unsigned HOST_WIDE_INT value,
29750 unsigned HOST_WIDE_INT align)
29752 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29753 rtx dst, mem;
29754 rtx val_elt, val_vec, reg;
29755 rtx rval[MAX_VECT_LEN];
29756 rtx (*gen_func) (rtx, rtx);
29757 machine_mode mode;
29758 unsigned HOST_WIDE_INT v = value;
29759 unsigned int offset = 0;
29760 gcc_assert ((align & 0x3) != 0);
29761 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29762 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29763 if (length >= nelt_v16)
29765 mode = V16QImode;
29766 gen_func = gen_movmisalignv16qi;
29768 else
29770 mode = V8QImode;
29771 gen_func = gen_movmisalignv8qi;
29773 nelt_mode = GET_MODE_NUNITS (mode);
29774 gcc_assert (length >= nelt_mode);
29775 /* Skip if it isn't profitable. */
29776 if (!arm_block_set_vect_profit_p (length, align, mode))
29777 return false;
29779 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29780 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29782 v = sext_hwi (v, BITS_PER_WORD);
29783 val_elt = GEN_INT (v);
29784 for (j = 0; j < nelt_mode; j++)
29785 rval[j] = val_elt;
29787 reg = gen_reg_rtx (mode);
29788 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29789 /* Emit instruction loading the constant value. */
29790 emit_move_insn (reg, val_vec);
29792 /* Handle nelt_mode bytes in a vector. */
29793 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29795 emit_insn ((*gen_func) (mem, reg));
29796 if (i + 2 * nelt_mode <= length)
29798 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29799 offset += nelt_mode;
29800 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29804 /* If at least nelt_v8 bytes are left over, we must be in
29805 V16QImode. */
29806 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29808 /* Handle (8, 16) bytes leftover. */
29809 if (i + nelt_v8 < length)
29811 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29812 offset += length - i;
29813 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29815 /* We are shifting bytes back, set the alignment accordingly. */
29816 if ((length & 1) != 0 && align >= 2)
29817 set_mem_align (mem, BITS_PER_UNIT);
29819 emit_insn (gen_movmisalignv16qi (mem, reg));
29821 /* Handle (0, 8] bytes leftover. */
29822 else if (i < length && i + nelt_v8 >= length)
29824 if (mode == V16QImode)
29825 reg = gen_lowpart (V8QImode, reg);
29827 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29828 + (nelt_mode - nelt_v8))));
29829 offset += (length - i) + (nelt_mode - nelt_v8);
29830 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29832 /* We are shifting bytes back, set the alignment accordingly. */
29833 if ((length & 1) != 0 && align >= 2)
29834 set_mem_align (mem, BITS_PER_UNIT);
29836 emit_insn (gen_movmisalignv8qi (mem, reg));
29839 return true;
29842 /* Set a block of memory using vectorization instructions for the
29843 aligned case. We fill the first LENGTH bytes of the memory area
29844 starting from DSTBASE with byte constant VALUE. ALIGN is the
29845 alignment requirement of memory. Return TRUE if succeeded. */
29846 static bool
29847 arm_block_set_aligned_vect (rtx dstbase,
29848 unsigned HOST_WIDE_INT length,
29849 unsigned HOST_WIDE_INT value,
29850 unsigned HOST_WIDE_INT align)
29852 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29853 rtx dst, addr, mem;
29854 rtx val_elt, val_vec, reg;
29855 rtx rval[MAX_VECT_LEN];
29856 machine_mode mode;
29857 unsigned HOST_WIDE_INT v = value;
29858 unsigned int offset = 0;
29860 gcc_assert ((align & 0x3) == 0);
29861 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29862 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29863 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29864 mode = V16QImode;
29865 else
29866 mode = V8QImode;
29868 nelt_mode = GET_MODE_NUNITS (mode);
29869 gcc_assert (length >= nelt_mode);
29870 /* Skip if it isn't profitable. */
29871 if (!arm_block_set_vect_profit_p (length, align, mode))
29872 return false;
29874 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29876 v = sext_hwi (v, BITS_PER_WORD);
29877 val_elt = GEN_INT (v);
29878 for (j = 0; j < nelt_mode; j++)
29879 rval[j] = val_elt;
29881 reg = gen_reg_rtx (mode);
29882 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29883 /* Emit instruction loading the constant value. */
29884 emit_move_insn (reg, val_vec);
29886 i = 0;
29887 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29888 if (mode == V16QImode)
29890 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29891 emit_insn (gen_movmisalignv16qi (mem, reg));
29892 i += nelt_mode;
29893 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29894 if (i + nelt_v8 < length && i + nelt_v16 > length)
29896 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29897 offset += length - nelt_mode;
29898 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29899 /* We are shifting bytes back, set the alignment accordingly. */
29900 if ((length & 0x3) == 0)
29901 set_mem_align (mem, BITS_PER_UNIT * 4);
29902 else if ((length & 0x1) == 0)
29903 set_mem_align (mem, BITS_PER_UNIT * 2);
29904 else
29905 set_mem_align (mem, BITS_PER_UNIT);
29907 emit_insn (gen_movmisalignv16qi (mem, reg));
29908 return true;
29910 /* Fall through for bytes leftover. */
29911 mode = V8QImode;
29912 nelt_mode = GET_MODE_NUNITS (mode);
29913 reg = gen_lowpart (V8QImode, reg);
29916 /* Handle 8 bytes in a vector. */
29917 for (; (i + nelt_mode <= length); i += nelt_mode)
29919 addr = plus_constant (Pmode, dst, i);
29920 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29921 emit_move_insn (mem, reg);
29924 /* Handle single word leftover by shifting 4 bytes back. We can
29925 use aligned access for this case. */
29926 if (i + UNITS_PER_WORD == length)
29928 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29929 offset += i - UNITS_PER_WORD;
29930 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29931 /* We are shifting 4 bytes back, set the alignment accordingly. */
29932 if (align > UNITS_PER_WORD)
29933 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29935 emit_move_insn (mem, reg);
29937 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29938 We have to use unaligned access for this case. */
29939 else if (i < length)
29941 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29942 offset += length - nelt_mode;
29943 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29944 /* We are shifting bytes back, set the alignment accordingly. */
29945 if ((length & 1) == 0)
29946 set_mem_align (mem, BITS_PER_UNIT * 2);
29947 else
29948 set_mem_align (mem, BITS_PER_UNIT);
29950 emit_insn (gen_movmisalignv8qi (mem, reg));
29953 return true;
29956 /* Set a block of memory using plain strh/strb instructions, only
29957 using instructions allowed by ALIGN on the processor. We fill the
29958 first LENGTH bytes of the memory area starting from DSTBASE
29959 with byte constant VALUE. ALIGN is the alignment requirement
29960 of memory. */
29961 static bool
29962 arm_block_set_unaligned_non_vect (rtx dstbase,
29963 unsigned HOST_WIDE_INT length,
29964 unsigned HOST_WIDE_INT value,
29965 unsigned HOST_WIDE_INT align)
29967 unsigned int i;
29968 rtx dst, addr, mem;
29969 rtx val_exp, val_reg, reg;
29970 machine_mode mode;
29971 HOST_WIDE_INT v = value;
29973 gcc_assert (align == 1 || align == 2);
29975 if (align == 2)
29976 v |= (value << BITS_PER_UNIT);
29978 v = sext_hwi (v, BITS_PER_WORD);
29979 val_exp = GEN_INT (v);
29980 /* Skip if it isn't profitable. */
29981 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29982 align, true, false))
29983 return false;
29985 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29986 mode = (align == 2 ? HImode : QImode);
29987 val_reg = force_reg (SImode, val_exp);
29988 reg = gen_lowpart (mode, val_reg);
29990 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29992 addr = plus_constant (Pmode, dst, i);
29993 mem = adjust_automodify_address (dstbase, mode, addr, i);
29994 emit_move_insn (mem, reg);
29997 /* Handle single byte leftover. */
29998 if (i + 1 == length)
30000 reg = gen_lowpart (QImode, val_reg);
30001 addr = plus_constant (Pmode, dst, i);
30002 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30003 emit_move_insn (mem, reg);
30004 i++;
30007 gcc_assert (i == length);
30008 return true;
30011 /* Set a block of memory using plain strd/str/strh/strb instructions,
30012 to permit unaligned copies on processors which support unaligned
30013 semantics for those instructions. We fill the first LENGTH bytes
30014 of the memory area starting from DSTBASE with byte constant VALUE.
30015 ALIGN is the alignment requirement of memory. */
30016 static bool
30017 arm_block_set_aligned_non_vect (rtx dstbase,
30018 unsigned HOST_WIDE_INT length,
30019 unsigned HOST_WIDE_INT value,
30020 unsigned HOST_WIDE_INT align)
30022 unsigned int i;
30023 rtx dst, addr, mem;
30024 rtx val_exp, val_reg, reg;
30025 unsigned HOST_WIDE_INT v;
30026 bool use_strd_p;
30028 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30029 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30031 v = (value | (value << 8) | (value << 16) | (value << 24));
30032 if (length < UNITS_PER_WORD)
30033 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30035 if (use_strd_p)
30036 v |= (v << BITS_PER_WORD);
30037 else
30038 v = sext_hwi (v, BITS_PER_WORD);
30040 val_exp = GEN_INT (v);
30041 /* Skip if it isn't profitable. */
30042 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30043 align, false, use_strd_p))
30045 if (!use_strd_p)
30046 return false;
30048 /* Try without strd. */
30049 v = (v >> BITS_PER_WORD);
30050 v = sext_hwi (v, BITS_PER_WORD);
30051 val_exp = GEN_INT (v);
30052 use_strd_p = false;
30053 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30054 align, false, use_strd_p))
30055 return false;
30058 i = 0;
30059 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30060 /* Handle double words using strd if possible. */
30061 if (use_strd_p)
30063 val_reg = force_reg (DImode, val_exp);
30064 reg = val_reg;
30065 for (; (i + 8 <= length); i += 8)
30067 addr = plus_constant (Pmode, dst, i);
30068 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30069 emit_move_insn (mem, reg);
30072 else
30073 val_reg = force_reg (SImode, val_exp);
30075 /* Handle words. */
30076 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30077 for (; (i + 4 <= length); i += 4)
30079 addr = plus_constant (Pmode, dst, i);
30080 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30081 if ((align & 3) == 0)
30082 emit_move_insn (mem, reg);
30083 else
30084 emit_insn (gen_unaligned_storesi (mem, reg));
30087 /* Merge last pair of STRH and STRB into a STR if possible. */
30088 if (unaligned_access && i > 0 && (i + 3) == length)
30090 addr = plus_constant (Pmode, dst, i - 1);
30091 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30092 /* We are shifting one byte back, set the alignment accordingly. */
30093 if ((align & 1) == 0)
30094 set_mem_align (mem, BITS_PER_UNIT);
30096 /* Most likely this is an unaligned access, and we can't tell at
30097 compilation time. */
30098 emit_insn (gen_unaligned_storesi (mem, reg));
30099 return true;
30102 /* Handle half word leftover. */
30103 if (i + 2 <= length)
30105 reg = gen_lowpart (HImode, val_reg);
30106 addr = plus_constant (Pmode, dst, i);
30107 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30108 if ((align & 1) == 0)
30109 emit_move_insn (mem, reg);
30110 else
30111 emit_insn (gen_unaligned_storehi (mem, reg));
30113 i += 2;
30116 /* Handle single byte leftover. */
30117 if (i + 1 == length)
30119 reg = gen_lowpart (QImode, val_reg);
30120 addr = plus_constant (Pmode, dst, i);
30121 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30122 emit_move_insn (mem, reg);
30125 return true;
30128 /* Set a block of memory using vectorization instructions for both
30129 aligned and unaligned cases. We fill the first LENGTH bytes of
30130 the memory area starting from DSTBASE with byte constant VALUE.
30131 ALIGN is the alignment requirement of memory. */
30132 static bool
30133 arm_block_set_vect (rtx dstbase,
30134 unsigned HOST_WIDE_INT length,
30135 unsigned HOST_WIDE_INT value,
30136 unsigned HOST_WIDE_INT align)
30138 /* Check whether we need to use unaligned store instruction. */
30139 if (((align & 3) != 0 || (length & 3) != 0)
30140 /* Check whether unaligned store instruction is available. */
30141 && (!unaligned_access || BYTES_BIG_ENDIAN))
30142 return false;
30144 if ((align & 3) == 0)
30145 return arm_block_set_aligned_vect (dstbase, length, value, align);
30146 else
30147 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30150 /* Expand a string store operation. First we try to do it using
30151 vectorization instructions, then with ARM unaligned access and
30152 double-word stores if profitable. OPERANDS[0] is the destination,
30153 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30154 initialize the memory, OPERANDS[3] is the known alignment of the
30155 destination. */
30156 bool
30157 arm_gen_setmem (rtx *operands)
30159 rtx dstbase = operands[0];
30160 unsigned HOST_WIDE_INT length;
30161 unsigned HOST_WIDE_INT value;
30162 unsigned HOST_WIDE_INT align;
30164 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30165 return false;
30167 length = UINTVAL (operands[1]);
30168 if (length > 64)
30169 return false;
30171 value = (UINTVAL (operands[2]) & 0xFF);
30172 align = UINTVAL (operands[3]);
30173 if (TARGET_NEON && length >= 8
30174 && current_tune->string_ops_prefer_neon
30175 && arm_block_set_vect (dstbase, length, value, align))
30176 return true;
30178 if (!unaligned_access && (align & 3) != 0)
30179 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30181 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
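/* For illustration: a call such as memset (p, 0xab, 15) with a word-aligned
   destination reaches this function with OPERANDS[1] == 15,
   OPERANDS[2] == 0xab and OPERANDS[3] == 4; with NEON enabled and a tuning
   that prefers it, the vectorized path is tried first, otherwise the aligned
   non-vector path above is used.  */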
30185 static bool
30186 arm_macro_fusion_p (void)
30188 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30191 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30192 for MOVW / MOVT macro fusion. */
30194 static bool
30195 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30197 /* We are trying to fuse
30198 movw imm / movt imm
30199 instructions as a group that gets scheduled together. */
30201 rtx set_dest = SET_DEST (curr_set);
30203 if (GET_MODE (set_dest) != SImode)
30204 return false;
30206 /* We are trying to match:
30207 prev (movw) == (set (reg r0) (const_int imm16))
30208 curr (movt) == (set (zero_extract (reg r0)
30209 (const_int 16)
30210 (const_int 16))
30211 (const_int imm16_1))
30213 prev (movw) == (set (reg r1)
30214 (high (symbol_ref ("SYM"))))
30215 curr (movt) == (set (reg r0)
30216 (lo_sum (reg r1)
30217 (symbol_ref ("SYM")))) */
30219 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30221 if (CONST_INT_P (SET_SRC (curr_set))
30222 && CONST_INT_P (SET_SRC (prev_set))
30223 && REG_P (XEXP (set_dest, 0))
30224 && REG_P (SET_DEST (prev_set))
30225 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30226 return true;
30229 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30230 && REG_P (SET_DEST (curr_set))
30231 && REG_P (SET_DEST (prev_set))
30232 && GET_CODE (SET_SRC (prev_set)) == HIGH
30233 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30234 return true;
30236 return false;
30239 static bool
30240 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30242 rtx prev_set = single_set (prev);
30243 rtx curr_set = single_set (curr);
30245 if (!prev_set
30246 || !curr_set)
30247 return false;
30249 if (any_condjump_p (curr))
30250 return false;
30252 if (!arm_macro_fusion_p ())
30253 return false;
30255 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30256 && aarch_crypto_can_dual_issue (prev, curr))
30257 return true;
30259 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30260 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30261 return true;
30263 return false;
30266 /* Return true iff the instruction fusion described by OP is enabled. */
30267 bool
30268 arm_fusion_enabled_p (tune_params::fuse_ops op)
30270 return current_tune->fusible_ops & op;
30273 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30274 scheduled for speculative execution. Reject the long-running division
30275 and square-root instructions. */
30277 static bool
30278 arm_sched_can_speculate_insn (rtx_insn *insn)
30280 switch (get_attr_type (insn))
30282 case TYPE_SDIV:
30283 case TYPE_UDIV:
30284 case TYPE_FDIVS:
30285 case TYPE_FDIVD:
30286 case TYPE_FSQRTS:
30287 case TYPE_FSQRTD:
30288 case TYPE_NEON_FP_SQRT_S:
30289 case TYPE_NEON_FP_SQRT_D:
30290 case TYPE_NEON_FP_SQRT_S_Q:
30291 case TYPE_NEON_FP_SQRT_D_Q:
30292 case TYPE_NEON_FP_DIV_S:
30293 case TYPE_NEON_FP_DIV_D:
30294 case TYPE_NEON_FP_DIV_S_Q:
30295 case TYPE_NEON_FP_DIV_D_Q:
30296 return false;
30297 default:
30298 return true;
30302 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30304 static unsigned HOST_WIDE_INT
30305 arm_asan_shadow_offset (void)
30307 return HOST_WIDE_INT_1U << 29;
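/* For illustration: 1 << 29 is 0x20000000, so with the usual ASan mapping a
   byte at address A is shadowed at (A >> 3) + 0x20000000.  */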
30311 /* This is a temporary fix for PR60655. Ideally we need
30312 to handle most of these cases in the generic part but
30313 currently we reject minus (..) (sym_ref). We try to
30314 ameliorate the case with minus (sym_ref1) (sym_ref2)
30315 where they are in the same section. */
30317 static bool
30318 arm_const_not_ok_for_debug_p (rtx p)
30320 tree decl_op0 = NULL;
30321 tree decl_op1 = NULL;
30323 if (GET_CODE (p) == MINUS)
30325 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30327 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30328 if (decl_op1
30329 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30330 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30332 if ((VAR_P (decl_op1)
30333 || TREE_CODE (decl_op1) == CONST_DECL)
30334 && (VAR_P (decl_op0)
30335 || TREE_CODE (decl_op0) == CONST_DECL))
30336 return (get_variable_section (decl_op1, false)
30337 != get_variable_section (decl_op0, false));
30339 if (TREE_CODE (decl_op1) == LABEL_DECL
30340 && TREE_CODE (decl_op0) == LABEL_DECL)
30341 return (DECL_CONTEXT (decl_op1)
30342 != DECL_CONTEXT (decl_op0));
30345 return true;
30349 return false;
30352 /* Return TRUE if X is a reference to a value in a constant pool. */
30353 extern bool
30354 arm_is_constant_pool_ref (rtx x)
30356 return (MEM_P (x)
30357 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30358 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30361 /* Remember the last target of arm_set_current_function. */
30362 static GTY(()) tree arm_previous_fndecl;
30364 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30366 void
30367 save_restore_target_globals (tree new_tree)
30369 /* If we have a previous state, use it. */
30370 if (TREE_TARGET_GLOBALS (new_tree))
30371 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30372 else if (new_tree == target_option_default_node)
30373 restore_target_globals (&default_target_globals);
30374 else
30376 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30377 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30380 arm_option_params_internal ();
30383 /* Invalidate arm_previous_fndecl. */
30385 void
30386 arm_reset_previous_fndecl (void)
30388 arm_previous_fndecl = NULL_TREE;
30391 /* Establish appropriate back-end context for processing the function
30392 FNDECL. The argument might be NULL to indicate processing at top
30393 level, outside of any function scope. */
30395 static void
30396 arm_set_current_function (tree fndecl)
30398 if (!fndecl || fndecl == arm_previous_fndecl)
30399 return;
30401 tree old_tree = (arm_previous_fndecl
30402 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30403 : NULL_TREE);
30405 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30407 /* If current function has no attributes but previous one did,
30408 use the default node. */
30409 if (! new_tree && old_tree)
30410 new_tree = target_option_default_node;
30412 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30413 the default have been handled by save_restore_target_globals from
30414 arm_pragma_target_parse. */
30415 if (old_tree == new_tree)
30416 return;
30418 arm_previous_fndecl = fndecl;
30420 /* First set the target options. */
30421 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30423 save_restore_target_globals (new_tree);
30426 /* Implement TARGET_OPTION_PRINT. */
30428 static void
30429 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30431 int flags = ptr->x_target_flags;
30432 const char *fpu_name;
30434 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30435 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30437 fprintf (file, "%*sselected isa %s\n", indent, "",
30438 TARGET_THUMB2_P (flags) ? "thumb2" :
30439 TARGET_THUMB_P (flags) ? "thumb1" :
30440 "arm");
30442 if (ptr->x_arm_arch_string)
30443 fprintf (file, "%*sselected architecture %s\n", indent, "",
30444 ptr->x_arm_arch_string);
30446 if (ptr->x_arm_cpu_string)
30447 fprintf (file, "%*sselected CPU %s\n", indent, "",
30448 ptr->x_arm_cpu_string);
30450 if (ptr->x_arm_tune_string)
30451 fprintf (file, "%*sselected tune %s\n", indent, "",
30452 ptr->x_arm_tune_string);
30454 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30457 /* Hook to determine if one function can safely inline another. */
30459 static bool
30460 arm_can_inline_p (tree caller, tree callee)
30462 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30463 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30464 bool can_inline = true;
30466 struct cl_target_option *caller_opts
30467 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30468 : target_option_default_node);
30470 struct cl_target_option *callee_opts
30471 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30472 : target_option_default_node);
30474 if (callee_opts == caller_opts)
30475 return true;
30477 /* Callee's ISA features should be a subset of the caller's. */
30478 struct arm_build_target caller_target;
30479 struct arm_build_target callee_target;
30480 caller_target.isa = sbitmap_alloc (isa_num_bits);
30481 callee_target.isa = sbitmap_alloc (isa_num_bits);
30483 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30484 false);
30485 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30486 false);
30487 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30488 can_inline = false;
30490 sbitmap_free (caller_target.isa);
30491 sbitmap_free (callee_target.isa);
30493 /* OK to inline between different modes.
30494 Functions with mode-specific instructions, e.g. using asm,
30495 must be explicitly protected with noinline. */
30496 return can_inline;
30499 /* Hook to fix function's alignment affected by target attribute. */
30501 static void
30502 arm_relayout_function (tree fndecl)
30504 if (DECL_USER_ALIGN (fndecl))
30505 return;
30507 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30509 if (!callee_tree)
30510 callee_tree = target_option_default_node;
30512 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30513 SET_DECL_ALIGN
30514 (fndecl,
30515 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
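
/* This matters when a target attribute switches the instruction set:
   Thumb functions only need 2-byte alignment while ARM functions need
   4 bytes, so a function carrying e.g. __attribute__((target("thumb")))
   in an otherwise ARM translation unit gets its DECL_ALIGN recomputed
   here from its own target flags rather than from the global ones.  */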
30518 /* Inner function to process the attribute((target(...))); take an argument and
30519 set the current options from that argument. If we have a list, recursively
30520 go over the list. */
30522 static bool
30523 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30525 if (TREE_CODE (args) == TREE_LIST)
30527 bool ret = true;
30529 for (; args; args = TREE_CHAIN (args))
30530 if (TREE_VALUE (args)
30531 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30532 ret = false;
30533 return ret;
30536 else if (TREE_CODE (args) != STRING_CST)
30538 error ("attribute %<target%> argument not a string");
30539 return false;
30542 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30543 char *q;
30545 while ((q = strtok (argstr, ",")) != NULL)
30547 while (ISSPACE (*q)) ++q;
30549 argstr = NULL;
30550 if (!strncmp (q, "thumb", 5))
30551 opts->x_target_flags |= MASK_THUMB;
30553 else if (!strncmp (q, "arm", 3))
30554 opts->x_target_flags &= ~MASK_THUMB;
30556 else if (!strncmp (q, "fpu=", 4))
30558 int fpu_index;
30559 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30560 &fpu_index, CL_TARGET))
30562 error ("invalid fpu for attribute(target(\"%s\"))", q);
30563 return false;
30565 if (fpu_index == TARGET_FPU_auto)
30567 /* This doesn't really make sense until we support
30568 general dynamic selection of the architecture and all
30569 sub-features. */
30570 sorry ("auto fpu selection not currently permitted here");
30571 return false;
30573 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30575 else
30577 error ("attribute(target(\"%s\")) is unknown", q);
30578 return false;
30582 return true;
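
/* As an example of what the loop above accepts, a declaration such as

     void f (void) __attribute__((target("thumb,fpu=crypto-neon-fp-armv8")));

   arrives here as the single string "thumb,fpu=crypto-neon-fp-armv8"; it is
   split on commas, "thumb" sets MASK_THUMB, and the "fpu=" token is looked
   up through the -mfpu= option table.  (The FPU name is only an
   illustration; any name accepted by -mfpu= works.)  */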
30585 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30587 tree
30588 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30589 struct gcc_options *opts_set)
30591 struct cl_target_option cl_opts;
30593 if (!arm_valid_target_attribute_rec (args, opts))
30594 return NULL_TREE;
30596 cl_target_option_save (&cl_opts, opts);
30597 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30598 arm_option_check_internal (opts);
30599 /* Do any overrides, such as an arch=xxx setting from the global options. */
30600 arm_option_override_internal (opts, opts_set);
30602 return build_target_option_node (opts);
30605 static void
30606 add_attribute (const char * mode, tree *attributes)
30608 size_t len = strlen (mode);
30609 tree value = build_string (len, mode);
30611 TREE_TYPE (value) = build_array_type (char_type_node,
30612 build_index_type (size_int (len)));
30614 *attributes = tree_cons (get_identifier ("target"),
30615 build_tree_list (NULL_TREE, value),
30616 *attributes);
30619 /* For testing only. Alternately insert thumb and arm mode attributes on functions. */
30621 static void
30622 arm_insert_attributes (tree fndecl, tree * attributes)
30624 const char *mode;
30626 if (! TARGET_FLIP_THUMB)
30627 return;
30629 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30630 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30631 return;
30633 /* Nested definitions must inherit mode. */
30634 if (current_function_decl)
30636 mode = TARGET_THUMB ? "thumb" : "arm";
30637 add_attribute (mode, attributes);
30638 return;
30641 /* If there is already a setting don't change it. */
30642 if (lookup_attribute ("target", *attributes) != NULL)
30643 return;
30645 mode = thumb_flipper ? "thumb" : "arm";
30646 add_attribute (mode, attributes);
30648 thumb_flipper = !thumb_flipper;
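
/* A minimal sketch of the effect (assuming TARGET_FLIP_THUMB corresponds
   to the -mflip-thumb testing option): successive top-level function
   definitions in a file alternately receive an implicit target("thumb") /
   target("arm") attribute, which exercises the mode-switching paths
   without any source changes.  Functions that already carry a target
   attribute are left alone, and nested definitions inherit the prevailing
   mode.  */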
30651 /* Hook to validate attribute((target("string"))). */
30653 static bool
30654 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30655 tree args, int ARG_UNUSED (flags))
30657 bool ret = true;
30658 struct gcc_options func_options;
30659 tree cur_tree, new_optimize;
30660 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30662 /* Get the optimization options of the current function. */
30663 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30665 /* If the function changed the optimization levels as well as setting target
30666 options, start with the optimizations specified. */
30667 if (!func_optimize)
30668 func_optimize = optimization_default_node;
30670 /* Init func_options. */
30671 memset (&func_options, 0, sizeof (func_options));
30672 init_options_struct (&func_options, NULL);
30673 lang_hooks.init_options_struct (&func_options);
30675 /* Initialize func_options to the defaults. */
30676 cl_optimization_restore (&func_options,
30677 TREE_OPTIMIZATION (func_optimize));
30679 cl_target_option_restore (&func_options,
30680 TREE_TARGET_OPTION (target_option_default_node));
30682 /* Set func_options flags with new target mode. */
30683 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30684 &global_options_set);
30686 if (cur_tree == NULL_TREE)
30687 ret = false;
30689 new_optimize = build_optimization_node (&func_options);
30691 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30693 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30695 finalize_options_struct (&func_options);
30697 return ret;
30700 /* Match an ISA feature bitmap to a named FPU. We always use the
30701 first entry that exactly matches the feature set, so that we
30702 effectively canonicalize the FPU name for the assembler. */
30703 static const char*
30704 arm_identify_fpu_from_isa (sbitmap isa)
30706 auto_sbitmap fpubits (isa_num_bits);
30707 auto_sbitmap cand_fpubits (isa_num_bits);
30709 bitmap_and (fpubits, isa, isa_all_fpubits);
30711 /* If there are no ISA feature bits relating to the FPU, we must be
30712 doing soft-float. */
30713 if (bitmap_empty_p (fpubits))
30714 return "softvfp";
30716 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30718 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30719 if (bitmap_equal_p (fpubits, cand_fpubits))
30720 return all_fpus[i].name;
30722 /* We must find an entry, or things have gone wrong. */
30723 gcc_unreachable ();
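
/* For example (assuming the current contents of arm-fpus.def), the feature
   bits selected by -mfpu=neon-vfpv3 are identical to those of the earlier
   "neon" entry, so the name is canonicalized to "neon" when it is emitted
   in the .fpu directive.  */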
30726 void
30727 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30730 fprintf (stream, "\t.syntax unified\n");
30732 if (TARGET_THUMB)
30734 if (is_called_in_ARM_mode (decl)
30735 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30736 && cfun->is_thunk))
30737 fprintf (stream, "\t.code 32\n");
30738 else if (TARGET_THUMB1)
30739 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30740 else
30741 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30743 else
30744 fprintf (stream, "\t.arm\n");
30746 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30747 (TARGET_SOFT_FLOAT
30748 ? "softvfp"
30749 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30751 if (TARGET_POKE_FUNCTION_NAME)
30752 arm_poke_function_name (stream, (const char *) name);
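
/* For a Thumb-2 target with a VFPv4 FPU, the directives emitted above for
   a function would look roughly like:

     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv4

   (with ".code 32" instead when the function is called in ARM mode, and
   ".fpu softvfp" for soft-float targets).  */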
30755 /* If MEM is in the form [base+offset], extract the two parts
30756 of the address and store them in BASE and OFFSET; otherwise return false
30757 after clearing BASE and OFFSET. */
30759 static bool
30760 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30762 rtx addr;
30764 gcc_assert (MEM_P (mem));
30766 addr = XEXP (mem, 0);
30768 /* Strip off const from addresses like (const (addr)). */
30769 if (GET_CODE (addr) == CONST)
30770 addr = XEXP (addr, 0);
30772 if (GET_CODE (addr) == REG)
30774 *base = addr;
30775 *offset = const0_rtx;
30776 return true;
30779 if (GET_CODE (addr) == PLUS
30780 && GET_CODE (XEXP (addr, 0)) == REG
30781 && CONST_INT_P (XEXP (addr, 1)))
30783 *base = XEXP (addr, 0);
30784 *offset = XEXP (addr, 1);
30785 return true;
30788 *base = NULL_RTX;
30789 *offset = NULL_RTX;
30791 return false;
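
/* Sketch of the two accepted shapes, in RTL terms:

     (mem (reg r1))                       -> BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))  -> BASE = r1, OFFSET = 8

   Anything else (auto-increment, register offsets, ...) makes the function
   clear BASE/OFFSET and return false.  */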
30794 /* If INSN is a load or store whose address has the form [base+offset],
30795 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
30796 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
30797 otherwise return FALSE. */
30799 static bool
30800 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30802 rtx x, dest, src;
30804 gcc_assert (INSN_P (insn));
30805 x = PATTERN (insn);
30806 if (GET_CODE (x) != SET)
30807 return false;
30809 src = SET_SRC (x);
30810 dest = SET_DEST (x);
30811 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30813 *is_load = false;
30814 extract_base_offset_in_addr (dest, base, offset);
30816 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30818 *is_load = true;
30819 extract_base_offset_in_addr (src, base, offset);
30821 else
30822 return false;
30824 return (*base != NULL_RTX && *offset != NULL_RTX);
30827 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30829 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30830 and PRI are only calculated for these instructions. For other instructions,
30831 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
30832 instruction fusion can be supported by returning different priorities.
30834 It's important that irrelevant instructions get the largest FUSION_PRI. */
30836 static void
30837 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30838 int *fusion_pri, int *pri)
30840 int tmp, off_val;
30841 bool is_load;
30842 rtx base, offset;
30844 gcc_assert (INSN_P (insn));
30846 tmp = max_pri - 1;
30847 if (!fusion_load_store (insn, &base, &offset, &is_load))
30849 *pri = tmp;
30850 *fusion_pri = tmp;
30851 return;
30854 /* Load goes first. */
30855 if (is_load)
30856 *fusion_pri = tmp - 1;
30857 else
30858 *fusion_pri = tmp - 2;
30860 tmp /= 2;
30862 /* INSN with smaller base register goes first. */
30863 tmp -= ((REGNO (base) & 0xff) << 20);
30865 /* INSN with smaller offset goes first. */
30866 off_val = (int)(INTVAL (offset));
30867 if (off_val >= 0)
30868 tmp -= (off_val & 0xfffff);
30869 else
30870 tmp += ((- off_val) & 0xfffff);
30872 *pri = tmp;
30873 return;
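
/* The net effect of the arithmetic above is a keyed sort: loads order
   ahead of stores in FUSION_PRI, and within PRI the base register number
   (packed into the high bits) dominates, with smaller offsets coming first
   for the same base.  So, for example, ldr r0, [r4] and ldr r1, [r4, #4]
   end up adjacent in the schedule, which makes it easier for later passes
   to combine them (e.g. into ldrd or a load-multiple).  */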
30877 /* Construct and return a PARALLEL RTX vector with elements numbering the
30878 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30879 the vector - from the perspective of the architecture. This does not
30880 line up with GCC's perspective on lane numbers, so we end up with
30881 different masks depending on our target endian-ness. The diagram
30882 below may help. We must draw the distinction when building masks
30883 which select one half of the vector. An instruction selecting
30884 architectural low-lanes for a big-endian target must be described using
30885 a mask selecting GCC high-lanes.
30887 Big-Endian Little-Endian
30889 GCC 0 1 2 3 3 2 1 0
30890 | x | x | x | x | | x | x | x | x |
30891 Architecture 3 2 1 0 3 2 1 0
30893 Low Mask: { 2, 3 } { 0, 1 }
30894 High Mask: { 0, 1 } { 2, 3 }
30898 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30900 int nunits = GET_MODE_NUNITS (mode);
30901 rtvec v = rtvec_alloc (nunits / 2);
30902 int high_base = nunits / 2;
30903 int low_base = 0;
30904 int base;
30905 rtx t1;
30906 int i;
30908 if (BYTES_BIG_ENDIAN)
30909 base = high ? low_base : high_base;
30910 else
30911 base = high ? high_base : low_base;
30913 for (i = 0; i < nunits / 2; i++)
30914 RTVEC_ELT (v, i) = GEN_INT (base + i);
30916 t1 = gen_rtx_PARALLEL (mode, v);
30917 return t1;
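
/* Concrete example: for V4SImode on a little-endian target,
   arm_simd_vect_par_cnst_half (V4SImode, true) yields
   (parallel [(const_int 2) (const_int 3)]), while the same call on a
   big-endian target yields (parallel [(const_int 0) (const_int 1)]),
   matching the High Mask row of the diagram above.  */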
30920 /* Check OP for validity as a PARALLEL RTX vector with elements
30921 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30922 from the perspective of the architecture. See the diagram above
30923 arm_simd_vect_par_cnst_half for more details. */
30925 bool
30926 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30927 bool high)
30929 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30930 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30931 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30932 int i = 0;
30934 if (!VECTOR_MODE_P (mode))
30935 return false;
30937 if (count_op != count_ideal)
30938 return false;
30940 for (i = 0; i < count_ideal; i++)
30942 rtx elt_op = XVECEXP (op, 0, i);
30943 rtx elt_ideal = XVECEXP (ideal, 0, i);
30945 if (!CONST_INT_P (elt_op)
30946 || INTVAL (elt_ideal) != INTVAL (elt_op))
30947 return false;
30949 return true;
30952 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30953 in Thumb1. */
30954 static bool
30955 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30956 const_tree)
30958 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30959 if (vcall_offset && TARGET_THUMB1)
30960 return false;
30962 /* Otherwise ok. */
30963 return true;
30966 /* Generate RTL for a conditional branch with rtx comparison CODE in
30967 mode CC_MODE. The destination of the unlikely conditional branch
30968 is LABEL_REF. */
30970 void
30971 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30972 rtx label_ref)
30974 rtx x;
30975 x = gen_rtx_fmt_ee (code, VOIDmode,
30976 gen_rtx_REG (cc_mode, CC_REGNUM),
30977 const0_rtx);
30979 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30980 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30981 pc_rtx);
30982 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30985 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30987 For pure-code sections there is no letter code for this attribute, so
30988 output all the section flags numerically when this is needed. */
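
/* The hexadecimal constants used below are the ELF section flag values as
   they appear in a section header's sh_flags field: SHF_WRITE (0x1),
   SHF_ALLOC (0x2), SHF_EXECINSTR (0x4), SHF_MERGE (0x10),
   SHF_STRINGS (0x20), SHF_GROUP (0x200), SHF_TLS (0x400),
   SHF_EXCLUDE (0x80000000) and the ARM-specific SHF_ARM_PURECODE
   (0x20000000).  */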
30990 static bool
30991 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30994 if (flags & SECTION_ARM_PURECODE)
30996 *num = 0x20000000;
30998 if (!(flags & SECTION_DEBUG))
30999 *num |= 0x2;
31000 if (flags & SECTION_EXCLUDE)
31001 *num |= 0x80000000;
31002 if (flags & SECTION_WRITE)
31003 *num |= 0x1;
31004 if (flags & SECTION_CODE)
31005 *num |= 0x4;
31006 if (flags & SECTION_MERGE)
31007 *num |= 0x10;
31008 if (flags & SECTION_STRINGS)
31009 *num |= 0x20;
31010 if (flags & SECTION_TLS)
31011 *num |= 0x400;
31012 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31013 *num |= 0x200;
31015 return true;
31018 return false;
31021 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31023 If pure-code is passed as an option, make sure all functions are in
31024 sections that have the SHF_ARM_PURECODE attribute. */
31026 static section *
31027 arm_function_section (tree decl, enum node_frequency freq,
31028 bool startup, bool exit)
31030 const char * section_name;
31031 section * sec;
31033 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31034 return default_function_section (decl, freq, startup, exit);
31036 if (!target_pure_code)
31037 return default_function_section (decl, freq, startup, exit);
31040 section_name = DECL_SECTION_NAME (decl);
31042 /* If a function is not in a named section then it falls under the 'default'
31043 text section, also known as '.text'. We can preserve previous behavior as
31044 the default text section already has the SHF_ARM_PURECODE section
31045 attribute. */
31046 if (!section_name)
31048 section *default_sec = default_function_section (decl, freq, startup,
31049 exit);
31051 /* If default_sec is not null, then it must be a special section like for
31052 example .text.startup. We set the pure-code attribute and return the
31053 same section to preserve existing behavior. */
31054 if (default_sec)
31055 default_sec->common.flags |= SECTION_ARM_PURECODE;
31056 return default_sec;
31059 /* Otherwise look whether a section has already been created with
31060 'section_name'. */
31061 sec = get_named_section (decl, section_name, 0);
31062 if (!sec)
31063 /* If that is not the case, passing NULL as the section's name to
31064 'get_named_section' will create a section with the declaration's
31065 section name. */
31066 sec = get_named_section (decl, NULL, 0);
31068 /* Set the SHF_ARM_PURECODE attribute. */
31069 sec->common.flags |= SECTION_ARM_PURECODE;
31071 return sec;
31074 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31076 If DECL is a function declaration and pure-code is passed as an option
31077 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31078 section's name and RELOC indicates whether the declaration's initializer may
31079 contain runtime relocations. */
31081 static unsigned int
31082 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31084 unsigned int flags = default_section_type_flags (decl, name, reloc);
31086 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31087 flags |= SECTION_ARM_PURECODE;
31089 return flags;
31092 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31094 static void
31095 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31096 rtx op0, rtx op1,
31097 rtx *quot_p, rtx *rem_p)
31099 if (mode == SImode)
31100 gcc_assert (!TARGET_IDIV);
31102 scalar_int_mode libval_mode
31103 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31105 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31106 libval_mode,
31107 op0, GET_MODE (op0),
31108 op1, GET_MODE (op1));
31110 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31111 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31112 GET_MODE_SIZE (mode));
31114 gcc_assert (quotient);
31115 gcc_assert (remainder);
31117 *quot_p = quotient;
31118 *rem_p = remainder;
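
/* Sketch of the data flow above: the __aeabi_*divmod helpers return the
   quotient and remainder together in a value twice the width of the
   operands (for SImode, quotient in r0 and remainder in r1, treated here
   as a single DImode libcall value); the two simplify_gen_subreg calls
   then extract the quotient at byte offset 0 and the remainder at byte
   offset GET_MODE_SIZE (mode).  */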
31121 /* This function checks for the availability of the coprocessor builtin passed
31122 in BUILTIN for the current target. Returns true if it is available and
31123 false otherwise. If a BUILTIN is passed for which this function has not
31124 been implemented, it will cause an internal compiler error. */
31126 bool
31127 arm_coproc_builtin_available (enum unspecv builtin)
31129 /* None of these builtins are available in Thumb mode if the target only
31130 supports Thumb-1. */
31131 if (TARGET_THUMB1)
31132 return false;
31134 switch (builtin)
31136 case VUNSPEC_CDP:
31137 case VUNSPEC_LDC:
31138 case VUNSPEC_LDCL:
31139 case VUNSPEC_STC:
31140 case VUNSPEC_STCL:
31141 case VUNSPEC_MCR:
31142 case VUNSPEC_MRC:
31143 if (arm_arch4)
31144 return true;
31145 break;
31146 case VUNSPEC_CDP2:
31147 case VUNSPEC_LDC2:
31148 case VUNSPEC_LDC2L:
31149 case VUNSPEC_STC2:
31150 case VUNSPEC_STC2L:
31151 case VUNSPEC_MCR2:
31152 case VUNSPEC_MRC2:
31153 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31154 ARMv8-{A,M}. */
31155 if (arm_arch5)
31156 return true;
31157 break;
31158 case VUNSPEC_MCRR:
31159 case VUNSPEC_MRRC:
31160 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31161 ARMv8-{A,M}. */
31162 if (arm_arch6 || arm_arch5te)
31163 return true;
31164 break;
31165 case VUNSPEC_MCRR2:
31166 case VUNSPEC_MRRC2:
31167 if (arm_arch6)
31168 return true;
31169 break;
31170 default:
31171 gcc_unreachable ();
31173 return false;
31176 /* This function returns true if OP is a valid memory operand for the ldc and
31177 stc coprocessor instructions and false otherwise. */
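
/* The forms accepted below mirror what the LDC/STC encodings can express:
   a plain base register, a base register plus a constant offset that is a
   multiple of 4 in the range [-1020, 1020], or a pre-/post-
   increment/decrement of a base register.  */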
31179 bool
31180 arm_coproc_ldc_stc_legitimate_address (rtx op)
31182 HOST_WIDE_INT range;
31183 /* Has to be a memory operand. */
31184 if (!MEM_P (op))
31185 return false;
31187 op = XEXP (op, 0);
31189 /* We accept registers. */
31190 if (REG_P (op))
31191 return true;
31193 switch (GET_CODE (op))
31195 case PLUS:
31197 /* Or registers with an offset. */
31198 if (!REG_P (XEXP (op, 0)))
31199 return false;
31201 op = XEXP (op, 1);
31203 /* The offset must be an immediate though. */
31204 if (!CONST_INT_P (op))
31205 return false;
31207 range = INTVAL (op);
31209 /* Within the range of [-1020,1020]. */
31210 if (!IN_RANGE (range, -1020, 1020))
31211 return false;
31213 /* And a multiple of 4. */
31214 return (range % 4) == 0;
31216 case PRE_INC:
31217 case POST_INC:
31218 case PRE_DEC:
31219 case POST_DEC:
31220 return REG_P (XEXP (op, 0));
31221 default:
31222 gcc_unreachable ();
31224 return false;
31227 #if CHECKING_P
31228 namespace selftest {
31230 /* Scan the static data tables generated by parsecpu.awk looking for
31231 potential issues with the data. We primarily check for
31232 inconsistencies in the option extensions at present (extensions
31233 that duplicate others but aren't marked as aliases). Furthermore,
31234 for correct canonicalization later options must never be a subset
31235 of an earlier option. Any extension should also only specify other
31236 feature bits and never an architecture bit. The architecture is inferred
31237 from the declaration of the extension. */
31238 static void
31239 arm_test_cpu_arch_data (void)
31241 const arch_option *arch;
31242 const cpu_option *cpu;
31243 auto_sbitmap target_isa (isa_num_bits);
31244 auto_sbitmap isa1 (isa_num_bits);
31245 auto_sbitmap isa2 (isa_num_bits);
31247 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31249 const cpu_arch_extension *ext1, *ext2;
31251 if (arch->common.extensions == NULL)
31252 continue;
31254 arm_initialize_isa (target_isa, arch->common.isa_bits);
31256 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31258 if (ext1->alias)
31259 continue;
31261 arm_initialize_isa (isa1, ext1->isa_bits);
31262 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31264 if (ext2->alias || ext1->remove != ext2->remove)
31265 continue;
31267 arm_initialize_isa (isa2, ext2->isa_bits);
31268 /* If the option is a subset of the parent option, it doesn't
31269 add anything and so isn't useful. */
31270 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31272 /* If the extension specifies any architectural bits then
31273 disallow it. Extensions should only specify feature bits. */
31274 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31279 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31281 const cpu_arch_extension *ext1, *ext2;
31283 if (cpu->common.extensions == NULL)
31284 continue;
31286 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31288 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31290 if (ext1->alias)
31291 continue;
31293 arm_initialize_isa (isa1, ext1->isa_bits);
31294 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31296 if (ext2->alias || ext1->remove != ext2->remove)
31297 continue;
31299 arm_initialize_isa (isa2, ext2->isa_bits);
31300 /* If the option is a subset of the parent option, it doesn't
31301 add anything and so isn't useful. */
31302 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31304 /* If the extension specifies any architectural bits then
31305 disallow it. Extensions should only specify feature bits. */
31306 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31312 static void
31313 arm_run_selftests (void)
31315 arm_test_cpu_arch_data ();
31317 } /* Namespace selftest. */
31319 #undef TARGET_RUN_TARGET_SELFTESTS
31320 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31321 #endif /* CHECKING_P */
31323 struct gcc_target targetm = TARGET_INITIALIZER;
31325 #include "gt-arm.h"