Fix numerous typos in comments
gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
65 #include "optabs-libfuncs.h"
66 #include "gimplify.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 /* Forward definitions of types. */
72 typedef struct minipool_node Mnode;
73 typedef struct minipool_fixup Mfix;
75 void (*arm_lang_output_object_attributes_hook)(void);
77 struct four_ints
79 int i[4];
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx);
84 static bool arm_needs_doubleword_align (machine_mode, const_tree);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets *arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
89 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap);
92 static int arm_address_register_rtx_p (rtx, int);
93 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
94 static bool is_called_in_ARM_mode (tree);
95 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
96 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
97 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
98 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
99 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
100 inline static int thumb1_index_register_rtx_p (rtx, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx, int);
106 static void arm_print_operand_address (FILE *, machine_mode, rtx);
107 static bool arm_print_operand_punct_valid_p (unsigned char code);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
109 static arm_cc get_arm_condition_code (rtx);
110 static const char *output_multi_immediate (rtx *, const char *, const char *,
111 int, HOST_WIDE_INT);
112 static const char *shift_op (rtx, HOST_WIDE_INT *);
113 static struct machine_function *arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
116 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_forward_ref (Mfix *);
118 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
119 static Mnode *add_minipool_backward_ref (Mfix *);
120 static void assign_minipool_offsets (Mfix *);
121 static void arm_print_value (FILE *, rtx);
122 static void dump_minipool (rtx_insn *);
123 static int arm_barrier_cost (rtx_insn *);
124 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
125 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
126 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
127 machine_mode, rtx);
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree);
133 static unsigned long arm_compute_func_type (void);
134 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
135 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
136 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
139 #endif
140 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
141 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
144 static int arm_comp_type_attributes (const_tree, const_tree);
145 static void arm_set_default_type_attributes (tree);
146 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code,
149 unsigned HOST_WIDE_INT val,
150 struct four_ints *return_sequence);
151 static int optimal_immediate_sequence_1 (enum rtx_code code,
152 unsigned HOST_WIDE_INT val,
153 struct four_ints *return_sequence,
154 int i);
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree, tree);
157 static machine_mode arm_promote_function_mode (const_tree,
158 machine_mode, int *,
159 const_tree, int);
160 static bool arm_return_in_memory (const_tree, const_tree);
161 static rtx arm_function_value (const_tree, const_tree, bool);
162 static rtx arm_libcall_value_1 (machine_mode);
163 static rtx arm_libcall_value (machine_mode, const_rtx);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
167 tree);
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode, rtx);
170 static bool arm_legitimate_constant_p (machine_mode, rtx);
171 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
172 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx_insn *emit_set_insn (rtx, rtx);
177 static rtx emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
179 tree, bool);
180 static rtx arm_function_arg (cumulative_args_t, machine_mode,
181 const_tree, bool);
182 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
186 const_tree);
187 static rtx aapcs_libcall_value (machine_mode);
188 static int aapcs_select_return_coproc (const_tree, const_tree);
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
192 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
193 #endif
194 #ifndef ARM_PE
195 static void arm_encode_section_info (tree, rtx, int);
196 #endif
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree, tree *);
202 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
203 tree, int *, int);
204 static bool arm_pass_by_reference (cumulative_args_t,
205 machine_mode, const_tree, bool);
206 static bool arm_promote_prototypes (const_tree);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree);
210 static bool arm_must_pass_in_stack (machine_mode, const_tree);
211 static bool arm_return_in_memory (const_tree, const_tree);
212 #if ARM_UNWIND_INFO
213 static void arm_unwind_emit (FILE *, rtx_insn *);
214 static bool arm_output_ttype (rtx);
215 static void arm_asm_emit_except_personality (rtx);
216 #endif
217 static void arm_asm_init_sections (void);
218 static rtx arm_dwarf_register_span (rtx);
220 static tree arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree arm_get_cookie_size (tree);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree, rtx);
232 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options *,
235 struct cl_target_option *);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_sched_can_speculate_insn (rtx_insn *);
244 static bool arm_macro_fusion_p (void);
245 static bool arm_cannot_copy_insn_p (rtx_insn *);
246 static int arm_issue_rate (void);
247 static int arm_first_cycle_multipass_dfa_lookahead (void);
248 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
249 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
250 static bool arm_output_addr_const_extra (FILE *, rtx);
251 static bool arm_allocate_stack_slots_for_args (void);
252 static bool arm_warn_func_return (tree);
253 static tree arm_promoted_type (const_tree t);
254 static bool arm_scalar_mode_supported_p (machine_mode);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx, tree, rtx);
259 static rtx arm_trampoline_adjust_address (rtx);
260 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool arm_array_mode_supported_p (machine_mode,
265 unsigned HOST_WIDE_INT);
266 static machine_mode arm_preferred_simd_mode (machine_mode);
267 static bool arm_class_likely_spilled_p (reg_class_t);
268 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
269 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
271 const_tree type,
272 int misalignment,
273 bool is_packed);
274 static void arm_conditional_register_usage (void);
275 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
276 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
277 static unsigned int arm_autovectorize_vector_sizes (void);
278 static int arm_default_branch_cost (bool, bool);
279 static int arm_cortex_a5_branch_cost (bool, bool);
280 static int arm_cortex_m_branch_cost (bool, bool);
281 static int arm_cortex_m7_branch_cost (bool, bool);
283 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
284 const unsigned char *sel);
286 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
288 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
289 tree vectype,
290 int misalign ATTRIBUTE_UNUSED);
291 static unsigned arm_add_stmt_cost (void *data, int count,
292 enum vect_cost_for_stmt kind,
293 struct _stmt_vec_info *stmt_info,
294 int misalign,
295 enum vect_cost_model_location where);
297 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
298 bool op0_preserve_value);
299 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
301 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
302 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
303 const_tree);
304 static section *arm_function_section (tree, enum node_frequency, bool, bool);
305 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
306 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
307 int reloc);
308 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
309 static machine_mode arm_floatn_mode (int, bool);
311 /* Table of machine attributes. */
312 static const struct attribute_spec arm_attribute_table[] =
314 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
315 affects_type_identity } */
316 /* Function calls made to this symbol must be done indirectly, because
317 it may lie outside of the 26 bit addressing range of a normal function
318 call. */
319 { "long_call", 0, 0, false, true, true, NULL, false },
320 /* Whereas these functions are always known to reside within the 26 bit
321 addressing range. */
322 { "short_call", 0, 0, false, true, true, NULL, false },
323 /* Specify the procedure call conventions for a function. */
324 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
325 false },
326 /* Interrupt Service Routines have special prologue and epilogue requirements. */
327 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
329 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
332 false },
333 #ifdef ARM_PE
334 /* ARM/PE has three new attributes:
335 interfacearm - ?
336 dllexport - for exporting a function/variable that will live in a dll
337 dllimport - for importing a function/variable from a dll
339 Microsoft allows multiple declspecs in one __declspec, separating
340 them with spaces. We do NOT support this. Instead, use __declspec
341 multiple times.
343 { "dllimport", 0, 0, true, false, false, NULL, false },
344 { "dllexport", 0, 0, true, false, false, NULL, false },
345 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
346 false },
347 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
348 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
349 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
350 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
351 false },
352 #endif
353 /* ARMv8-M Security Extensions support. */
354 { "cmse_nonsecure_entry", 0, 0, true, false, false,
355 arm_handle_cmse_nonsecure_entry, false },
356 { "cmse_nonsecure_call", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_call, true },
358 { NULL, 0, 0, false, false, false, NULL, false }
361 /* Initialize the GCC target structure. */
362 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
363 #undef TARGET_MERGE_DECL_ATTRIBUTES
364 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
365 #endif
367 #undef TARGET_LEGITIMIZE_ADDRESS
368 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_INSERT_ATTRIBUTES
374 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
376 #undef TARGET_ASM_FILE_START
377 #define TARGET_ASM_FILE_START arm_file_start
378 #undef TARGET_ASM_FILE_END
379 #define TARGET_ASM_FILE_END arm_file_end
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP NULL
383 #undef TARGET_ASM_INTEGER
384 #define TARGET_ASM_INTEGER arm_assemble_integer
386 #undef TARGET_PRINT_OPERAND
387 #define TARGET_PRINT_OPERAND arm_print_operand
388 #undef TARGET_PRINT_OPERAND_ADDRESS
389 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
390 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
391 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
393 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
394 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
396 #undef TARGET_ASM_FUNCTION_PROLOGUE
397 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_EPILOGUE
400 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
402 #undef TARGET_CAN_INLINE_P
403 #define TARGET_CAN_INLINE_P arm_can_inline_p
405 #undef TARGET_RELAYOUT_FUNCTION
406 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE arm_option_override
411 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
412 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
414 #undef TARGET_OPTION_RESTORE
415 #define TARGET_OPTION_RESTORE arm_option_restore
417 #undef TARGET_OPTION_PRINT
418 #define TARGET_OPTION_PRINT arm_option_print
420 #undef TARGET_COMP_TYPE_ATTRIBUTES
421 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
423 #undef TARGET_SCHED_CAN_SPECULATE_INSN
424 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
426 #undef TARGET_SCHED_MACRO_FUSION_P
427 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
429 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
430 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
432 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
433 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
435 #undef TARGET_SCHED_ADJUST_COST
436 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
438 #undef TARGET_SET_CURRENT_FUNCTION
439 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
441 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
442 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER arm_sched_reorder
447 #undef TARGET_REGISTER_MOVE_COST
448 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
450 #undef TARGET_MEMORY_MOVE_COST
451 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
453 #undef TARGET_ENCODE_SECTION_INFO
454 #ifdef ARM_PE
455 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
456 #else
457 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
458 #endif
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
463 #undef TARGET_ASM_INTERNAL_LABEL
464 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
466 #undef TARGET_FLOATN_MODE
467 #define TARGET_FLOATN_MODE arm_floatn_mode
469 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
470 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
472 #undef TARGET_FUNCTION_VALUE
473 #define TARGET_FUNCTION_VALUE arm_function_value
475 #undef TARGET_LIBCALL_VALUE
476 #define TARGET_LIBCALL_VALUE arm_libcall_value
478 #undef TARGET_FUNCTION_VALUE_REGNO_P
479 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
481 #undef TARGET_ASM_OUTPUT_MI_THUNK
482 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
483 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
484 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
486 #undef TARGET_RTX_COSTS
487 #define TARGET_RTX_COSTS arm_rtx_costs
488 #undef TARGET_ADDRESS_COST
489 #define TARGET_ADDRESS_COST arm_address_cost
491 #undef TARGET_SHIFT_TRUNCATION_MASK
492 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
493 #undef TARGET_VECTOR_MODE_SUPPORTED_P
494 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
495 #undef TARGET_ARRAY_MODE_SUPPORTED_P
496 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
497 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
498 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
499 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
500 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
501 arm_autovectorize_vector_sizes
503 #undef TARGET_MACHINE_DEPENDENT_REORG
504 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
506 #undef TARGET_INIT_BUILTINS
507 #define TARGET_INIT_BUILTINS arm_init_builtins
508 #undef TARGET_EXPAND_BUILTIN
509 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
510 #undef TARGET_BUILTIN_DECL
511 #define TARGET_BUILTIN_DECL arm_builtin_decl
513 #undef TARGET_INIT_LIBFUNCS
514 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
516 #undef TARGET_PROMOTE_FUNCTION_MODE
517 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
518 #undef TARGET_PROMOTE_PROTOTYPES
519 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
520 #undef TARGET_PASS_BY_REFERENCE
521 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
522 #undef TARGET_ARG_PARTIAL_BYTES
523 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
524 #undef TARGET_FUNCTION_ARG
525 #define TARGET_FUNCTION_ARG arm_function_arg
526 #undef TARGET_FUNCTION_ARG_ADVANCE
527 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
528 #undef TARGET_FUNCTION_ARG_BOUNDARY
529 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
531 #undef TARGET_SETUP_INCOMING_VARARGS
532 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
534 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
535 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
537 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
538 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
539 #undef TARGET_TRAMPOLINE_INIT
540 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
541 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
542 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
544 #undef TARGET_WARN_FUNC_RETURN
545 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
547 #undef TARGET_DEFAULT_SHORT_ENUMS
548 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
550 #undef TARGET_ALIGN_ANON_BITFIELD
551 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
553 #undef TARGET_NARROW_VOLATILE_BITFIELD
554 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
556 #undef TARGET_CXX_GUARD_TYPE
557 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
559 #undef TARGET_CXX_GUARD_MASK_BIT
560 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
562 #undef TARGET_CXX_GET_COOKIE_SIZE
563 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
565 #undef TARGET_CXX_COOKIE_HAS_SIZE
566 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
568 #undef TARGET_CXX_CDTOR_RETURNS_THIS
569 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
571 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
572 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
574 #undef TARGET_CXX_USE_AEABI_ATEXIT
575 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
577 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
578 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
579 arm_cxx_determine_class_data_visibility
581 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
582 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
584 #undef TARGET_RETURN_IN_MSB
585 #define TARGET_RETURN_IN_MSB arm_return_in_msb
587 #undef TARGET_RETURN_IN_MEMORY
588 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
590 #undef TARGET_MUST_PASS_IN_STACK
591 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
593 #if ARM_UNWIND_INFO
594 #undef TARGET_ASM_UNWIND_EMIT
595 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
597 /* EABI unwinding tables use a different format for the typeinfo tables. */
598 #undef TARGET_ASM_TTYPE
599 #define TARGET_ASM_TTYPE arm_output_ttype
601 #undef TARGET_ARM_EABI_UNWINDER
602 #define TARGET_ARM_EABI_UNWINDER true
604 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
605 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
607 #endif /* ARM_UNWIND_INFO */
609 #undef TARGET_ASM_INIT_SECTIONS
610 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
612 #undef TARGET_DWARF_REGISTER_SPAN
613 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
615 #undef TARGET_CANNOT_COPY_INSN_P
616 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
618 #ifdef HAVE_AS_TLS
619 #undef TARGET_HAVE_TLS
620 #define TARGET_HAVE_TLS true
621 #endif
623 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
624 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
629 #undef TARGET_CANNOT_FORCE_CONST_MEM
630 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
632 #undef TARGET_MAX_ANCHOR_OFFSET
633 #define TARGET_MAX_ANCHOR_OFFSET 4095
635 /* The minimum is set such that the total size of the block
636 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
637 divisible by eight, ensuring natural spacing of anchors. */
638 #undef TARGET_MIN_ANCHOR_OFFSET
639 #define TARGET_MIN_ANCHOR_OFFSET -4088
641 #undef TARGET_SCHED_ISSUE_RATE
642 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
646 arm_first_cycle_multipass_dfa_lookahead
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
650 arm_first_cycle_multipass_dfa_lookahead_guard
652 #undef TARGET_MANGLE_TYPE
653 #define TARGET_MANGLE_TYPE arm_mangle_type
655 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
656 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
658 #undef TARGET_BUILD_BUILTIN_VA_LIST
659 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
660 #undef TARGET_EXPAND_BUILTIN_VA_START
661 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
662 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
663 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
665 #ifdef HAVE_AS_TLS
666 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
667 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
668 #endif
670 #undef TARGET_LEGITIMATE_ADDRESS_P
671 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
673 #undef TARGET_PREFERRED_RELOAD_CLASS
674 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
676 #undef TARGET_PROMOTED_TYPE
677 #define TARGET_PROMOTED_TYPE arm_promoted_type
679 #undef TARGET_SCALAR_MODE_SUPPORTED_P
680 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
682 #undef TARGET_FRAME_POINTER_REQUIRED
683 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
685 #undef TARGET_CAN_ELIMINATE
686 #define TARGET_CAN_ELIMINATE arm_can_eliminate
688 #undef TARGET_CONDITIONAL_REGISTER_USAGE
689 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
691 #undef TARGET_CLASS_LIKELY_SPILLED_P
692 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
694 #undef TARGET_VECTORIZE_BUILTINS
695 #define TARGET_VECTORIZE_BUILTINS
697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
699 arm_builtin_vectorized_function
701 #undef TARGET_VECTOR_ALIGNMENT
702 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
704 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
705 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
706 arm_vector_alignment_reachable
708 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
709 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
710 arm_builtin_support_vector_misalignment
712 #undef TARGET_PREFERRED_RENAME_CLASS
713 #define TARGET_PREFERRED_RENAME_CLASS \
714 arm_preferred_rename_class
716 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
717 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
718 arm_vectorize_vec_perm_const_ok
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
722 arm_builtin_vectorization_cost
723 #undef TARGET_VECTORIZE_ADD_STMT_COST
724 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
726 #undef TARGET_CANONICALIZE_COMPARISON
727 #define TARGET_CANONICALIZE_COMPARISON \
728 arm_canonicalize_comparison
730 #undef TARGET_ASAN_SHADOW_OFFSET
731 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
733 #undef MAX_INSN_PER_IT_BLOCK
734 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
736 #undef TARGET_CAN_USE_DOLOOP_P
737 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
739 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
740 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
742 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
743 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
745 #undef TARGET_SCHED_FUSION_PRIORITY
746 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
748 #undef TARGET_ASM_FUNCTION_SECTION
749 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
751 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
752 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
754 #undef TARGET_SECTION_TYPE_FLAGS
755 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
757 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
758 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
760 #undef TARGET_C_EXCESS_PRECISION
761 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
763 /* Although the architecture reserves bits 0 and 1, only the former is
764 used for ARM/Thumb ISA selection in v7 and earlier versions. */
765 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
766 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
768 struct gcc_target targetm = TARGET_INITIALIZER;
770 /* Obstack for minipool constant handling. */
771 static struct obstack minipool_obstack;
772 static char * minipool_startobj;
774 /* The maximum number of insns skipped which
775 will be conditionalised if possible. */
776 static int max_insns_skipped = 5;
778 extern FILE * asm_out_file;
780 /* True if we are currently building a constant table. */
781 int making_const_table;
783 /* The processor for which instructions should be scheduled. */
784 enum processor_type arm_tune = TARGET_CPU_arm_none;
786 /* The current tuning set. */
787 const struct tune_params *current_tune;
789 /* Which floating point hardware to schedule for. */
790 int arm_fpu_attr;
792 /* Used for Thumb call_via trampolines. */
793 rtx thumb_call_via_label[14];
794 static int thumb_call_reg_needed;
796 /* The bits in this mask specify which instruction scheduling options should
797 be used. */
798 unsigned int tune_flags = 0;
800 /* The highest ARM architecture version supported by the
801 target. */
802 enum base_architecture arm_base_arch = BASE_ARCH_0;
804 /* Active target architecture and tuning. */
806 struct arm_build_target arm_active_target;
808 /* The following are used in the arm.md file as equivalents to bits
809 in the above two flag variables. */
811 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
812 int arm_arch3m = 0;
814 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
815 int arm_arch4 = 0;
817 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
818 int arm_arch4t = 0;
820 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
821 int arm_arch5 = 0;
823 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
824 int arm_arch5e = 0;
826 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
827 int arm_arch5te = 0;
829 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
830 int arm_arch6 = 0;
832 /* Nonzero if this chip supports the ARM 6K extensions. */
833 int arm_arch6k = 0;
835 /* Nonzero if this chip supports the ARM 6KZ extensions. */
836 int arm_arch6kz = 0;
838 /* Nonzero if instructions present in ARMv6-M can be used. */
839 int arm_arch6m = 0;
841 /* Nonzero if this chip supports the ARM 7 extensions. */
842 int arm_arch7 = 0;
844 /* Nonzero if this chip supports the Large Physical Address Extension. */
845 int arm_arch_lpae = 0;
847 /* Nonzero if instructions not present in the 'M' profile can be used. */
848 int arm_arch_notm = 0;
850 /* Nonzero if instructions present in ARMv7E-M can be used. */
851 int arm_arch7em = 0;
853 /* Nonzero if instructions present in ARMv8 can be used. */
854 int arm_arch8 = 0;
856 /* Nonzero if this chip supports the ARMv8.1 extensions. */
857 int arm_arch8_1 = 0;
859 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
860 int arm_arch8_2 = 0;
862 /* Nonzero if this chip supports the FP16 instructions extension of ARM
863 Architecture 8.2. */
864 int arm_fp16_inst = 0;
866 /* Nonzero if this chip can benefit from load scheduling. */
867 int arm_ld_sched = 0;
869 /* Nonzero if this chip is a StrongARM. */
870 int arm_tune_strongarm = 0;
872 /* Nonzero if this chip supports Intel Wireless MMX technology. */
873 int arm_arch_iwmmxt = 0;
875 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
876 int arm_arch_iwmmxt2 = 0;
878 /* Nonzero if this chip is an XScale. */
879 int arm_arch_xscale = 0;
881 /* Nonzero if tuning for XScale. */
882 int arm_tune_xscale = 0;
884 /* Nonzero if we want to tune for stores that access the write-buffer.
885 This typically means an ARM6 or ARM7 with MMU or MPU. */
886 int arm_tune_wbuf = 0;
888 /* Nonzero if tuning for Cortex-A9. */
889 int arm_tune_cortex_a9 = 0;
891 /* Nonzero if we should define __THUMB_INTERWORK__ in the
892 preprocessor.
893 XXX This is a bit of a hack; it's intended to help work around
894 problems in GLD which doesn't understand that armv5t code is
895 interworking clean. */
896 int arm_cpp_interwork = 0;
898 /* Nonzero if chip supports Thumb 1. */
899 int arm_arch_thumb1;
901 /* Nonzero if chip supports Thumb 2. */
902 int arm_arch_thumb2;
904 /* Nonzero if chip supports integer division instruction. */
905 int arm_arch_arm_hwdiv;
906 int arm_arch_thumb_hwdiv;
908 /* Nonzero if chip disallows volatile memory access in IT block. */
909 int arm_arch_no_volatile_ce;
911 /* Nonzero if we should use Neon to handle 64-bit operations rather
912 than core registers. */
913 int prefer_neon_for_64bits = 0;
915 /* Nonzero if we shouldn't use literal pools. */
916 bool arm_disable_literal_pool = false;
918 /* The register number to be used for the PIC offset register. */
919 unsigned arm_pic_register = INVALID_REGNUM;
921 enum arm_pcs arm_pcs_default;
923 /* For an explanation of these variables, see final_prescan_insn below. */
924 int arm_ccfsm_state;
925 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
926 enum arm_cond_code arm_current_cc;
928 rtx arm_target_insn;
929 int arm_target_label;
930 /* The number of conditionally executed insns, including the current insn. */
931 int arm_condexec_count = 0;
932 /* A bitmask specifying the patterns for the IT block.
933 Zero means do not output an IT block before this insn. */
934 int arm_condexec_mask = 0;
935 /* The number of bits used in arm_condexec_mask. */
936 int arm_condexec_masklen = 0;
938 /* Nonzero if chip supports the ARMv8 CRC instructions. */
939 int arm_arch_crc = 0;
941 /* Nonzero if chip supports the ARMv8-M security extensions. */
942 int arm_arch_cmse = 0;
945 /* Nonzero if the core has a very small, high-latency multiply unit. */
945 int arm_m_profile_small_mul = 0;
947 /* The condition codes of the ARM, and the inverse function. */
948 static const char * const arm_condition_codes[] =
950 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
951 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
954 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
955 int arm_regs_in_sequence[] =
957 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
960 #define ARM_LSL_NAME "lsl"
961 #define streq(string1, string2) (strcmp (string1, string2) == 0)
963 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
964 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
965 | (1 << PIC_OFFSET_TABLE_REGNUM)))
967 /* Initialization code. */
969 struct processors
971 const char *const name;
972 enum processor_type core;
973 unsigned int tune_flags;
974 const char *arch;
975 enum base_architecture base_arch;
976 enum isa_feature isa_bits[isa_num_bits];
977 const struct tune_params *const tune;
981 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
982 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
984 num_slots, \
985 l1_size, \
986 l1_line_size \
989 /* arm generic vectorizer costs. */
990 static const
991 struct cpu_vec_costs arm_default_vec_cost = {
992 1, /* scalar_stmt_cost. */
993 1, /* scalar load_cost. */
994 1, /* scalar_store_cost. */
995 1, /* vec_stmt_cost. */
996 1, /* vec_to_scalar_cost. */
997 1, /* scalar_to_vec_cost. */
998 1, /* vec_align_load_cost. */
999 1, /* vec_unalign_load_cost. */
1000 1, /* vec_unalign_store_cost. */
1001 1, /* vec_store_cost. */
1002 3, /* cond_taken_branch_cost. */
1003 1, /* cond_not_taken_branch_cost. */
1006 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1007 #include "aarch-cost-tables.h"
1011 const struct cpu_cost_table cortexa9_extra_costs =
1013 /* ALU */
1015 0, /* arith. */
1016 0, /* logical. */
1017 0, /* shift. */
1018 COSTS_N_INSNS (1), /* shift_reg. */
1019 COSTS_N_INSNS (1), /* arith_shift. */
1020 COSTS_N_INSNS (2), /* arith_shift_reg. */
1021 0, /* log_shift. */
1022 COSTS_N_INSNS (1), /* log_shift_reg. */
1023 COSTS_N_INSNS (1), /* extend. */
1024 COSTS_N_INSNS (2), /* extend_arith. */
1025 COSTS_N_INSNS (1), /* bfi. */
1026 COSTS_N_INSNS (1), /* bfx. */
1027 0, /* clz. */
1028 0, /* rev. */
1029 0, /* non_exec. */
1030 true /* non_exec_costs_exec. */
1033 /* MULT SImode */
1035 COSTS_N_INSNS (3), /* simple. */
1036 COSTS_N_INSNS (3), /* flag_setting. */
1037 COSTS_N_INSNS (2), /* extend. */
1038 COSTS_N_INSNS (3), /* add. */
1039 COSTS_N_INSNS (2), /* extend_add. */
1040 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1042 /* MULT DImode */
1044 0, /* simple (N/A). */
1045 0, /* flag_setting (N/A). */
1046 COSTS_N_INSNS (4), /* extend. */
1047 0, /* add (N/A). */
1048 COSTS_N_INSNS (4), /* extend_add. */
1049 0 /* idiv (N/A). */
1052 /* LD/ST */
1054 COSTS_N_INSNS (2), /* load. */
1055 COSTS_N_INSNS (2), /* load_sign_extend. */
1056 COSTS_N_INSNS (2), /* ldrd. */
1057 COSTS_N_INSNS (2), /* ldm_1st. */
1058 1, /* ldm_regs_per_insn_1st. */
1059 2, /* ldm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (5), /* loadf. */
1061 COSTS_N_INSNS (5), /* loadd. */
1062 COSTS_N_INSNS (1), /* load_unaligned. */
1063 COSTS_N_INSNS (2), /* store. */
1064 COSTS_N_INSNS (2), /* strd. */
1065 COSTS_N_INSNS (2), /* stm_1st. */
1066 1, /* stm_regs_per_insn_1st. */
1067 2, /* stm_regs_per_insn_subsequent. */
1068 COSTS_N_INSNS (1), /* storef. */
1069 COSTS_N_INSNS (1), /* stored. */
1070 COSTS_N_INSNS (1), /* store_unaligned. */
1071 COSTS_N_INSNS (1), /* loadv. */
1072 COSTS_N_INSNS (1) /* storev. */
1075 /* FP SFmode */
1077 COSTS_N_INSNS (14), /* div. */
1078 COSTS_N_INSNS (4), /* mult. */
1079 COSTS_N_INSNS (7), /* mult_addsub. */
1080 COSTS_N_INSNS (30), /* fma. */
1081 COSTS_N_INSNS (3), /* addsub. */
1082 COSTS_N_INSNS (1), /* fpconst. */
1083 COSTS_N_INSNS (1), /* neg. */
1084 COSTS_N_INSNS (3), /* compare. */
1085 COSTS_N_INSNS (3), /* widen. */
1086 COSTS_N_INSNS (3), /* narrow. */
1087 COSTS_N_INSNS (3), /* toint. */
1088 COSTS_N_INSNS (3), /* fromint. */
1089 COSTS_N_INSNS (3) /* roundint. */
1091 /* FP DFmode */
1093 COSTS_N_INSNS (24), /* div. */
1094 COSTS_N_INSNS (5), /* mult. */
1095 COSTS_N_INSNS (8), /* mult_addsub. */
1096 COSTS_N_INSNS (30), /* fma. */
1097 COSTS_N_INSNS (3), /* addsub. */
1098 COSTS_N_INSNS (1), /* fpconst. */
1099 COSTS_N_INSNS (1), /* neg. */
1100 COSTS_N_INSNS (3), /* compare. */
1101 COSTS_N_INSNS (3), /* widen. */
1102 COSTS_N_INSNS (3), /* narrow. */
1103 COSTS_N_INSNS (3), /* toint. */
1104 COSTS_N_INSNS (3), /* fromint. */
1105 COSTS_N_INSNS (3) /* roundint. */
1108 /* Vector */
1110 COSTS_N_INSNS (1) /* alu. */
1114 const struct cpu_cost_table cortexa8_extra_costs =
1116 /* ALU */
1118 0, /* arith. */
1119 0, /* logical. */
1120 COSTS_N_INSNS (1), /* shift. */
1121 0, /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 0, /* arith_shift_reg. */
1124 COSTS_N_INSNS (1), /* log_shift. */
1125 0, /* log_shift_reg. */
1126 0, /* extend. */
1127 0, /* extend_arith. */
1128 0, /* bfi. */
1129 0, /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1136 /* MULT SImode */
1138 COSTS_N_INSNS (1), /* simple. */
1139 COSTS_N_INSNS (1), /* flag_setting. */
1140 COSTS_N_INSNS (1), /* extend. */
1141 COSTS_N_INSNS (1), /* add. */
1142 COSTS_N_INSNS (1), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1145 /* MULT DImode */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (2), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (2), /* extend_add. */
1152 0 /* idiv (N/A). */
1155 /* LD/ST */
1157 COSTS_N_INSNS (1), /* load. */
1158 COSTS_N_INSNS (1), /* load_sign_extend. */
1159 COSTS_N_INSNS (1), /* ldrd. */
1160 COSTS_N_INSNS (1), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* loadf. */
1164 COSTS_N_INSNS (1), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (1), /* store. */
1167 COSTS_N_INSNS (1), /* strd. */
1168 COSTS_N_INSNS (1), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1178 /* FP SFmode */
1180 COSTS_N_INSNS (36), /* div. */
1181 COSTS_N_INSNS (11), /* mult. */
1182 COSTS_N_INSNS (20), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (9), /* addsub. */
1185 COSTS_N_INSNS (3), /* fpconst. */
1186 COSTS_N_INSNS (3), /* neg. */
1187 COSTS_N_INSNS (6), /* compare. */
1188 COSTS_N_INSNS (4), /* widen. */
1189 COSTS_N_INSNS (4), /* narrow. */
1190 COSTS_N_INSNS (8), /* toint. */
1191 COSTS_N_INSNS (8), /* fromint. */
1192 COSTS_N_INSNS (8) /* roundint. */
1194 /* FP DFmode */
1196 COSTS_N_INSNS (64), /* div. */
1197 COSTS_N_INSNS (16), /* mult. */
1198 COSTS_N_INSNS (25), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (9), /* addsub. */
1201 COSTS_N_INSNS (3), /* fpconst. */
1202 COSTS_N_INSNS (3), /* neg. */
1203 COSTS_N_INSNS (6), /* compare. */
1204 COSTS_N_INSNS (6), /* widen. */
1205 COSTS_N_INSNS (6), /* narrow. */
1206 COSTS_N_INSNS (8), /* toint. */
1207 COSTS_N_INSNS (8), /* fromint. */
1208 COSTS_N_INSNS (8) /* roundint. */
1211 /* Vector */
1213 COSTS_N_INSNS (1) /* alu. */
1217 const struct cpu_cost_table cortexa5_extra_costs =
1219 /* ALU */
1221 0, /* arith. */
1222 0, /* logical. */
1223 COSTS_N_INSNS (1), /* shift. */
1224 COSTS_N_INSNS (1), /* shift_reg. */
1225 COSTS_N_INSNS (1), /* arith_shift. */
1226 COSTS_N_INSNS (1), /* arith_shift_reg. */
1227 COSTS_N_INSNS (1), /* log_shift. */
1228 COSTS_N_INSNS (1), /* log_shift_reg. */
1229 COSTS_N_INSNS (1), /* extend. */
1230 COSTS_N_INSNS (1), /* extend_arith. */
1231 COSTS_N_INSNS (1), /* bfi. */
1232 COSTS_N_INSNS (1), /* bfx. */
1233 COSTS_N_INSNS (1), /* clz. */
1234 COSTS_N_INSNS (1), /* rev. */
1235 0, /* non_exec. */
1236 true /* non_exec_costs_exec. */
1240 /* MULT SImode */
1242 0, /* simple. */
1243 COSTS_N_INSNS (1), /* flag_setting. */
1244 COSTS_N_INSNS (1), /* extend. */
1245 COSTS_N_INSNS (1), /* add. */
1246 COSTS_N_INSNS (1), /* extend_add. */
1247 COSTS_N_INSNS (7) /* idiv. */
1249 /* MULT DImode */
1251 0, /* simple (N/A). */
1252 0, /* flag_setting (N/A). */
1253 COSTS_N_INSNS (1), /* extend. */
1254 0, /* add. */
1255 COSTS_N_INSNS (2), /* extend_add. */
1256 0 /* idiv (N/A). */
1259 /* LD/ST */
1261 COSTS_N_INSNS (1), /* load. */
1262 COSTS_N_INSNS (1), /* load_sign_extend. */
1263 COSTS_N_INSNS (6), /* ldrd. */
1264 COSTS_N_INSNS (1), /* ldm_1st. */
1265 1, /* ldm_regs_per_insn_1st. */
1266 2, /* ldm_regs_per_insn_subsequent. */
1267 COSTS_N_INSNS (2), /* loadf. */
1268 COSTS_N_INSNS (4), /* loadd. */
1269 COSTS_N_INSNS (1), /* load_unaligned. */
1270 COSTS_N_INSNS (1), /* store. */
1271 COSTS_N_INSNS (3), /* strd. */
1272 COSTS_N_INSNS (1), /* stm_1st. */
1273 1, /* stm_regs_per_insn_1st. */
1274 2, /* stm_regs_per_insn_subsequent. */
1275 COSTS_N_INSNS (2), /* storef. */
1276 COSTS_N_INSNS (2), /* stored. */
1277 COSTS_N_INSNS (1), /* store_unaligned. */
1278 COSTS_N_INSNS (1), /* loadv. */
1279 COSTS_N_INSNS (1) /* storev. */
1282 /* FP SFmode */
1284 COSTS_N_INSNS (15), /* div. */
1285 COSTS_N_INSNS (3), /* mult. */
1286 COSTS_N_INSNS (7), /* mult_addsub. */
1287 COSTS_N_INSNS (7), /* fma. */
1288 COSTS_N_INSNS (3), /* addsub. */
1289 COSTS_N_INSNS (3), /* fpconst. */
1290 COSTS_N_INSNS (3), /* neg. */
1291 COSTS_N_INSNS (3), /* compare. */
1292 COSTS_N_INSNS (3), /* widen. */
1293 COSTS_N_INSNS (3), /* narrow. */
1294 COSTS_N_INSNS (3), /* toint. */
1295 COSTS_N_INSNS (3), /* fromint. */
1296 COSTS_N_INSNS (3) /* roundint. */
1298 /* FP DFmode */
1300 COSTS_N_INSNS (30), /* div. */
1301 COSTS_N_INSNS (6), /* mult. */
1302 COSTS_N_INSNS (10), /* mult_addsub. */
1303 COSTS_N_INSNS (7), /* fma. */
1304 COSTS_N_INSNS (3), /* addsub. */
1305 COSTS_N_INSNS (3), /* fpconst. */
1306 COSTS_N_INSNS (3), /* neg. */
1307 COSTS_N_INSNS (3), /* compare. */
1308 COSTS_N_INSNS (3), /* widen. */
1309 COSTS_N_INSNS (3), /* narrow. */
1310 COSTS_N_INSNS (3), /* toint. */
1311 COSTS_N_INSNS (3), /* fromint. */
1312 COSTS_N_INSNS (3) /* roundint. */
1315 /* Vector */
1317 COSTS_N_INSNS (1) /* alu. */
1322 const struct cpu_cost_table cortexa7_extra_costs =
1324 /* ALU */
1326 0, /* arith. */
1327 0, /* logical. */
1328 COSTS_N_INSNS (1), /* shift. */
1329 COSTS_N_INSNS (1), /* shift_reg. */
1330 COSTS_N_INSNS (1), /* arith_shift. */
1331 COSTS_N_INSNS (1), /* arith_shift_reg. */
1332 COSTS_N_INSNS (1), /* log_shift. */
1333 COSTS_N_INSNS (1), /* log_shift_reg. */
1334 COSTS_N_INSNS (1), /* extend. */
1335 COSTS_N_INSNS (1), /* extend_arith. */
1336 COSTS_N_INSNS (1), /* bfi. */
1337 COSTS_N_INSNS (1), /* bfx. */
1338 COSTS_N_INSNS (1), /* clz. */
1339 COSTS_N_INSNS (1), /* rev. */
1340 0, /* non_exec. */
1341 true /* non_exec_costs_exec. */
1345 /* MULT SImode */
1347 0, /* simple. */
1348 COSTS_N_INSNS (1), /* flag_setting. */
1349 COSTS_N_INSNS (1), /* extend. */
1350 COSTS_N_INSNS (1), /* add. */
1351 COSTS_N_INSNS (1), /* extend_add. */
1352 COSTS_N_INSNS (7) /* idiv. */
1354 /* MULT DImode */
1356 0, /* simple (N/A). */
1357 0, /* flag_setting (N/A). */
1358 COSTS_N_INSNS (1), /* extend. */
1359 0, /* add. */
1360 COSTS_N_INSNS (2), /* extend_add. */
1361 0 /* idiv (N/A). */
1364 /* LD/ST */
1366 COSTS_N_INSNS (1), /* load. */
1367 COSTS_N_INSNS (1), /* load_sign_extend. */
1368 COSTS_N_INSNS (3), /* ldrd. */
1369 COSTS_N_INSNS (1), /* ldm_1st. */
1370 1, /* ldm_regs_per_insn_1st. */
1371 2, /* ldm_regs_per_insn_subsequent. */
1372 COSTS_N_INSNS (2), /* loadf. */
1373 COSTS_N_INSNS (2), /* loadd. */
1374 COSTS_N_INSNS (1), /* load_unaligned. */
1375 COSTS_N_INSNS (1), /* store. */
1376 COSTS_N_INSNS (3), /* strd. */
1377 COSTS_N_INSNS (1), /* stm_1st. */
1378 1, /* stm_regs_per_insn_1st. */
1379 2, /* stm_regs_per_insn_subsequent. */
1380 COSTS_N_INSNS (2), /* storef. */
1381 COSTS_N_INSNS (2), /* stored. */
1382 COSTS_N_INSNS (1), /* store_unaligned. */
1383 COSTS_N_INSNS (1), /* loadv. */
1384 COSTS_N_INSNS (1) /* storev. */
1387 /* FP SFmode */
1389 COSTS_N_INSNS (15), /* div. */
1390 COSTS_N_INSNS (3), /* mult. */
1391 COSTS_N_INSNS (7), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1403 /* FP DFmode */
1405 COSTS_N_INSNS (30), /* div. */
1406 COSTS_N_INSNS (6), /* mult. */
1407 COSTS_N_INSNS (10), /* mult_addsub. */
1408 COSTS_N_INSNS (7), /* fma. */
1409 COSTS_N_INSNS (3), /* addsub. */
1410 COSTS_N_INSNS (3), /* fpconst. */
1411 COSTS_N_INSNS (3), /* neg. */
1412 COSTS_N_INSNS (3), /* compare. */
1413 COSTS_N_INSNS (3), /* widen. */
1414 COSTS_N_INSNS (3), /* narrow. */
1415 COSTS_N_INSNS (3), /* toint. */
1416 COSTS_N_INSNS (3), /* fromint. */
1417 COSTS_N_INSNS (3) /* roundint. */
1420 /* Vector */
1422 COSTS_N_INSNS (1) /* alu. */
1426 const struct cpu_cost_table cortexa12_extra_costs =
1428 /* ALU */
1430 0, /* arith. */
1431 0, /* logical. */
1432 0, /* shift. */
1433 COSTS_N_INSNS (1), /* shift_reg. */
1434 COSTS_N_INSNS (1), /* arith_shift. */
1435 COSTS_N_INSNS (1), /* arith_shift_reg. */
1436 COSTS_N_INSNS (1), /* log_shift. */
1437 COSTS_N_INSNS (1), /* log_shift_reg. */
1438 0, /* extend. */
1439 COSTS_N_INSNS (1), /* extend_arith. */
1440 0, /* bfi. */
1441 COSTS_N_INSNS (1), /* bfx. */
1442 COSTS_N_INSNS (1), /* clz. */
1443 COSTS_N_INSNS (1), /* rev. */
1444 0, /* non_exec. */
1445 true /* non_exec_costs_exec. */
1447 /* MULT SImode */
1450 COSTS_N_INSNS (2), /* simple. */
1451 COSTS_N_INSNS (3), /* flag_setting. */
1452 COSTS_N_INSNS (2), /* extend. */
1453 COSTS_N_INSNS (3), /* add. */
1454 COSTS_N_INSNS (2), /* extend_add. */
1455 COSTS_N_INSNS (18) /* idiv. */
1457 /* MULT DImode */
1459 0, /* simple (N/A). */
1460 0, /* flag_setting (N/A). */
1461 COSTS_N_INSNS (3), /* extend. */
1462 0, /* add (N/A). */
1463 COSTS_N_INSNS (3), /* extend_add. */
1464 0 /* idiv (N/A). */
1467 /* LD/ST */
1469 COSTS_N_INSNS (3), /* load. */
1470 COSTS_N_INSNS (3), /* load_sign_extend. */
1471 COSTS_N_INSNS (3), /* ldrd. */
1472 COSTS_N_INSNS (3), /* ldm_1st. */
1473 1, /* ldm_regs_per_insn_1st. */
1474 2, /* ldm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (3), /* loadf. */
1476 COSTS_N_INSNS (3), /* loadd. */
1477 0, /* load_unaligned. */
1478 0, /* store. */
1479 0, /* strd. */
1480 0, /* stm_1st. */
1481 1, /* stm_regs_per_insn_1st. */
1482 2, /* stm_regs_per_insn_subsequent. */
1483 COSTS_N_INSNS (2), /* storef. */
1484 COSTS_N_INSNS (2), /* stored. */
1485 0, /* store_unaligned. */
1486 COSTS_N_INSNS (1), /* loadv. */
1487 COSTS_N_INSNS (1) /* storev. */
1490 /* FP SFmode */
1492 COSTS_N_INSNS (17), /* div. */
1493 COSTS_N_INSNS (4), /* mult. */
1494 COSTS_N_INSNS (8), /* mult_addsub. */
1495 COSTS_N_INSNS (8), /* fma. */
1496 COSTS_N_INSNS (4), /* addsub. */
1497 COSTS_N_INSNS (2), /* fpconst. */
1498 COSTS_N_INSNS (2), /* neg. */
1499 COSTS_N_INSNS (2), /* compare. */
1500 COSTS_N_INSNS (4), /* widen. */
1501 COSTS_N_INSNS (4), /* narrow. */
1502 COSTS_N_INSNS (4), /* toint. */
1503 COSTS_N_INSNS (4), /* fromint. */
1504 COSTS_N_INSNS (4) /* roundint. */
1506 /* FP DFmode */
1508 COSTS_N_INSNS (31), /* div. */
1509 COSTS_N_INSNS (4), /* mult. */
1510 COSTS_N_INSNS (8), /* mult_addsub. */
1511 COSTS_N_INSNS (8), /* fma. */
1512 COSTS_N_INSNS (4), /* addsub. */
1513 COSTS_N_INSNS (2), /* fpconst. */
1514 COSTS_N_INSNS (2), /* neg. */
1515 COSTS_N_INSNS (2), /* compare. */
1516 COSTS_N_INSNS (4), /* widen. */
1517 COSTS_N_INSNS (4), /* narrow. */
1518 COSTS_N_INSNS (4), /* toint. */
1519 COSTS_N_INSNS (4), /* fromint. */
1520 COSTS_N_INSNS (4) /* roundint. */
1523 /* Vector */
1525 COSTS_N_INSNS (1) /* alu. */
1529 const struct cpu_cost_table cortexa15_extra_costs =
1531 /* ALU */
1533 0, /* arith. */
1534 0, /* logical. */
1535 0, /* shift. */
1536 0, /* shift_reg. */
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1541 0, /* extend. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 COSTS_N_INSNS (1), /* bfi. */
1544 0, /* bfx. */
1545 0, /* clz. */
1546 0, /* rev. */
1547 0, /* non_exec. */
1548 true /* non_exec_costs_exec. */
1550 /* MULT SImode */
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (2), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1560 /* MULT DImode */
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1565 0, /* add (N/A). */
1566 COSTS_N_INSNS (3), /* extend_add. */
1567 0 /* idiv (N/A). */
1570 /* LD/ST */
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (4), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (4), /* loadf. */
1579 COSTS_N_INSNS (4), /* loadd. */
1580 0, /* load_unaligned. */
1581 0, /* store. */
1582 0, /* strd. */
1583 COSTS_N_INSNS (1), /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1586 0, /* storef. */
1587 0, /* stored. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1593 /* FP SFmode */
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (5), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1609 /* FP DFmode */
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1626 /* Vector */
1628 COSTS_N_INSNS (1) /* alu. */
1632 const struct cpu_cost_table v7m_extra_costs =
1634 /* ALU */
1636 0, /* arith. */
1637 0, /* logical. */
1638 0, /* shift. */
1639 0, /* shift_reg. */
1640 0, /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 0, /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1644 0, /* extend. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 0, /* bfi. */
1647 0, /* bfx. */
1648 0, /* clz. */
1649 0, /* rev. */
1650 COSTS_N_INSNS (1), /* non_exec. */
1651 false /* non_exec_costs_exec. */
1654 /* MULT SImode */
1656 COSTS_N_INSNS (1), /* simple. */
1657 COSTS_N_INSNS (1), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (1), /* add. */
1660 COSTS_N_INSNS (3), /* extend_add. */
1661 COSTS_N_INSNS (8) /* idiv. */
1663 /* MULT DImode */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (2), /* extend. */
1668 0, /* add (N/A). */
1669 COSTS_N_INSNS (3), /* extend_add. */
1670 0 /* idiv (N/A). */
1673 /* LD/ST */
1675 COSTS_N_INSNS (2), /* load. */
1676 0, /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (2), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 1, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (2), /* loadf. */
1682 COSTS_N_INSNS (3), /* loadd. */
1683 COSTS_N_INSNS (1), /* load_unaligned. */
1684 COSTS_N_INSNS (2), /* store. */
1685 COSTS_N_INSNS (3), /* strd. */
1686 COSTS_N_INSNS (2), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 1, /* stm_regs_per_insn_subsequent. */
1689 COSTS_N_INSNS (2), /* storef. */
1690 COSTS_N_INSNS (3), /* stored. */
1691 COSTS_N_INSNS (1), /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1696 /* FP SFmode */
1698 COSTS_N_INSNS (7), /* div. */
1699 COSTS_N_INSNS (2), /* mult. */
1700 COSTS_N_INSNS (5), /* mult_addsub. */
1701 COSTS_N_INSNS (3), /* fma. */
1702 COSTS_N_INSNS (1), /* addsub. */
1703 0, /* fpconst. */
1704 0, /* neg. */
1705 0, /* compare. */
1706 0, /* widen. */
1707 0, /* narrow. */
1708 0, /* toint. */
1709 0, /* fromint. */
1710 0 /* roundint. */
1712 /* FP DFmode */
1714 COSTS_N_INSNS (15), /* div. */
1715 COSTS_N_INSNS (5), /* mult. */
1716 COSTS_N_INSNS (7), /* mult_addsub. */
1717 COSTS_N_INSNS (7), /* fma. */
1718 COSTS_N_INSNS (3), /* addsub. */
1719 0, /* fpconst. */
1720 0, /* neg. */
1721 0, /* compare. */
1722 0, /* widen. */
1723 0, /* narrow. */
1724 0, /* toint. */
1725 0, /* fromint. */
1726 0 /* roundint. */
1729 /* Vector */
1731 COSTS_N_INSNS (1) /* alu. */
1735 const struct tune_params arm_slowmul_tune =
1737 &generic_extra_costs, /* Insn extra costs. */
1738 NULL, /* Sched adj cost. */
1739 arm_default_branch_cost,
1740 &arm_default_vec_cost,
1741 3, /* Constant limit. */
1742 5, /* Max cond insns. */
1743 8, /* Memset max inline. */
1744 1, /* Issue rate. */
1745 ARM_PREFETCH_NOT_BENEFICIAL,
1746 tune_params::PREF_CONST_POOL_TRUE,
1747 tune_params::PREF_LDRD_FALSE,
1748 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1750 tune_params::DISPARAGE_FLAGS_NEITHER,
1751 tune_params::PREF_NEON_64_FALSE,
1752 tune_params::PREF_NEON_STRINGOPS_FALSE,
1753 tune_params::FUSE_NOTHING,
1754 tune_params::SCHED_AUTOPREF_OFF
1757 const struct tune_params arm_fastmul_tune =
1759 &generic_extra_costs, /* Insn extra costs. */
1760 NULL, /* Sched adj cost. */
1761 arm_default_branch_cost,
1762 &arm_default_vec_cost,
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 8, /* Memset max inline. */
1766 1, /* Issue rate. */
1767 ARM_PREFETCH_NOT_BENEFICIAL,
1768 tune_params::PREF_CONST_POOL_TRUE,
1769 tune_params::PREF_LDRD_FALSE,
1770 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1772 tune_params::DISPARAGE_FLAGS_NEITHER,
1773 tune_params::PREF_NEON_64_FALSE,
1774 tune_params::PREF_NEON_STRINGOPS_FALSE,
1775 tune_params::FUSE_NOTHING,
1776 tune_params::SCHED_AUTOPREF_OFF
1779 /* StrongARM has early execution of branches, so a sequence that is worth
1780 skipping is shorter. Set max_insns_skipped to a lower value. */
1782 const struct tune_params arm_strongarm_tune =
1784 &generic_extra_costs, /* Insn extra costs. */
1785 NULL, /* Sched adj cost. */
1786 arm_default_branch_cost,
1787 &arm_default_vec_cost,
1788 1, /* Constant limit. */
1789 3, /* Max cond insns. */
1790 8, /* Memset max inline. */
1791 1, /* Issue rate. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 tune_params::PREF_CONST_POOL_TRUE,
1794 tune_params::PREF_LDRD_FALSE,
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1797 tune_params::DISPARAGE_FLAGS_NEITHER,
1798 tune_params::PREF_NEON_64_FALSE,
1799 tune_params::PREF_NEON_STRINGOPS_FALSE,
1800 tune_params::FUSE_NOTHING,
1801 tune_params::SCHED_AUTOPREF_OFF
1804 const struct tune_params arm_xscale_tune =
1806 &generic_extra_costs, /* Insn extra costs. */
1807 xscale_sched_adjust_cost,
1808 arm_default_branch_cost,
1809 &arm_default_vec_cost,
1810 2, /* Constant limit. */
1811 3, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 1, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL,
1815 tune_params::PREF_CONST_POOL_TRUE,
1816 tune_params::PREF_LDRD_FALSE,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER,
1820 tune_params::PREF_NEON_64_FALSE,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE,
1822 tune_params::FUSE_NOTHING,
1823 tune_params::SCHED_AUTOPREF_OFF
1826 const struct tune_params arm_9e_tune =
1828 &generic_extra_costs, /* Insn extra costs. */
1829 NULL, /* Sched adj cost. */
1830 arm_default_branch_cost,
1831 &arm_default_vec_cost,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 8, /* Memset max inline. */
1835 1, /* Issue rate. */
1836 ARM_PREFETCH_NOT_BENEFICIAL,
1837 tune_params::PREF_CONST_POOL_TRUE,
1838 tune_params::PREF_LDRD_FALSE,
1839 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1841 tune_params::DISPARAGE_FLAGS_NEITHER,
1842 tune_params::PREF_NEON_64_FALSE,
1843 tune_params::PREF_NEON_STRINGOPS_FALSE,
1844 tune_params::FUSE_NOTHING,
1845 tune_params::SCHED_AUTOPREF_OFF
1848 const struct tune_params arm_marvell_pj4_tune =
1850 &generic_extra_costs, /* Insn extra costs. */
1851 NULL, /* Sched adj cost. */
1852 arm_default_branch_cost,
1853 &arm_default_vec_cost,
1854 1, /* Constant limit. */
1855 5, /* Max cond insns. */
1856 8, /* Memset max inline. */
1857 2, /* Issue rate. */
1858 ARM_PREFETCH_NOT_BENEFICIAL,
1859 tune_params::PREF_CONST_POOL_TRUE,
1860 tune_params::PREF_LDRD_FALSE,
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1863 tune_params::DISPARAGE_FLAGS_NEITHER,
1864 tune_params::PREF_NEON_64_FALSE,
1865 tune_params::PREF_NEON_STRINGOPS_FALSE,
1866 tune_params::FUSE_NOTHING,
1867 tune_params::SCHED_AUTOPREF_OFF
1870 const struct tune_params arm_v6t2_tune =
1872 &generic_extra_costs, /* Insn extra costs. */
1873 NULL, /* Sched adj cost. */
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_FALSE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1893 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1894 const struct tune_params arm_cortex_tune =
1896 &generic_extra_costs,
1897 NULL, /* Sched adj cost. */
1898 arm_default_branch_cost,
1899 &arm_default_vec_cost,
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 8, /* Memset max inline. */
1903 2, /* Issue rate. */
1904 ARM_PREFETCH_NOT_BENEFICIAL,
1905 tune_params::PREF_CONST_POOL_FALSE,
1906 tune_params::PREF_LDRD_FALSE,
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1909 tune_params::DISPARAGE_FLAGS_NEITHER,
1910 tune_params::PREF_NEON_64_FALSE,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE,
1912 tune_params::FUSE_NOTHING,
1913 tune_params::SCHED_AUTOPREF_OFF
1916 const struct tune_params arm_cortex_a8_tune =
1918 &cortexa8_extra_costs,
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_FALSE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_cortex_a7_tune =
1940 &cortexa7_extra_costs,
1941 NULL, /* Sched adj cost. */
1942 arm_default_branch_cost,
1943 &arm_default_vec_cost,
1944 1, /* Constant limit. */
1945 5, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 2, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 tune_params::PREF_CONST_POOL_FALSE,
1950 tune_params::PREF_LDRD_FALSE,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER,
1954 tune_params::PREF_NEON_64_FALSE,
1955 tune_params::PREF_NEON_STRINGOPS_TRUE,
1956 tune_params::FUSE_NOTHING,
1957 tune_params::SCHED_AUTOPREF_OFF
1960 const struct tune_params arm_cortex_a15_tune =
1962 &cortexa15_extra_costs,
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 2, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 3, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_FALSE,
1972 tune_params::PREF_LDRD_TRUE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_ALL,
1976 tune_params::PREF_NEON_64_FALSE,
1977 tune_params::PREF_NEON_STRINGOPS_TRUE,
1978 tune_params::FUSE_NOTHING,
1979 tune_params::SCHED_AUTOPREF_FULL
1982 const struct tune_params arm_cortex_a35_tune =
1984 &cortexa53_extra_costs,
1985 NULL, /* Sched adj cost. */
1986 arm_default_branch_cost,
1987 &arm_default_vec_cost,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 1, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 tune_params::PREF_CONST_POOL_FALSE,
1994 tune_params::PREF_LDRD_FALSE,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER,
1998 tune_params::PREF_NEON_64_FALSE,
1999 tune_params::PREF_NEON_STRINGOPS_TRUE,
2000 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2001 tune_params::SCHED_AUTOPREF_OFF
2004 const struct tune_params arm_cortex_a53_tune =
2006 &cortexa53_extra_costs,
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a57_tune =
2028 &cortexa57_extra_costs,
2029 NULL, /* Sched adj cost. */
2030 arm_default_branch_cost,
2031 &arm_default_vec_cost,
2032 1, /* Constant limit. */
2033 2, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 3, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL,
2037 tune_params::PREF_CONST_POOL_FALSE,
2038 tune_params::PREF_LDRD_TRUE,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_ALL,
2042 tune_params::PREF_NEON_64_FALSE,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE,
2044 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2045 tune_params::SCHED_AUTOPREF_FULL
2048 const struct tune_params arm_exynosm1_tune =
2050 &exynosm1_extra_costs,
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 2, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 3, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_TRUE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_ALL,
2064 tune_params::PREF_NEON_64_FALSE,
2065 tune_params::PREF_NEON_STRINGOPS_TRUE,
2066 tune_params::FUSE_NOTHING,
2067 tune_params::SCHED_AUTOPREF_OFF
2070 const struct tune_params arm_xgene1_tune =
2072 &xgene1_extra_costs,
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 2, /* Max cond insns. */
2078 32, /* Memset max inline. */
2079 4, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_TRUE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_ALL,
2086 tune_params::PREF_NEON_64_FALSE,
2087 tune_params::PREF_NEON_STRINGOPS_FALSE,
2088 tune_params::FUSE_NOTHING,
2089 tune_params::SCHED_AUTOPREF_OFF
2092 const struct tune_params arm_qdf24xx_tune =
2094 &qdf24xx_extra_costs,
2095 NULL, /* Scheduler cost adjustment. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost, /* Vectorizer costs. */
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 4, /* Issue rate. */
2102 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_TRUE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_ALL,
2108 tune_params::PREF_NEON_64_FALSE,
2109 tune_params::PREF_NEON_STRINGOPS_TRUE,
2110 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2111 tune_params::SCHED_AUTOPREF_FULL
2114 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2115 less appealing. Set max_insns_skipped to a low value. */
2117 const struct tune_params arm_cortex_a5_tune =
2119 &cortexa5_extra_costs,
2120 NULL, /* Sched adj cost. */
2121 arm_cortex_a5_branch_cost,
2122 &arm_default_vec_cost,
2123 1, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL,
2128 tune_params::PREF_CONST_POOL_FALSE,
2129 tune_params::PREF_LDRD_FALSE,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER,
2133 tune_params::PREF_NEON_64_FALSE,
2134 tune_params::PREF_NEON_STRINGOPS_TRUE,
2135 tune_params::FUSE_NOTHING,
2136 tune_params::SCHED_AUTOPREF_OFF
2139 const struct tune_params arm_cortex_a9_tune =
2141 &cortexa9_extra_costs,
2142 cortex_a9_sched_adjust_cost,
2143 arm_default_branch_cost,
2144 &arm_default_vec_cost,
2145 1, /* Constant limit. */
2146 5, /* Max cond insns. */
2147 8, /* Memset max inline. */
2148 2, /* Issue rate. */
2149 ARM_PREFETCH_BENEFICIAL(4,32,32),
2150 tune_params::PREF_CONST_POOL_FALSE,
2151 tune_params::PREF_LDRD_FALSE,
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2154 tune_params::DISPARAGE_FLAGS_NEITHER,
2155 tune_params::PREF_NEON_64_FALSE,
2156 tune_params::PREF_NEON_STRINGOPS_FALSE,
2157 tune_params::FUSE_NOTHING,
2158 tune_params::SCHED_AUTOPREF_OFF
2161 const struct tune_params arm_cortex_a12_tune =
2163 &cortexa12_extra_costs,
2164 NULL, /* Sched adj cost. */
2165 arm_default_branch_cost,
2166 &arm_default_vec_cost, /* Vectorizer costs. */
2167 1, /* Constant limit. */
2168 2, /* Max cond insns. */
2169 8, /* Memset max inline. */
2170 2, /* Issue rate. */
2171 ARM_PREFETCH_NOT_BENEFICIAL,
2172 tune_params::PREF_CONST_POOL_FALSE,
2173 tune_params::PREF_LDRD_TRUE,
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2176 tune_params::DISPARAGE_FLAGS_ALL,
2177 tune_params::PREF_NEON_64_FALSE,
2178 tune_params::PREF_NEON_STRINGOPS_TRUE,
2179 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2180 tune_params::SCHED_AUTOPREF_OFF
2183 const struct tune_params arm_cortex_a73_tune =
2185 &cortexa57_extra_costs,
2186 NULL, /* Sched adj cost. */
2187 arm_default_branch_cost,
2188 &arm_default_vec_cost, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 2, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 2, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL,
2194 tune_params::PREF_CONST_POOL_FALSE,
2195 tune_params::PREF_LDRD_TRUE,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_ALL,
2199 tune_params::PREF_NEON_64_FALSE,
2200 tune_params::PREF_NEON_STRINGOPS_TRUE,
2201 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2202 tune_params::SCHED_AUTOPREF_FULL
2205 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2206 cycle to execute each. An LDR from the constant pool also takes two cycles
2207 to execute, but mildly increases pipelining opportunity (consecutive
2208 loads/stores can be pipelined together, saving one cycle), and may also
2209 improve icache utilisation. Hence we prefer the constant pool for such
2210 processors. */
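/* Illustrative comparison of the two sequences discussed above (cycle
   figures are the nominal Cortex-M4 numbers from the comment, not
   measurements):

     movw r0, #:lower16:sym   @ 1 cycle
     movt r0, #:upper16:sym   @ 1 cycle      -> 2 cycles total

     ldr  r0, .LC0            @ 2 cycles, but may pipeline with a
                              @ neighbouring load or store

   The costs are comparable, and the literal load keeps the constant out
   of the instruction stream; hence PREF_CONST_POOL_TRUE below.  */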
2212 const struct tune_params arm_v7m_tune =
2214 &v7m_extra_costs,
2215 NULL, /* Sched adj cost. */
2216 arm_cortex_m_branch_cost,
2217 &arm_default_vec_cost,
2218 1, /* Constant limit. */
2219 2, /* Max cond insns. */
2220 8, /* Memset max inline. */
2221 1, /* Issue rate. */
2222 ARM_PREFETCH_NOT_BENEFICIAL,
2223 tune_params::PREF_CONST_POOL_TRUE,
2224 tune_params::PREF_LDRD_FALSE,
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2227 tune_params::DISPARAGE_FLAGS_NEITHER,
2228 tune_params::PREF_NEON_64_FALSE,
2229 tune_params::PREF_NEON_STRINGOPS_FALSE,
2230 tune_params::FUSE_NOTHING,
2231 tune_params::SCHED_AUTOPREF_OFF
2234 /* Cortex-M7 tuning. */
2236 const struct tune_params arm_cortex_m7_tune =
2238 &v7m_extra_costs,
2239 NULL, /* Sched adj cost. */
2240 arm_cortex_m7_branch_cost,
2241 &arm_default_vec_cost,
2242 0, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL,
2247 tune_params::PREF_CONST_POOL_TRUE,
2248 tune_params::PREF_LDRD_FALSE,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER,
2252 tune_params::PREF_NEON_64_FALSE,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE,
2254 tune_params::FUSE_NOTHING,
2255 tune_params::SCHED_AUTOPREF_OFF
2258 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2259 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2260 cortex-m23. */
2261 const struct tune_params arm_v6m_tune =
2263 &generic_extra_costs, /* Insn extra costs. */
2264 NULL, /* Sched adj cost. */
2265 arm_default_branch_cost,
2266 &arm_default_vec_cost, /* Vectorizer costs. */
2267 1, /* Constant limit. */
2268 5, /* Max cond insns. */
2269 8, /* Memset max inline. */
2270 1, /* Issue rate. */
2271 ARM_PREFETCH_NOT_BENEFICIAL,
2272 tune_params::PREF_CONST_POOL_FALSE,
2273 tune_params::PREF_LDRD_FALSE,
2274 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2276 tune_params::DISPARAGE_FLAGS_NEITHER,
2277 tune_params::PREF_NEON_64_FALSE,
2278 tune_params::PREF_NEON_STRINGOPS_FALSE,
2279 tune_params::FUSE_NOTHING,
2280 tune_params::SCHED_AUTOPREF_OFF
2283 const struct tune_params arm_fa726te_tune =
2285 &generic_extra_costs, /* Insn extra costs. */
2286 fa726te_sched_adjust_cost,
2287 arm_default_branch_cost,
2288 &arm_default_vec_cost,
2289 1, /* Constant limit. */
2290 5, /* Max cond insns. */
2291 8, /* Memset max inline. */
2292 2, /* Issue rate. */
2293 ARM_PREFETCH_NOT_BENEFICIAL,
2294 tune_params::PREF_CONST_POOL_TRUE,
2295 tune_params::PREF_LDRD_FALSE,
2296 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2298 tune_params::DISPARAGE_FLAGS_NEITHER,
2299 tune_params::PREF_NEON_64_FALSE,
2300 tune_params::PREF_NEON_STRINGOPS_FALSE,
2301 tune_params::FUSE_NOTHING,
2302 tune_params::SCHED_AUTOPREF_OFF
2305 /* Auto-generated CPU, FPU and architecture tables. */
2306 #include "arm-cpu-data.h"
2308 /* The name of the preprocessor macro to define for this architecture. PROFILE
2309 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2310 is thus chosen to be big enough to hold the longest architecture name. */
2312 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
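/* For example, when the selected architecture reports "8A" as its
   preprocessor name, the sprintf in arm_option_override () below rewrites
   this buffer to "__ARM_ARCH_8A__"; the placeholder string merely reserves
   enough space for the longest such name.  */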
2314 /* Supported TLS relocations. */
2316 enum tls_reloc {
2317 TLS_GD32,
2318 TLS_LDM32,
2319 TLS_LDO32,
2320 TLS_IE32,
2321 TLS_LE32,
2322 TLS_DESCSEQ /* GNU scheme */
2325 /* The maximum number of insns to be used when loading a constant. */
2326 inline static int
2327 arm_constant_limit (bool size_p)
2329 return size_p ? 1 : current_tune->constant_limit;
2332 /* Emit an insn that's a simple single-set. Both the operands must be known
2333 to be valid. */
2334 inline static rtx_insn *
2335 emit_set_insn (rtx x, rtx y)
2337 return emit_insn (gen_rtx_SET (x, y));
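/* For illustration: emit_set_insn (reg, const0_rtx) emits a single
   (set (reg ...) (const_int 0)) insn; no validation of the operands is
   performed here.  */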
2340 /* Return the number of bits set in VALUE. */
2341 static unsigned
2342 bit_count (unsigned long value)
2344 unsigned long count = 0;
2346 while (value)
2348 count++;
2349 value &= value - 1; /* Clear the least-significant set bit. */
2352 return count;
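/* Worked example for the loop above: value = 0x58 (binary 1011000).
   Each iteration of value &= value - 1 clears the lowest set bit:
   0x58 -> 0x50 -> 0x40 -> 0x00, so count = 3.  */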
2355 /* Return the number of bits set in BMAP. */
2356 static unsigned
2357 bitmap_popcount (const sbitmap bmap)
2359 unsigned int count = 0;
2360 unsigned int n = 0;
2361 sbitmap_iterator sbi;
2363 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2364 count++;
2365 return count;
2368 typedef struct
2370 machine_mode mode;
2371 const char *name;
2372 } arm_fixed_mode_set;
2374 /* A small helper for setting fixed-point library libfuncs. */
2376 static void
2377 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2378 const char *funcname, const char *modename,
2379 int num_suffix)
2381 char buffer[50];
2383 if (num_suffix == 0)
2384 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2385 else
2386 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2388 set_optab_libfunc (optable, mode, buffer);
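/* For example, the call arm_set_fixed_optab_libfunc (add_optab, QQmode,
   "add", "qq", 3) made from arm_init_libfuncs below registers the helper
   name "__gnu_addqq3" for QQmode addition.  */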
2391 static void
2392 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2393 machine_mode from, const char *funcname,
2394 const char *toname, const char *fromname)
2396 char buffer[50];
2397 const char *maybe_suffix_2 = "";
2399 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2400 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2401 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2402 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2403 maybe_suffix_2 = "2";
2405 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2406 maybe_suffix_2);
2408 set_conv_libfunc (optable, to, from, buffer);
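/* For example, a conversion from QQmode to HQmode (both signed fract
   modes) selects the "2" suffix and registers "__gnu_fractqqhq2",
   matching the naming logic in fixed-bit.h.  */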
2411 /* Set up library functions unique to ARM. */
2413 static void
2414 arm_init_libfuncs (void)
2416 /* For Linux, we have access to kernel support for atomic operations. */
2417 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2418 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2420 /* There are no special library functions unless we are using the
2421 ARM BPABI. */
2422 if (!TARGET_BPABI)
2423 return;
2425 /* The functions below are described in Section 4 of the "Run-Time
2426 ABI for the ARM architecture", Version 1.0. */
2428 /* Double-precision floating-point arithmetic. Table 2. */
2429 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2430 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2431 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2432 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2433 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2435 /* Double-precision comparisons. Table 3. */
2436 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2437 set_optab_libfunc (ne_optab, DFmode, NULL);
2438 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2439 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2440 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2441 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2442 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2444 /* Single-precision floating-point arithmetic. Table 4. */
2445 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2446 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2447 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2448 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2449 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2451 /* Single-precision comparisons. Table 5. */
2452 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2453 set_optab_libfunc (ne_optab, SFmode, NULL);
2454 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2455 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2456 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2457 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2458 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2460 /* Floating-point to integer conversions. Table 6. */
2461 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2462 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2463 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2464 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2465 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2466 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2467 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2468 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2470 /* Conversions between floating types. Table 7. */
2471 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2472 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2474 /* Integer to floating-point conversions. Table 8. */
2475 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2476 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2477 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2478 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2479 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2480 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2481 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2482 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2484 /* Long long. Table 9. */
2485 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2486 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2487 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2488 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2489 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2490 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2491 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2492 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2494 /* Integer (32/32->32) division. \S 4.3.1. */
2495 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2496 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2498 /* The divmod functions are designed so that they can be used for
2499 plain division, even though they return both the quotient and the
2500 remainder. The quotient is returned in the usual location (i.e.,
2501 r0 for SImode, {r0, r1} for DImode), just as would be expected
2502 for an ordinary division routine. Because the AAPCS calling
2503 conventions specify that all of { r0, r1, r2, r3 } are
2504 call-clobbered registers, there is no need to tell the compiler
2505 explicitly that those registers are clobbered by these
2506 routines. */
2507 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2508 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
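/* So, for illustration, a plain C division such as
     long long q = a / b;
   ends up calling __aeabi_ldivmod; the quotient arrives in {r0, r1} just
   as a division-only routine would return it, and the remainder left in
   {r2, r3} is simply ignored.  */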
2510 /* For SImode division the ABI provides div-without-mod routines,
2511 which are faster. */
2512 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2513 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2515 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2516 divmod libcalls instead. */
2517 set_optab_libfunc (smod_optab, DImode, NULL);
2518 set_optab_libfunc (umod_optab, DImode, NULL);
2519 set_optab_libfunc (smod_optab, SImode, NULL);
2520 set_optab_libfunc (umod_optab, SImode, NULL);
2522 /* Half-precision float operations. The compiler handles all operations
2523 with NULL libfuncs by converting to SFmode.
2524 switch (arm_fp16_format)
2526 case ARM_FP16_FORMAT_IEEE:
2527 case ARM_FP16_FORMAT_ALTERNATIVE:
2529 /* Conversions. */
2530 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2531 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2532 ? "__gnu_f2h_ieee"
2533 : "__gnu_f2h_alternative"));
2534 set_conv_libfunc (sext_optab, SFmode, HFmode,
2535 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2536 ? "__gnu_h2f_ieee"
2537 : "__gnu_h2f_alternative"));
2539 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_d2h_ieee"
2542 : "__gnu_d2h_alternative"));
2544 /* Arithmetic. */
2545 set_optab_libfunc (add_optab, HFmode, NULL);
2546 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2547 set_optab_libfunc (smul_optab, HFmode, NULL);
2548 set_optab_libfunc (neg_optab, HFmode, NULL);
2549 set_optab_libfunc (sub_optab, HFmode, NULL);
2551 /* Comparisons. */
2552 set_optab_libfunc (eq_optab, HFmode, NULL);
2553 set_optab_libfunc (ne_optab, HFmode, NULL);
2554 set_optab_libfunc (lt_optab, HFmode, NULL);
2555 set_optab_libfunc (le_optab, HFmode, NULL);
2556 set_optab_libfunc (ge_optab, HFmode, NULL);
2557 set_optab_libfunc (gt_optab, HFmode, NULL);
2558 set_optab_libfunc (unord_optab, HFmode, NULL);
2559 break;
2561 default:
2562 break;
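/* Illustrative consequence of the NULL arithmetic and comparison libfuncs
   above: an HFmode addition is expanded by widening both operands with
   __gnu_h2f_ieee (or the _alternative variant), adding in SFmode, and
   narrowing the result back with __gnu_f2h_ieee.  */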
2565 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 const arm_fixed_mode_set fixed_arith_modes[] =
2569 { QQmode, "qq" },
2570 { UQQmode, "uqq" },
2571 { HQmode, "hq" },
2572 { UHQmode, "uhq" },
2573 { SQmode, "sq" },
2574 { USQmode, "usq" },
2575 { DQmode, "dq" },
2576 { UDQmode, "udq" },
2577 { TQmode, "tq" },
2578 { UTQmode, "utq" },
2579 { HAmode, "ha" },
2580 { UHAmode, "uha" },
2581 { SAmode, "sa" },
2582 { USAmode, "usa" },
2583 { DAmode, "da" },
2584 { UDAmode, "uda" },
2585 { TAmode, "ta" },
2586 { UTAmode, "uta" }
2588 const arm_fixed_mode_set fixed_conv_modes[] =
2590 { QQmode, "qq" },
2591 { UQQmode, "uqq" },
2592 { HQmode, "hq" },
2593 { UHQmode, "uhq" },
2594 { SQmode, "sq" },
2595 { USQmode, "usq" },
2596 { DQmode, "dq" },
2597 { UDQmode, "udq" },
2598 { TQmode, "tq" },
2599 { UTQmode, "utq" },
2600 { HAmode, "ha" },
2601 { UHAmode, "uha" },
2602 { SAmode, "sa" },
2603 { USAmode, "usa" },
2604 { DAmode, "da" },
2605 { UDAmode, "uda" },
2606 { TAmode, "ta" },
2607 { UTAmode, "uta" },
2608 { QImode, "qi" },
2609 { HImode, "hi" },
2610 { SImode, "si" },
2611 { DImode, "di" },
2612 { TImode, "ti" },
2613 { SFmode, "sf" },
2614 { DFmode, "df" }
2616 unsigned int i, j;
2618 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2620 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2621 "add", fixed_arith_modes[i].name, 3);
2622 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2623 "ssadd", fixed_arith_modes[i].name, 3);
2624 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2625 "usadd", fixed_arith_modes[i].name, 3);
2626 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2627 "sub", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2629 "sssub", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2631 "ussub", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2633 "mul", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2635 "ssmul", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2637 "usmul", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2639 "div", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2641 "udiv", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2643 "ssdiv", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2645 "usdiv", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2647 "neg", fixed_arith_modes[i].name, 2);
2648 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2649 "ssneg", fixed_arith_modes[i].name, 2);
2650 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2651 "usneg", fixed_arith_modes[i].name, 2);
2652 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2653 "ashl", fixed_arith_modes[i].name, 3);
2654 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2655 "ashr", fixed_arith_modes[i].name, 3);
2656 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2657 "lshr", fixed_arith_modes[i].name, 3);
2658 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2659 "ssashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2661 "usashl", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2663 "cmp", fixed_arith_modes[i].name, 2);
2666 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2667 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2669 if (i == j
2670 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2671 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2672 continue;
2674 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2675 fixed_conv_modes[j].mode, "fract",
2676 fixed_conv_modes[i].name,
2677 fixed_conv_modes[j].name);
2678 arm_set_fixed_conv_libfunc (satfract_optab,
2679 fixed_conv_modes[i].mode,
2680 fixed_conv_modes[j].mode, "satfract",
2681 fixed_conv_modes[i].name,
2682 fixed_conv_modes[j].name);
2683 arm_set_fixed_conv_libfunc (fractuns_optab,
2684 fixed_conv_modes[i].mode,
2685 fixed_conv_modes[j].mode, "fractuns",
2686 fixed_conv_modes[i].name,
2687 fixed_conv_modes[j].name);
2688 arm_set_fixed_conv_libfunc (satfractuns_optab,
2689 fixed_conv_modes[i].mode,
2690 fixed_conv_modes[j].mode, "satfractuns",
2691 fixed_conv_modes[i].name,
2692 fixed_conv_modes[j].name);
2696 if (TARGET_AAPCS_BASED)
2697 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2700 /* On AAPCS systems, this is the "struct __va_list". */
2701 static GTY(()) tree va_list_type;
2703 /* Return the type to use as __builtin_va_list. */
2704 static tree
2705 arm_build_builtin_va_list (void)
2707 tree va_list_name;
2708 tree ap_field;
2710 if (!TARGET_AAPCS_BASED)
2711 return std_build_builtin_va_list ();
2713 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2714 defined as:
2716 struct __va_list
2718 void *__ap;
2721 The C Library ABI further reinforces this definition in \S
2722 4.1.
2724 We must follow this definition exactly. The structure tag
2725 name is visible in C++ mangled names, and thus forms a part
2726 of the ABI. The field name may be used by people who
2727 #include <stdarg.h>. */
2728 /* Create the type. */
2729 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2730 /* Give it the required name. */
2731 va_list_name = build_decl (BUILTINS_LOCATION,
2732 TYPE_DECL,
2733 get_identifier ("__va_list"),
2734 va_list_type);
2735 DECL_ARTIFICIAL (va_list_name) = 1;
2736 TYPE_NAME (va_list_type) = va_list_name;
2737 TYPE_STUB_DECL (va_list_type) = va_list_name;
2738 /* Create the __ap field. */
2739 ap_field = build_decl (BUILTINS_LOCATION,
2740 FIELD_DECL,
2741 get_identifier ("__ap"),
2742 ptr_type_node);
2743 DECL_ARTIFICIAL (ap_field) = 1;
2744 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2745 TYPE_FIELDS (va_list_type) = ap_field;
2746 /* Compute its layout. */
2747 layout_type (va_list_type);
2749 return va_list_type;
2752 /* Return an expression of type "void *" pointing to the next
2753 available argument in a variable-argument list. VALIST is the
2754 user-level va_list object, of type __builtin_va_list. */
2755 static tree
2756 arm_extract_valist_ptr (tree valist)
2758 if (TREE_TYPE (valist) == error_mark_node)
2759 return error_mark_node;
2761 /* On an AAPCS target, the pointer is stored within "struct
2762 va_list". */
2763 if (TARGET_AAPCS_BASED)
2765 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2766 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2767 valist, ap_field, NULL_TREE);
2770 return valist;
2773 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2774 static void
2775 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2777 valist = arm_extract_valist_ptr (valist);
2778 std_expand_builtin_va_start (valist, nextarg);
2781 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2782 static tree
2783 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2784 gimple_seq *post_p)
2786 valist = arm_extract_valist_ptr (valist);
2787 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2790 /* Check any incompatible options that the user has specified. */
2791 static void
2792 arm_option_check_internal (struct gcc_options *opts)
2794 int flags = opts->x_target_flags;
2796 /* iWMMXt and NEON are incompatible. */
2797 if (TARGET_IWMMXT
2798 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2799 error ("iWMMXt and NEON are incompatible");
2801 /* Make sure that the processor choice does not conflict with any of the
2802 other command line choices. */
2803 if (TARGET_ARM_P (flags)
2804 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2805 error ("target CPU does not support ARM mode");
2807 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2808 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2809 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2812 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814 /* If this target is normally configured to use APCS frames, warn if they
2815 are turned off and debugging is turned on. */
2816 if (TARGET_ARM_P (flags)
2817 && write_symbols != NO_DEBUG
2818 && !TARGET_APCS_FRAME
2819 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2820 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822 /* iWMMXt unsupported under Thumb mode. */
2823 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2824 error ("iWMMXt unsupported under Thumb mode");
2826 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2827 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2831 error ("RTP PIC is incompatible with Thumb");
2832 flag_pic = 0;
2835 /* We only support -mslow-flash-data on armv7-m targets. */
2836 if (target_slow_flash_data
2837 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2838 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2839 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2841 /* We only support pure-code on Thumb-2 M-profile targets. */
2842 if (target_pure_code
2843 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2844 error ("-mpure-code only supports non-pic code on armv7-m targets");
2848 /* Recompute the global settings depending on target attribute options. */
2850 static void
2851 arm_option_params_internal (void)
2853 /* If we are not using the default (ARM mode) section anchor offset
2854 ranges, then set the correct ranges now. */
2855 if (TARGET_THUMB1)
2857 /* Thumb-1 LDR instructions cannot have negative offsets.
2858 Permissible positive offset ranges are 5-bit (for byte loads),
2859 6-bit (for halfword loads), or 7-bit (for word loads).
2860 Empirical results suggest a 7-bit anchor range gives the best
2861 overall code size. */
2862 targetm.min_anchor_offset = 0;
2863 targetm.max_anchor_offset = 127;
2865 else if (TARGET_THUMB2)
2867 /* The minimum is set such that the total size of the block
2868 for a particular anchor is 248 + 1 + 4095 bytes, which is
2869 divisible by eight, ensuring natural spacing of anchors. */
2870 targetm.min_anchor_offset = -248;
2871 targetm.max_anchor_offset = 4095;
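/* Sanity check of the arithmetic above: the block spans offsets
   -248 .. 4095, i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.  */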
2873 else
2875 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2876 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2879 if (optimize_size)
2881 /* If optimizing for size, bump the number of instructions that we
2882 are prepared to conditionally execute (even on a StrongARM). */
2883 max_insns_skipped = 6;
2885 /* For THUMB2, we limit the conditional sequence to one IT block. */
2886 if (TARGET_THUMB2)
2887 max_insns_skipped = arm_restrict_it ? 1 : 4;
2889 else
2890 /* When -mrestrict-it is in use, tone down the if-conversion. */
2891 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2892 ? 1 : current_tune->max_insns_skipped;
2895 /* True if -mflip-thumb should next add an attribute for the default
2896 mode, false if it should next add an attribute for the opposite mode. */
2897 static GTY(()) bool thumb_flipper;
2899 /* Options after initial target override. */
2900 static GTY(()) tree init_optimize;
2902 static void
2903 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 if (opts->x_align_functions <= 0)
2906 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2907 && opts->x_optimize_size ? 2 : 4;
2910 /* Implement targetm.override_options_after_change. */
2912 static void
2913 arm_override_options_after_change (void)
2915 arm_configure_build_target (&arm_active_target,
2916 TREE_TARGET_OPTION (target_option_default_node),
2917 &global_options_set, false);
2919 arm_override_options_after_change_1 (&global_options);
2922 static void
2923 arm_option_restore (struct gcc_options *, struct cl_target_option *ptr)
2925 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2926 false);
2929 /* Reset options between modes that the user has specified. */
2930 static void
2931 arm_option_override_internal (struct gcc_options *opts,
2932 struct gcc_options *opts_set)
2934 arm_override_options_after_change_1 (opts);
2936 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2938 /* The default is to enable interworking, so this warning message would
2939 be confusing to users who have just compiled with, e.g., -march=armv3. */
2940 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2941 opts->x_target_flags &= ~MASK_INTERWORK;
2944 if (TARGET_THUMB_P (opts->x_target_flags)
2945 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2947 warning (0, "target CPU does not support THUMB instructions");
2948 opts->x_target_flags &= ~MASK_THUMB;
2951 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2953 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2954 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 /* Callee super interworking implies thumb interworking. Adding
2958 this to the flags here simplifies the logic elsewhere. */
2959 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2960 opts->x_target_flags |= MASK_INTERWORK;
2962 /* Need to remember initial values so combinations of options like
2963 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2964 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2966 if (! opts_set->x_arm_restrict_it)
2967 opts->x_arm_restrict_it = arm_arch8;
2969 /* ARM execution state and M profile don't have [restrict] IT. */
2970 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2971 opts->x_arm_restrict_it = 0;
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2975 i.e. Thumb2 and ARM state only.
2976 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2977 - ARMv8 architecture-based processors.
2979 Disable -munaligned-access by default for
2980 - all pre-ARMv6 architecture-based processors
2981 - ARMv6-M architecture-based processors
2982 - ARMv8-M Baseline processors. */
2984 if (! opts_set->x_unaligned_access)
2986 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2987 && arm_arch6 && (arm_arch_notm || arm_arch7));
2989 else if (opts->x_unaligned_access == 1
2990 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2992 warning (0, "target CPU does not support unaligned accesses");
2993 opts->x_unaligned_access = 0;
2996 /* Don't warn since it's on by default in -O2. */
2997 if (TARGET_THUMB1_P (opts->x_target_flags))
2998 opts->x_flag_schedule_insns = 0;
2999 else
3000 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3002 /* Disable shrink-wrap when optimizing function for size, since it tends to
3003 generate additional returns. */
3004 if (optimize_function_for_size_p (cfun)
3005 && TARGET_THUMB2_P (opts->x_target_flags))
3006 opts->x_flag_shrink_wrap = false;
3007 else
3008 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3010 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3011 - epilogue_insns - does not accurately model the corresponding insns
3012 emitted in the asm file. In particular, see the comment in thumb_exit
3013 'Find out how many of the (return) argument registers we can corrupt'.
3014 As a consequence, the epilogue may clobber registers without fipa-ra
3015 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3016 TODO: Accurately model clobbers for epilogue_insns and reenable
3017 fipa-ra. */
3018 if (TARGET_THUMB1_P (opts->x_target_flags))
3019 opts->x_flag_ipa_ra = 0;
3020 else
3021 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3023 /* Thumb2 inline assembly code should always use unified syntax.
3024 This will apply to ARM and Thumb1 eventually. */
3025 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3027 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3028 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3029 #endif
3032 /* Convert a static initializer array of feature bits to sbitmap
3033 representation. */
3034 static void
3035 arm_initialize_isa (sbitmap isa, const enum isa_feature *isa_bits)
3037 bitmap_clear (isa);
3038 while (*isa_bits != isa_nobit)
3039 bitmap_set_bit (isa, *(isa_bits++));
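/* A typical caller builds the terminated bit list statically, e.g. the
   quirk_bitlist and fpu_bitlist arrays in arm_option_override () below,
   each of which ends with isa_nobit, the sentinel this loop stops on.  */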
3042 static sbitmap isa_all_fpubits;
3043 static sbitmap isa_quirkbits;
3045 /* Configure a build target TARGET from the user-specified options OPTS and
3046 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3047 architecture have been specified, but the two are not identical. */
3048 void
3049 arm_configure_build_target (struct arm_build_target *target,
3050 struct cl_target_option *opts,
3051 struct gcc_options *opts_set,
3052 bool warn_compatible)
3054 const struct processors *arm_selected_tune = NULL;
3055 const struct processors *arm_selected_arch = NULL;
3056 const struct processors *arm_selected_cpu = NULL;
3057 const struct arm_fpu_desc *arm_selected_fpu = NULL;
3059 bitmap_clear (target->isa);
3060 target->core_name = NULL;
3061 target->arch_name = NULL;
3063 if (opts_set->x_arm_arch_option)
3064 arm_selected_arch = &all_architectures[opts->x_arm_arch_option];
3066 if (opts_set->x_arm_cpu_option)
3068 arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
3069 arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
3072 if (opts_set->x_arm_tune_option)
3073 arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];
3075 if (arm_selected_arch)
3077 arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);
3079 if (arm_selected_cpu)
3081 auto_sbitmap cpu_isa (isa_num_bits);
3083 arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
3084 bitmap_xor (cpu_isa, cpu_isa, target->isa);
3085 /* Ignore any bits that are quirk bits. */
3086 bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
3087 /* Ignore (for now) any bits that might be set by -mfpu. */
3088 bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);
3090 if (!bitmap_empty_p (cpu_isa))
3092 if (warn_compatible)
3093 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3094 arm_selected_cpu->name, arm_selected_arch->name);
3095 /* -march wins for code generation.
3096 -mcpu wins for default tuning. */
3097 if (!arm_selected_tune)
3098 arm_selected_tune = arm_selected_cpu;
3100 arm_selected_cpu = arm_selected_arch;
3102 else
3104 /* Architecture and CPU are essentially the same.
3105 Prefer the CPU setting. */
3106 arm_selected_arch = NULL;
3109 target->core_name = arm_selected_cpu->name;
3111 else
3113 /* Pick a CPU based on the architecture. */
3114 arm_selected_cpu = arm_selected_arch;
3115 target->arch_name = arm_selected_arch->name;
3116 /* Note: target->core_name is left unset in this path. */
3119 else if (arm_selected_cpu)
3121 target->core_name = arm_selected_cpu->name;
3122 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3124 /* If the user did not specify a processor, choose one for them. */
3125 else
3127 const struct processors * sel;
3128 auto_sbitmap sought_isa (isa_num_bits);
3129 bitmap_clear (sought_isa);
3130 auto_sbitmap default_isa (isa_num_bits);
3132 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3133 gcc_assert (arm_selected_cpu->name);
3135 /* RWE: All of the selection logic below (to the end of this
3136 'if' clause) looks somewhat suspect. It appears to be mostly
3137 there to support forcing thumb support when the default CPU
3138 does not have thumb (somewhat dubious in terms of what the
3139 user might be expecting). I think it should be removed once
3140 support for the pre-thumb era cores is removed. */
3141 sel = arm_selected_cpu;
3142 arm_initialize_isa (default_isa, sel->isa_bits);
3144 /* Now check to see if the user has specified any command line
3145 switches that require certain abilities from the cpu. */
3147 if (TARGET_INTERWORK || TARGET_THUMB)
3149 bitmap_set_bit (sought_isa, isa_bit_thumb);
3150 bitmap_set_bit (sought_isa, isa_bit_mode32);
3152 /* There are no ARM processors that support both APCS-26 and
3153 interworking. Therefore we forcibly remove MODE26 from
3154 the isa features here (if it was set), so that the
3155 search below will always be able to find a compatible
3156 processor. */
3157 bitmap_clear_bit (default_isa, isa_bit_mode26);
3160 /* If there are such requirements and the default CPU does not
3161 satisfy them, we need to run over the complete list of
3162 cores looking for one that is satisfactory. */
3163 if (!bitmap_empty_p (sought_isa)
3164 && !bitmap_subset_p (sought_isa, default_isa))
3166 auto_sbitmap candidate_isa (isa_num_bits);
3167 /* We're only interested in a CPU with at least the
3168 capabilities of the default CPU and the required
3169 additional features. */
3170 bitmap_ior (default_isa, default_isa, sought_isa);
3172 /* Try to locate a CPU type that supports all of the abilities
3173 of the default CPU, plus the extra abilities requested by
3174 the user. */
3175 for (sel = all_cores; sel->name != NULL; sel++)
3177 arm_initialize_isa (candidate_isa, sel->isa_bits);
3178 /* An exact match? */
3179 if (bitmap_equal_p (default_isa, candidate_isa))
3180 break;
3183 if (sel->name == NULL)
3185 unsigned current_bit_count = isa_num_bits;
3186 const struct processors * best_fit = NULL;
3188 /* Ideally we would like to issue an error message here
3189 saying that it was not possible to find a CPU compatible
3190 with the default CPU, but which also supports the command
3191 line options specified by the programmer, and so they
3192 ought to use the -mcpu=<name> command line option to
3193 override the default CPU type.
3195 If we cannot find a CPU that has exactly the
3196 characteristics of the default CPU and the given
3197 command line options we scan the array again looking
3198 for a best match. The best match must have at least
3199 the capabilities of the perfect match. */
3200 for (sel = all_cores; sel->name != NULL; sel++)
3202 arm_initialize_isa (candidate_isa, sel->isa_bits);
3204 if (bitmap_subset_p (default_isa, candidate_isa))
3206 unsigned count;
3208 bitmap_and_compl (candidate_isa, candidate_isa,
3209 default_isa);
3210 count = bitmap_popcount (candidate_isa);
3212 if (count < current_bit_count)
3214 best_fit = sel;
3215 current_bit_count = count;
3219 gcc_assert (best_fit);
3220 sel = best_fit;
3223 arm_selected_cpu = sel;
3226 /* Now we know the CPU, we can finally initialize the target
3227 structure. */
3228 target->core_name = arm_selected_cpu->name;
3229 arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
3232 gcc_assert (arm_selected_cpu);
3234 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3236 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3237 auto_sbitmap fpu_bits (isa_num_bits);
3239 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3240 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3241 bitmap_ior (target->isa, target->isa, fpu_bits);
3243 else if (target->core_name == NULL)
3244 /* To support this we need to be able to parse FPU feature options
3245 from the architecture string. */
3246 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3248 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3249 if (!arm_selected_tune)
3250 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3252 /* Finish initializing the target structure. */
3253 target->arch_pp_name = arm_selected_cpu->arch;
3254 target->base_arch = arm_selected_cpu->base_arch;
3255 target->arch_core = arm_selected_cpu->core;
3257 target->tune_flags = arm_selected_tune->tune_flags;
3258 target->tune = arm_selected_tune->tune;
3259 target->tune_core = arm_selected_tune->core;
3262 /* Fix up any incompatible options that the user has specified. */
3263 static void
3264 arm_option_override (void)
3266 static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
3267 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3268 cl_target_option opts;
3270 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3271 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3273 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3274 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3276 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3278 if (!global_options_set.x_arm_fpu_index)
3280 const char *target_fpu_name;
3281 bool ok;
3282 int fpu_index;
3284 #ifdef FPUTYPE_DEFAULT
3285 target_fpu_name = FPUTYPE_DEFAULT;
3286 #else
3287 target_fpu_name = "vfp";
3288 #endif
3290 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
3291 CL_TARGET);
3292 gcc_assert (ok);
3293 arm_fpu_index = (enum fpu_type) fpu_index;
3296 cl_target_option_save (&opts, &global_options);
3297 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3298 true);
3300 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3301 SUBTARGET_OVERRIDE_OPTIONS;
3302 #endif
3304 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3305 arm_base_arch = arm_active_target.base_arch;
3307 arm_tune = arm_active_target.tune_core;
3308 tune_flags = arm_active_target.tune_flags;
3309 current_tune = arm_active_target.tune;
3311 /* TBD: Dwarf info for apcs frame is not handled yet. */
3312 if (TARGET_APCS_FRAME)
3313 flag_shrink_wrap = false;
3315 /* BPABI targets use linker tricks to allow interworking on cores
3316 without thumb support. */
3317 if (TARGET_INTERWORK
3318 && !TARGET_BPABI
3319 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3321 warning (0, "target CPU does not support interworking" );
3322 target_flags &= ~MASK_INTERWORK;
3325 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3327 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3328 target_flags |= MASK_APCS_FRAME;
3331 if (TARGET_POKE_FUNCTION_NAME)
3332 target_flags |= MASK_APCS_FRAME;
3334 if (TARGET_APCS_REENT && flag_pic)
3335 error ("-fpic and -mapcs-reent are incompatible");
3337 if (TARGET_APCS_REENT)
3338 warning (0, "APCS reentrant code not supported. Ignored");
3340 /* Initialize boolean versions of the architectural flags, for use
3341 in the arm.md file. */
3342 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
3343 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
3344 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3345 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
3346 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
3347 arm_arch5te = arm_arch5e
3348 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3349 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
3350 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
3351 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3352 arm_arch6m = arm_arch6 && !arm_arch_notm;
3353 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
3354 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
3355 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
3356 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
3357 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
3358 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3359 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3360 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3361 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3362 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3363 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3364 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3365 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3366 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3367 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3368 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3369 if (arm_fp16_inst)
3371 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3372 error ("selected fp16 options are incompatible");
3373 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3377 /* Set up some tuning parameters. */
3378 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3379 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3380 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3381 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3382 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3383 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3385 /* And finally, set up some quirks. */
3386 arm_arch_no_volatile_ce
3387 = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
3388 arm_arch6kz
3389 = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);
3391 /* V5 code we generate is completely interworking capable, so we turn off
3392 TARGET_INTERWORK here to avoid many tests later on. */
3394 /* XXX However, we must pass the right pre-processor defines to CPP
3395 or GLD can get confused. This is a hack. */
3396 if (TARGET_INTERWORK)
3397 arm_cpp_interwork = 1;
3399 if (arm_arch5)
3400 target_flags &= ~MASK_INTERWORK;
3402 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3403 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3405 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3406 error ("iwmmxt abi requires an iwmmxt capable cpu");
3408 /* If soft-float is specified then don't use FPU. */
3409 if (TARGET_SOFT_FLOAT)
3410 arm_fpu_attr = FPU_NONE;
3411 else
3412 arm_fpu_attr = FPU_VFP;
3414 if (TARGET_AAPCS_BASED)
3416 if (TARGET_CALLER_INTERWORKING)
3417 error ("AAPCS does not support -mcaller-super-interworking");
3418 else
3419 if (TARGET_CALLEE_INTERWORKING)
3420 error ("AAPCS does not support -mcallee-super-interworking");
3423 /* __fp16 support currently assumes the core has ldrh. */
3424 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3425 sorry ("__fp16 and no ldrh");
3427 if (TARGET_AAPCS_BASED)
3429 if (arm_abi == ARM_ABI_IWMMXT)
3430 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3431 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3432 && TARGET_HARD_FLOAT)
3434 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3435 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
3436 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3438 else
3439 arm_pcs_default = ARM_PCS_AAPCS;
3441 else
3443 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3444 sorry ("-mfloat-abi=hard and VFP");
3446 if (arm_abi == ARM_ABI_APCS)
3447 arm_pcs_default = ARM_PCS_APCS;
3448 else
3449 arm_pcs_default = ARM_PCS_ATPCS;
3452 /* For arm2/3 there is no need to do any scheduling if we are doing
3453 software floating-point. */
3454 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3455 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3457 /* Use the cp15 method if it is available. */
3458 if (target_thread_pointer == TP_AUTO)
3460 if (arm_arch6k && !TARGET_THUMB1)
3461 target_thread_pointer = TP_CP15;
3462 else
3463 target_thread_pointer = TP_SOFT;
3466 /* Override the default structure alignment for AAPCS ABI. */
3467 if (!global_options_set.x_arm_structure_size_boundary)
3469 if (TARGET_AAPCS_BASED)
3470 arm_structure_size_boundary = 8;
3472 else
3474 if (arm_structure_size_boundary != 8
3475 && arm_structure_size_boundary != 32
3476 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3478 if (ARM_DOUBLEWORD_ALIGN)
3479 warning (0,
3480 "structure size boundary can only be set to 8, 32 or 64");
3481 else
3482 warning (0, "structure size boundary can only be set to 8 or 32");
3483 arm_structure_size_boundary
3484 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3488 if (TARGET_VXWORKS_RTP)
3490 if (!global_options_set.x_arm_pic_data_is_text_relative)
3491 arm_pic_data_is_text_relative = 0;
3493 else if (flag_pic
3494 && !arm_pic_data_is_text_relative
3495 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3496 /* When text & data segments don't have a fixed displacement, the
3497 intended use is with a single, read only, pic base register.
3498 Unless the user explicitly requested not to do that, set
3499 it. */
3500 target_flags |= MASK_SINGLE_PIC_BASE;
3502 /* If stack checking is disabled, we can use r10 as the PIC register,
3503 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3504 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3506 if (TARGET_VXWORKS_RTP)
3507 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3508 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3511 if (flag_pic && TARGET_VXWORKS_RTP)
3512 arm_pic_register = 9;
3514 if (arm_pic_register_string != NULL)
3516 int pic_register = decode_reg_name (arm_pic_register_string);
3518 if (!flag_pic)
3519 warning (0, "-mpic-register= is useless without -fpic");
3521 /* Prevent the user from choosing an obviously stupid PIC register. */
3522 else if (pic_register < 0 || call_used_regs[pic_register]
3523 || pic_register == HARD_FRAME_POINTER_REGNUM
3524 || pic_register == STACK_POINTER_REGNUM
3525 || pic_register >= PC_REGNUM
3526 || (TARGET_VXWORKS_RTP
3527 && (unsigned int) pic_register != arm_pic_register))
3528 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3529 else
3530 arm_pic_register = pic_register;
3533 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3534 if (fix_cm3_ldrd == 2)
3536 if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
3537 fix_cm3_ldrd = 1;
3538 else
3539 fix_cm3_ldrd = 0;
3542 /* Hot/Cold partitioning is not currently supported, since we can't
3543 handle literal pool placement in that case. */
3544 if (flag_reorder_blocks_and_partition)
3546 inform (input_location,
3547 "-freorder-blocks-and-partition not supported on this architecture");
3548 flag_reorder_blocks_and_partition = 0;
3549 flag_reorder_blocks = 1;
3552 if (flag_pic)
3553 /* Hoisting PIC address calculations more aggressively provides a small,
3554 but measurable, size reduction for PIC code. Therefore, we decrease
3555 the bar for unrestricted expression hoisting to the cost of PIC address
3556 calculation, which is 2 instructions. */
3557 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3561 /* ARM EABI defaults to strict volatile bitfields. */
3562 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3563 && abi_version_at_least(2))
3564 flag_strict_volatile_bitfields = 1;
3566 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3567 have deemed it beneficial (signified by setting
3568 prefetch.num_slots to 1 or more). */
3569 if (flag_prefetch_loop_arrays < 0
3570 && HAVE_prefetch
3571 && optimize >= 3
3572 && current_tune->prefetch.num_slots > 0)
3573 flag_prefetch_loop_arrays = 1;
3575 /* Set up parameters to be used in prefetching algorithm. Do not
3576 override the defaults unless we are tuning for a core we have
3577 researched values for. */
3578 if (current_tune->prefetch.num_slots > 0)
3579 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3580 current_tune->prefetch.num_slots,
3581 global_options.x_param_values,
3582 global_options_set.x_param_values);
3583 if (current_tune->prefetch.l1_cache_line_size >= 0)
3584 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3585 current_tune->prefetch.l1_cache_line_size,
3586 global_options.x_param_values,
3587 global_options_set.x_param_values);
3588 if (current_tune->prefetch.l1_cache_size >= 0)
3589 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3590 current_tune->prefetch.l1_cache_size,
3591 global_options.x_param_values,
3592 global_options_set.x_param_values);
3594 /* Use Neon to perform 64-bit operations rather than core
3595 registers. */
3596 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3597 if (use_neon_for_64bits == 1)
3598 prefer_neon_for_64bits = true;
3600 /* Use the alternative scheduling-pressure algorithm by default. */
3601 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3602 global_options.x_param_values,
3603 global_options_set.x_param_values);
3605 /* Look through ready list and all of queue for instructions
3606 relevant for L2 auto-prefetcher. */
3607 int param_sched_autopref_queue_depth;
3609 switch (current_tune->sched_autopref)
3611 case tune_params::SCHED_AUTOPREF_OFF:
3612 param_sched_autopref_queue_depth = -1;
3613 break;
3615 case tune_params::SCHED_AUTOPREF_RANK:
3616 param_sched_autopref_queue_depth = 0;
3617 break;
3619 case tune_params::SCHED_AUTOPREF_FULL:
3620 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3621 break;
3623 default:
3624 gcc_unreachable ();
3627 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3628 param_sched_autopref_queue_depth,
3629 global_options.x_param_values,
3630 global_options_set.x_param_values);
3632 /* Currently, for slow flash data, we just disable literal pools. We also
3633 disable them for pure-code. */
3634 if (target_slow_flash_data || target_pure_code)
3635 arm_disable_literal_pool = true;
3637 if (use_cmse && !arm_arch_cmse)
3638 error ("target CPU does not support ARMv8-M Security Extensions");
3640 /* Disable scheduling fusion by default if it's not an armv7 processor
3641 or doesn't prefer ldrd/strd. */
3642 if (flag_schedule_fusion == 2
3643 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3644 flag_schedule_fusion = 0;
3646 /* Need to remember initial options before they are overridden. */
3647 init_optimize = build_optimization_node (&global_options);
3649 arm_option_override_internal (&global_options, &global_options_set);
3650 arm_option_check_internal (&global_options);
3651 arm_option_params_internal ();
3653 /* Create the default target_options structure. */
3654 target_option_default_node = target_option_current_node
3655 = build_target_option_node (&global_options);
3657 /* Register global variables with the garbage collector. */
3658 arm_add_gc_roots ();
3660 /* Set the initial mode for testing. */
3661 thumb_flipper = TARGET_THUMB;
3664 static void
3665 arm_add_gc_roots (void)
3667 gcc_obstack_init(&minipool_obstack);
3668 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3671 /* A table of known ARM exception types.
3672 For use with the interrupt function attribute. */
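/* As an illustrative (hypothetical) example of how these strings are used,
   a handler might be declared as
     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
   arm_isr_value below then matches the string "IRQ" against this table
   and selects ARM_FT_ISR.  */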
3674 typedef struct
3676 const char *const arg;
3677 const unsigned long return_value;
3679 isr_attribute_arg;
3681 static const isr_attribute_arg isr_attribute_args [] =
3683 { "IRQ", ARM_FT_ISR },
3684 { "irq", ARM_FT_ISR },
3685 { "FIQ", ARM_FT_FIQ },
3686 { "fiq", ARM_FT_FIQ },
3687 { "ABORT", ARM_FT_ISR },
3688 { "abort", ARM_FT_ISR },
3689 { "ABORT", ARM_FT_ISR },
3690 { "abort", ARM_FT_ISR },
3691 { "UNDEF", ARM_FT_EXCEPTION },
3692 { "undef", ARM_FT_EXCEPTION },
3693 { "SWI", ARM_FT_EXCEPTION },
3694 { "swi", ARM_FT_EXCEPTION },
3695 { NULL, ARM_FT_NORMAL }
3698 /* Returns the (interrupt) function type of the current
3699 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3701 static unsigned long
3702 arm_isr_value (tree argument)
3704 const isr_attribute_arg * ptr;
3705 const char * arg;
3707 if (!arm_arch_notm)
3708 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3710 /* No argument - default to IRQ. */
3711 if (argument == NULL_TREE)
3712 return ARM_FT_ISR;
3714 /* Get the value of the argument. */
3715 if (TREE_VALUE (argument) == NULL_TREE
3716 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3717 return ARM_FT_UNKNOWN;
3719 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3721 /* Check it against the list of known arguments. */
3722 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3723 if (streq (arg, ptr->arg))
3724 return ptr->return_value;
3726 /* An unrecognized interrupt type. */
3727 return ARM_FT_UNKNOWN;
3730 /* Computes the type of the current function. */
3732 static unsigned long
3733 arm_compute_func_type (void)
3735 unsigned long type = ARM_FT_UNKNOWN;
3736 tree a;
3737 tree attr;
3739 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3741 /* Decide if the current function is volatile. Such functions
3742 never return, and many memory cycles can be saved by not storing
3743 register values that will never be needed again. This optimization
3744 was added to speed up context switching in a kernel application. */
3745 if (optimize > 0
3746 && (TREE_NOTHROW (current_function_decl)
3747 || !(flag_unwind_tables
3748 || (flag_exceptions
3749 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3750 && TREE_THIS_VOLATILE (current_function_decl))
3751 type |= ARM_FT_VOLATILE;
3753 if (cfun->static_chain_decl != NULL)
3754 type |= ARM_FT_NESTED;
3756 attr = DECL_ATTRIBUTES (current_function_decl);
3758 a = lookup_attribute ("naked", attr);
3759 if (a != NULL_TREE)
3760 type |= ARM_FT_NAKED;
3762 a = lookup_attribute ("isr", attr);
3763 if (a == NULL_TREE)
3764 a = lookup_attribute ("interrupt", attr);
3766 if (a == NULL_TREE)
3767 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3768 else
3769 type |= arm_isr_value (TREE_VALUE (a));
3771 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3772 type |= ARM_FT_CMSE_ENTRY;
3774 return type;
3777 /* Returns the type of the current function. */
3779 unsigned long
3780 arm_current_func_type (void)
3782 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3783 cfun->machine->func_type = arm_compute_func_type ();
3785 return cfun->machine->func_type;
3788 bool
3789 arm_allocate_stack_slots_for_args (void)
3791 /* Naked functions should not allocate stack slots for arguments. */
3792 return !IS_NAKED (arm_current_func_type ());
3795 static bool
3796 arm_warn_func_return (tree decl)
3798 /* Naked functions are implemented entirely in assembly, including the
3799 return sequence, so suppress warnings about this. */
3800 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3804 /* Output assembler code for a block containing the constant parts
3805 of a trampoline, leaving space for the variable parts.
3807 On the ARM, (if r8 is the static chain regnum, and remembering that
3808 referencing pc adds an offset of 8) the trampoline looks like:
3809 ldr r8, [pc, #0]
3810 ldr pc, [pc]
3811 .word static chain value
3812 .word function's address
3813 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3815 static void
3816 arm_asm_trampoline_template (FILE *f)
3818 fprintf (f, "\t.syntax unified\n");
3820 if (TARGET_ARM)
3822 fprintf (f, "\t.arm\n");
3823 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3824 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3826 else if (TARGET_THUMB2)
3828 fprintf (f, "\t.thumb\n");
3829 /* The Thumb-2 trampoline is similar to the arm implementation.
3830 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3831 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3832 STATIC_CHAIN_REGNUM, PC_REGNUM);
3833 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3835 else
3837 ASM_OUTPUT_ALIGN (f, 2);
3838 fprintf (f, "\t.code\t16\n");
3839 fprintf (f, ".Ltrampoline_start:\n");
3840 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3841 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3842 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3843 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3844 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3845 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3847 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3848 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3851 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3853 static void
3854 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3856 rtx fnaddr, mem, a_tramp;
3858 emit_block_move (m_tramp, assemble_trampoline_template (),
3859 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3861 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3862 emit_move_insn (mem, chain_value);
3864 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3865 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3866 emit_move_insn (mem, fnaddr);
3868 a_tramp = XEXP (m_tramp, 0);
3869 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3870 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3871 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3874 /* Thumb trampolines should be entered in thumb mode, so set
3875 the bottom bit of the address. */
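/* For instance (illustration only), a trampoline placed at 0x20000 is
   materialized as the value 0x20001, so that an indirect BX/BLX through
   it enters the stub in Thumb state.  */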
3877 static rtx
3878 arm_trampoline_adjust_address (rtx addr)
3880 if (TARGET_THUMB)
3881 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3882 NULL, 0, OPTAB_LIB_WIDEN);
3883 return addr;
3886 /* Return 1 if it is possible to return using a single instruction.
3887 If SIBLING is non-null, this is a test for a return before a sibling
3888 call. SIBLING is the call insn, so we can examine its register usage. */
3891 use_return_insn (int iscond, rtx sibling)
3893 int regno;
3894 unsigned int func_type;
3895 unsigned long saved_int_regs;
3896 unsigned HOST_WIDE_INT stack_adjust;
3897 arm_stack_offsets *offsets;
3899 /* Never use a return instruction before reload has run. */
3900 if (!reload_completed)
3901 return 0;
3903 func_type = arm_current_func_type ();
3905 /* Naked, volatile and stack alignment functions need special
3906 consideration. */
3907 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3908 return 0;
3910 /* So do interrupt functions that use the frame pointer and Thumb
3911 interrupt functions. */
3912 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3913 return 0;
3915 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3916 && !optimize_function_for_size_p (cfun))
3917 return 0;
3919 offsets = arm_get_frame_offsets ();
3920 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3922 /* As do variadic functions. */
3923 if (crtl->args.pretend_args_size
3924 || cfun->machine->uses_anonymous_args
3925 /* Or if the function calls __builtin_eh_return () */
3926 || crtl->calls_eh_return
3927 /* Or if the function calls alloca */
3928 || cfun->calls_alloca
3929 /* Or if there is a stack adjustment. However, if the stack pointer
3930 is saved on the stack, we can use a pre-incrementing stack load. */
3931 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3932 && stack_adjust == 4))
3933 /* Or if the static chain register was saved above the frame, under the
3934 assumption that the stack pointer isn't saved on the stack. */
3935 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3936 && arm_compute_static_chain_stack_bytes() != 0))
3937 return 0;
3939 saved_int_regs = offsets->saved_regs_mask;
3941 /* Unfortunately, the insn
3943 ldmib sp, {..., sp, ...}
3945 triggers a bug on most SA-110 based devices, such that the stack
3946 pointer won't be correctly restored if the instruction takes a
3947 page fault. We work around this problem by popping r3 along with
3948 the other registers, since that is never slower than executing
3949 another instruction.
3951 We test for !arm_arch5 here, because code for any architecture
3952 less than this could potentially be run on one of the buggy
3953 chips. */
3954 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3956 /* Validate that r3 is a call-clobbered register (always true in
3957 the default abi) ... */
3958 if (!call_used_regs[3])
3959 return 0;
3961 /* ... that it isn't being used for a return value ... */
3962 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3963 return 0;
3965 /* ... or for a tail-call argument ... */
3966 if (sibling)
3968 gcc_assert (CALL_P (sibling));
3970 if (find_regno_fusage (sibling, USE, 3))
3971 return 0;
3974 /* ... and that there are no call-saved registers in r0-r2
3975 (always true in the default ABI). */
3976 if (saved_int_regs & 0x7)
3977 return 0;
3980 /* Can't be done if interworking with Thumb, and any registers have been
3981 stacked. */
3982 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3983 return 0;
3985 /* On StrongARM, conditional returns are expensive if they aren't
3986 taken and multiple registers have been stacked. */
3987 if (iscond && arm_tune_strongarm)
3989 /* Conditional return when just the LR is stored is a simple
3990 conditional-load instruction, that's not expensive. */
3991 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3992 return 0;
3994 if (flag_pic
3995 && arm_pic_register != INVALID_REGNUM
3996 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3997 return 0;
4000 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4001 several instructions if anything needs to be popped. */
4002 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4003 return 0;
4005 /* If there are saved registers but the LR isn't saved, then we need
4006 two instructions for the return. */
4007 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4008 return 0;
4010 /* Can't be done if any of the VFP regs are pushed,
4011 since this also requires an insn. */
4012 if (TARGET_HARD_FLOAT)
4013 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4014 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4015 return 0;
4017 if (TARGET_REALLY_IWMMXT)
4018 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4019 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4020 return 0;
4022 return 1;
4025 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4026 shrink-wrapping if possible. This is the case if we need to emit a
4027 prologue, which we can test by looking at the offsets. */
4028 bool
4029 use_simple_return_p (void)
4031 arm_stack_offsets *offsets;
4033 offsets = arm_get_frame_offsets ();
4034 return offsets->outgoing_args != 0;
4037 /* Return TRUE if int I is a valid immediate ARM constant. */
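/* A couple of hand-worked values, for exposition only: 0x0000ff00 is
   accepted everywhere (0xff shifted left by 8), while 0x00ff00ff is
   rejected in ARM mode but accepted for Thumb-2 via the replicated
   0x00XY00XY pattern handled below.  */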
4040 const_ok_for_arm (HOST_WIDE_INT i)
4042 int lowbit;
4044 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4045 be all zero, or all one. */
4046 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4047 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4048 != ((~(unsigned HOST_WIDE_INT) 0)
4049 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4050 return FALSE;
4052 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4054 /* Fast return for 0 and small values. We must do this for zero, since
4055 the code below can't handle that one case. */
4056 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4057 return TRUE;
4059 /* Get the number of trailing zeros. */
4060 lowbit = ffs((int) i) - 1;
4062 /* Only even shifts are allowed in ARM mode so round down to the
4063 nearest even number. */
4064 if (TARGET_ARM)
4065 lowbit &= ~1;
4067 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4068 return TRUE;
4070 if (TARGET_ARM)
4072 /* Allow rotated constants in ARM mode. */
4073 if (lowbit <= 4
4074 && ((i & ~0xc000003f) == 0
4075 || (i & ~0xf000000f) == 0
4076 || (i & ~0xfc000003) == 0))
4077 return TRUE;
4079 else
4081 HOST_WIDE_INT v;
4083 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4084 v = i & 0xff;
4085 v |= v << 16;
4086 if (i == v || i == (v | (v << 8)))
4087 return TRUE;
4089 /* Allow repeated pattern 0xXY00XY00. */
4090 v = i & 0xff00;
4091 v |= v << 16;
4092 if (i == v)
4093 return TRUE;
4096 return FALSE;
4099 /* Return true if I is a valid constant for the operation CODE. */
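/* For example (illustration only): for SET, 0xffffff00 is not itself a
   valid immediate, but its complement 0x000000ff is, so the value can be
   loaded with a single MVN and this function returns nonzero for it.  */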
4101 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4103 if (const_ok_for_arm (i))
4104 return 1;
4106 switch (code)
4108 case SET:
4109 /* See if we can use movw. */
4110 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4111 return 1;
4112 else
4113 /* Otherwise, try mvn. */
4114 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4116 case PLUS:
4117 /* See if we can use addw or subw. */
4118 if (TARGET_THUMB2
4119 && ((i & 0xfffff000) == 0
4120 || ((-i) & 0xfffff000) == 0))
4121 return 1;
4122 /* Fall through. */
4123 case COMPARE:
4124 case EQ:
4125 case NE:
4126 case GT:
4127 case LE:
4128 case LT:
4129 case GE:
4130 case GEU:
4131 case LTU:
4132 case GTU:
4133 case LEU:
4134 case UNORDERED:
4135 case ORDERED:
4136 case UNEQ:
4137 case UNGE:
4138 case UNLT:
4139 case UNGT:
4140 case UNLE:
4141 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4143 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4144 case XOR:
4145 return 0;
4147 case IOR:
4148 if (TARGET_THUMB2)
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4150 return 0;
4152 case AND:
4153 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4155 default:
4156 gcc_unreachable ();
4160 /* Return true if I is a valid di mode constant for the operation CODE. */
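/* Hand-worked illustration: for AND, 0xffffffff000000ff qualifies because
   the high word is all ones and the low word (0xff) is a valid immediate
   for a 32-bit AND, whereas 0x12345678deadbeef does not, since neither
   half is directly encodable.  */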
4162 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4164 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4165 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4166 rtx hi = GEN_INT (hi_val);
4167 rtx lo = GEN_INT (lo_val);
4169 if (TARGET_THUMB1)
4170 return 0;
4172 switch (code)
4174 case AND:
4175 case IOR:
4176 case XOR:
4177 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4178 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4179 case PLUS:
4180 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4182 default:
4183 return 0;
4187 /* Emit a sequence of insns to handle a large constant.
4188 CODE is the code of the operation required, it can be any of SET, PLUS,
4189 IOR, AND, XOR, MINUS;
4190 MODE is the mode in which the operation is being performed;
4191 VAL is the integer to operate on;
4192 SOURCE is the other operand (a register, or a null-pointer for SET);
4193 SUBTARGETS means it is safe to create scratch registers if that will
4194 either produce a simpler sequence, or we will want to cse the values.
4195 Return value is the number of insns emitted. */
4197 /* ??? Tweak this for thumb2. */
4199 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4200 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4202 rtx cond;
4204 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4205 cond = COND_EXEC_TEST (PATTERN (insn));
4206 else
4207 cond = NULL_RTX;
4209 if (subtargets || code == SET
4210 || (REG_P (target) && REG_P (source)
4211 && REGNO (target) != REGNO (source)))
4213 /* After arm_reorg has been called, we can't fix up expensive
4214 constants by pushing them into memory so we must synthesize
4215 them in-line, regardless of the cost. This is only likely to
4216 be more costly on chips that have load delay slots and we are
4217 compiling without running the scheduler (so no splitting
4218 occurred before the final instruction emission).
4220 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4222 if (!cfun->machine->after_arm_reorg
4223 && !cond
4224 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4225 1, 0)
4226 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4227 + (code != SET))))
4229 if (code == SET)
4231 /* Currently SET is the only monadic value for CODE, all
4232 the rest are dyadic. */
4233 if (TARGET_USE_MOVT)
4234 arm_emit_movpair (target, GEN_INT (val));
4235 else
4236 emit_set_insn (target, GEN_INT (val));
4238 return 1;
4240 else
4242 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4244 if (TARGET_USE_MOVT)
4245 arm_emit_movpair (temp, GEN_INT (val));
4246 else
4247 emit_set_insn (temp, GEN_INT (val));
4249 /* For MINUS, the value is subtracted from, since we never
4250 have subtraction of a constant. */
4251 if (code == MINUS)
4252 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4253 else
4254 emit_set_insn (target,
4255 gen_rtx_fmt_ee (code, mode, source, temp));
4256 return 2;
4261 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4265 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4266 ARM/THUMB2 immediates, and add up to VAL.
4267 The function return value gives the number of insns required.
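/* Hand-worked illustration: for VAL == 0x12340078 a possible
   RETURN_SEQUENCE is { 0x12000000, 0x00340000, 0x00000078 }; each element
   is an 8-bit value shifted into place, the three sum to VAL, and the
   return value would be 3.  */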
4268 static int
4269 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4270 struct four_ints *return_sequence)
4272 int best_consecutive_zeros = 0;
4273 int i;
4274 int best_start = 0;
4275 int insns1, insns2;
4276 struct four_ints tmp_sequence;
4278 /* If we aren't targeting ARM, the best place to start is always at
4279 the bottom, otherwise look more closely. */
4280 if (TARGET_ARM)
4282 for (i = 0; i < 32; i += 2)
4284 int consecutive_zeros = 0;
4286 if (!(val & (3 << i)))
4288 while ((i < 32) && !(val & (3 << i)))
4290 consecutive_zeros += 2;
4291 i += 2;
4293 if (consecutive_zeros > best_consecutive_zeros)
4295 best_consecutive_zeros = consecutive_zeros;
4296 best_start = i - consecutive_zeros;
4298 i -= 2;
4303 /* So long as it won't require any more insns to do so, it's
4304 desirable to emit a small constant (in bits 0...9) in the last
4305 insn. This way there is more chance that it can be combined with
4306 a later addressing insn to form a pre-indexed load or store
4307 operation. Consider:
4309 *((volatile int *)0xe0000100) = 1;
4310 *((volatile int *)0xe0000110) = 2;
4312 We want this to wind up as:
4314 mov rA, #0xe0000000
4315 mov rB, #1
4316 str rB, [rA, #0x100]
4317 mov rB, #2
4318 str rB, [rA, #0x110]
4320 rather than having to synthesize both large constants from scratch.
4322 Therefore, we calculate how many insns would be required to emit
4323 the constant starting from `best_start', and also starting from
4324 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4325 yield a shorter sequence, we may as well use zero. */
4326 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4327 if (best_start != 0
4328 && ((HOST_WIDE_INT_1U << best_start) < val))
4330 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4331 if (insns2 <= insns1)
4333 *return_sequence = tmp_sequence;
4334 insns1 = insns2;
4338 return insns1;
4341 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4342 static int
4343 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4344 struct four_ints *return_sequence, int i)
4346 int remainder = val & 0xffffffff;
4347 int insns = 0;
4349 /* Try to find a way of doing the job in either two or three
4350 instructions.
4352 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4353 location. We start at position I. This may be the MSB, or
4354 optimal_immediate_sequence may have positioned it at the largest block
4355 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4356 wrapping around to the top of the word when we drop off the bottom.
4357 In the worst case this code should produce no more than four insns.
4359 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4360 constants, shifted to any arbitrary location. We should always start
4361 at the MSB. */
4364 int end;
4365 unsigned int b1, b2, b3, b4;
4366 unsigned HOST_WIDE_INT result;
4367 int loc;
4369 gcc_assert (insns < 4);
4371 if (i <= 0)
4372 i += 32;
4374 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4375 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4377 loc = i;
4378 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4379 /* We can use addw/subw for the last 12 bits. */
4380 result = remainder;
4381 else
4383 /* Use an 8-bit shifted/rotated immediate. */
4384 end = i - 8;
4385 if (end < 0)
4386 end += 32;
4387 result = remainder & ((0x0ff << end)
4388 | ((i < end) ? (0xff >> (32 - end))
4389 : 0));
4390 i -= 8;
4393 else
4395 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4396 arbitrary shifts. */
4397 i -= TARGET_ARM ? 2 : 1;
4398 continue;
4401 /* Next, see if we can do a better job with a thumb2 replicated
4402 constant.
4404 We do it this way around to catch the cases like 0x01F001E0 where
4405 two 8-bit immediates would work, but a replicated constant would
4406 make it worse.
4408 TODO: 16-bit constants that don't clear all the bits, but still win.
4409 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4410 if (TARGET_THUMB2)
4412 b1 = (remainder & 0xff000000) >> 24;
4413 b2 = (remainder & 0x00ff0000) >> 16;
4414 b3 = (remainder & 0x0000ff00) >> 8;
4415 b4 = remainder & 0xff;
4417 if (loc > 24)
4419 /* The 8-bit immediate already found clears b1 (and maybe b2),
4420 but must leave b3 and b4 alone. */
4422 /* First try to find a 32-bit replicated constant that clears
4423 almost everything. We can assume that we can't do it in one,
4424 or else we wouldn't be here. */
4425 unsigned int tmp = b1 & b2 & b3 & b4;
4426 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4427 + (tmp << 24);
4428 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4429 + (tmp == b3) + (tmp == b4);
4430 if (tmp
4431 && (matching_bytes >= 3
4432 || (matching_bytes == 2
4433 && const_ok_for_op (remainder & ~tmp2, code))))
4435 /* At least 3 of the bytes match, and the fourth has at
4436 least as many bits set, or two of the bytes match
4437 and it will only require one more insn to finish. */
4438 result = tmp2;
4439 i = tmp != b1 ? 32
4440 : tmp != b2 ? 24
4441 : tmp != b3 ? 16
4442 : 8;
4445 /* Second, try to find a 16-bit replicated constant that can
4446 leave three of the bytes clear. If b2 or b4 is already
4447 zero, then we can. If the 8-bit from above would not
4448 clear b2 anyway, then we still win. */
4449 else if (b1 == b3 && (!b2 || !b4
4450 || (remainder & 0x00ff0000 & ~result)))
4452 result = remainder & 0xff00ff00;
4453 i = 24;
4456 else if (loc > 16)
4458 /* The 8-bit immediate already found clears b2 (and maybe b3)
4459 and we don't get here unless b1 is already clear, but it will
4460 leave b4 unchanged. */
4462 /* If we can clear b2 and b4 at once, then we win, since the
4463 8-bits couldn't possibly reach that far. */
4464 if (b2 == b4)
4466 result = remainder & 0x00ff00ff;
4467 i = 16;
4472 return_sequence->i[insns++] = result;
4473 remainder &= ~result;
4475 if (code == SET || code == MINUS)
4476 code = PLUS;
4478 while (remainder);
4480 return insns;
4483 /* Emit an instruction with the indicated PATTERN. If COND is
4484 non-NULL, conditionalize the execution of the instruction on COND
4485 being true. */
4487 static void
4488 emit_constant_insn (rtx cond, rtx pattern)
4490 if (cond)
4491 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4492 emit_insn (pattern);
4495 /* As above, but extra parameter GENERATE which, if clear, suppresses
4496 RTL generation. */
4498 static int
4499 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4500 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4501 int subtargets, int generate)
4503 int can_invert = 0;
4504 int can_negate = 0;
4505 int final_invert = 0;
4506 int i;
4507 int set_sign_bit_copies = 0;
4508 int clear_sign_bit_copies = 0;
4509 int clear_zero_bit_copies = 0;
4510 int set_zero_bit_copies = 0;
4511 int insns = 0, neg_insns, inv_insns;
4512 unsigned HOST_WIDE_INT temp1, temp2;
4513 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4514 struct four_ints *immediates;
4515 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4517 /* Find out which operations are safe for a given CODE. Also do a quick
4518 check for degenerate cases; these can occur when DImode operations
4519 are split. */
4520 switch (code)
4522 case SET:
4523 can_invert = 1;
4524 break;
4526 case PLUS:
4527 can_negate = 1;
4528 break;
4530 case IOR:
4531 if (remainder == 0xffffffff)
4533 if (generate)
4534 emit_constant_insn (cond,
4535 gen_rtx_SET (target,
4536 GEN_INT (ARM_SIGN_EXTEND (val))));
4537 return 1;
4540 if (remainder == 0)
4542 if (reload_completed && rtx_equal_p (target, source))
4543 return 0;
4545 if (generate)
4546 emit_constant_insn (cond, gen_rtx_SET (target, source));
4547 return 1;
4549 break;
4551 case AND:
4552 if (remainder == 0)
4554 if (generate)
4555 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4556 return 1;
4558 if (remainder == 0xffffffff)
4560 if (reload_completed && rtx_equal_p (target, source))
4561 return 0;
4562 if (generate)
4563 emit_constant_insn (cond, gen_rtx_SET (target, source));
4564 return 1;
4566 can_invert = 1;
4567 break;
4569 case XOR:
4570 if (remainder == 0)
4572 if (reload_completed && rtx_equal_p (target, source))
4573 return 0;
4574 if (generate)
4575 emit_constant_insn (cond, gen_rtx_SET (target, source));
4576 return 1;
4579 if (remainder == 0xffffffff)
4581 if (generate)
4582 emit_constant_insn (cond,
4583 gen_rtx_SET (target,
4584 gen_rtx_NOT (mode, source)));
4585 return 1;
4587 final_invert = 1;
4588 break;
4590 case MINUS:
4591 /* We treat MINUS as (val - source), since (source - val) is always
4592 passed as (source + (-val)). */
4593 if (remainder == 0)
4595 if (generate)
4596 emit_constant_insn (cond,
4597 gen_rtx_SET (target,
4598 gen_rtx_NEG (mode, source)));
4599 return 1;
4601 if (const_ok_for_arm (val))
4603 if (generate)
4604 emit_constant_insn (cond,
4605 gen_rtx_SET (target,
4606 gen_rtx_MINUS (mode, GEN_INT (val),
4607 source)));
4608 return 1;
4611 break;
4613 default:
4614 gcc_unreachable ();
4617 /* If we can do it in one insn get out quickly. */
4618 if (const_ok_for_op (val, code))
4620 if (generate)
4621 emit_constant_insn (cond,
4622 gen_rtx_SET (target,
4623 (source
4624 ? gen_rtx_fmt_ee (code, mode, source,
4625 GEN_INT (val))
4626 : GEN_INT (val))));
4627 return 1;
4630 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4631 insn. */
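/* For instance (illustration only), "x & 0xffff" becomes a single UXTH on
   ARMv6 and later, and "x & 0x7ffff" becomes a single UBFX with lsb 0 and
   width 19 on Thumb-2 capable cores.  */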
4632 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4633 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4635 if (generate)
4637 if (mode == SImode && i == 16)
4638 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4639 smaller insn. */
4640 emit_constant_insn (cond,
4641 gen_zero_extendhisi2
4642 (target, gen_lowpart (HImode, source)));
4643 else
4644 /* Extz only supports SImode, but we can coerce the operands
4645 into that mode. */
4646 emit_constant_insn (cond,
4647 gen_extzv_t2 (gen_lowpart (SImode, target),
4648 gen_lowpart (SImode, source),
4649 GEN_INT (i), const0_rtx));
4652 return 1;
4655 /* Calculate a few attributes that may be useful for specific
4656 optimizations. */
4657 /* Count number of leading zeros. */
4658 for (i = 31; i >= 0; i--)
4660 if ((remainder & (1 << i)) == 0)
4661 clear_sign_bit_copies++;
4662 else
4663 break;
4666 /* Count number of leading 1's. */
4667 for (i = 31; i >= 0; i--)
4669 if ((remainder & (1 << i)) != 0)
4670 set_sign_bit_copies++;
4671 else
4672 break;
4675 /* Count number of trailing zeros. */
4676 for (i = 0; i <= 31; i++)
4678 if ((remainder & (1 << i)) == 0)
4679 clear_zero_bit_copies++;
4680 else
4681 break;
4684 /* Count number of trailing 1's. */
4685 for (i = 0; i <= 31; i++)
4687 if ((remainder & (1 << i)) != 0)
4688 set_zero_bit_copies++;
4689 else
4690 break;
4693 switch (code)
4695 case SET:
4696 /* See if we can do this by sign_extending a constant that is known
4697 to be negative. This is a good way of doing it, since the shift
4698 may well merge into a subsequent insn. */
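/* Hand-worked illustration: to set 0xfffff000 (20 leading ones) we can
   emit "mov rT, #0x80000000" followed by "asr rD, rT, #19", loading a
   valid immediate and then shifting copies of the sign bit back in.  */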
4699 if (set_sign_bit_copies > 1)
4701 if (const_ok_for_arm
4702 (temp1 = ARM_SIGN_EXTEND (remainder
4703 << (set_sign_bit_copies - 1))))
4705 if (generate)
4707 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4708 emit_constant_insn (cond,
4709 gen_rtx_SET (new_src, GEN_INT (temp1)));
4710 emit_constant_insn (cond,
4711 gen_ashrsi3 (target, new_src,
4712 GEN_INT (set_sign_bit_copies - 1)));
4714 return 2;
4716 /* For an inverted constant, we will need to set the low bits,
4717 these will be shifted out of harm's way. */
4718 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4719 if (const_ok_for_arm (~temp1))
4721 if (generate)
4723 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4724 emit_constant_insn (cond,
4725 gen_rtx_SET (new_src, GEN_INT (temp1)));
4726 emit_constant_insn (cond,
4727 gen_ashrsi3 (target, new_src,
4728 GEN_INT (set_sign_bit_copies - 1)));
4730 return 2;
4734 /* See if we can calculate the value as the difference between two
4735 valid immediates. */
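/* Hand-worked illustration: 0x00ffff00 is not a valid immediate, but it
   equals 0x01000000 - 0x00000100, both of which are, so it can be built
   with a MOV followed by an ADD of the negated second immediate.  */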
4736 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4738 int topshift = clear_sign_bit_copies & ~1;
4740 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4741 & (0xff000000 >> topshift));
4743 /* If temp1 is zero, then that means the 9 most significant
4744 bits of remainder were 1 and we've caused it to overflow.
4745 When topshift is 0 we don't need to do anything since we
4746 can borrow from 'bit 32'. */
4747 if (temp1 == 0 && topshift != 0)
4748 temp1 = 0x80000000 >> (topshift - 1);
4750 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4752 if (const_ok_for_arm (temp2))
4754 if (generate)
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_addsi3 (target, new_src,
4761 GEN_INT (-temp2)));
4764 return 2;
4768 /* See if we can generate this by setting the bottom (or the top)
4769 16 bits, and then shifting these into the other half of the
4770 word. We only look for the simplest cases, to do more would cost
4771 too much. Be careful, however, not to generate this when the
4772 alternative would take fewer insns. */
4773 if (val & 0xffff0000)
4775 temp1 = remainder & 0xffff0000;
4776 temp2 = remainder & 0x0000ffff;
4778 /* Overlaps outside this range are best done using other methods. */
4779 for (i = 9; i < 24; i++)
4781 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4782 && !const_ok_for_arm (temp2))
4784 rtx new_src = (subtargets
4785 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4786 : target);
4787 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4788 source, subtargets, generate);
4789 source = new_src;
4790 if (generate)
4791 emit_constant_insn
4792 (cond,
4793 gen_rtx_SET
4794 (target,
4795 gen_rtx_IOR (mode,
4796 gen_rtx_ASHIFT (mode, source,
4797 GEN_INT (i)),
4798 source)));
4799 return insns + 1;
4803 /* Don't duplicate cases already considered. */
4804 for (i = 17; i < 24; i++)
4806 if (((temp1 | (temp1 >> i)) == remainder)
4807 && !const_ok_for_arm (temp1))
4809 rtx new_src = (subtargets
4810 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4811 : target);
4812 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4813 source, subtargets, generate);
4814 source = new_src;
4815 if (generate)
4816 emit_constant_insn
4817 (cond,
4818 gen_rtx_SET (target,
4819 gen_rtx_IOR
4820 (mode,
4821 gen_rtx_LSHIFTRT (mode, source,
4822 GEN_INT (i)),
4823 source)));
4824 return insns + 1;
4828 break;
4830 case IOR:
4831 case XOR:
4832 /* If we have IOR or XOR, and the constant can be loaded in a
4833 single instruction, and we can find a temporary to put it in,
4834 then this can be done in two instructions instead of 3-4. */
4835 if (subtargets
4836 /* TARGET can't be NULL if SUBTARGETS is 0 */
4837 || (reload_completed && !reg_mentioned_p (target, source)))
4839 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4841 if (generate)
4843 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4845 emit_constant_insn (cond,
4846 gen_rtx_SET (sub, GEN_INT (val)));
4847 emit_constant_insn (cond,
4848 gen_rtx_SET (target,
4849 gen_rtx_fmt_ee (code, mode,
4850 source, sub)));
4852 return 2;
4856 if (code == XOR)
4857 break;
4859 /* Convert.
4860 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4861 and the remainder 0s for e.g. 0xfff00000)
4862 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4864 This can be done in 2 instructions by using shifts with mov or mvn.
4865 e.g. for
4866 x = x | 0xfff00000;
4867 we generate.
4868 mvn r0, r0, asl #12
4869 mvn r0, r0, lsr #12 */
4870 if (set_sign_bit_copies > 8
4871 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4873 if (generate)
4875 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4876 rtx shift = GEN_INT (set_sign_bit_copies);
4878 emit_constant_insn
4879 (cond,
4880 gen_rtx_SET (sub,
4881 gen_rtx_NOT (mode,
4882 gen_rtx_ASHIFT (mode,
4883 source,
4884 shift))));
4885 emit_constant_insn
4886 (cond,
4887 gen_rtx_SET (target,
4888 gen_rtx_NOT (mode,
4889 gen_rtx_LSHIFTRT (mode, sub,
4890 shift))));
4892 return 2;
4895 /* Convert
4896 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4898 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4900 For example, r0 = r0 | 0xfff
4901 mvn r0, r0, lsr #12
4902 mvn r0, r0, asl #12
4905 if (set_zero_bit_copies > 8
4906 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4908 if (generate)
4910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4911 rtx shift = GEN_INT (set_zero_bit_copies);
4913 emit_constant_insn
4914 (cond,
4915 gen_rtx_SET (sub,
4916 gen_rtx_NOT (mode,
4917 gen_rtx_LSHIFTRT (mode,
4918 source,
4919 shift))));
4920 emit_constant_insn
4921 (cond,
4922 gen_rtx_SET (target,
4923 gen_rtx_NOT (mode,
4924 gen_rtx_ASHIFT (mode, sub,
4925 shift))));
4927 return 2;
4930 /* This will never be reached for Thumb2 because orn is a valid
4931 instruction. This is for Thumb1 and the ARM 32 bit cases.
4933 x = y | constant (such that ~constant is a valid constant)
4934 Transform this to
4935 x = ~(~y & ~constant).
4937 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4939 if (generate)
4941 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4942 emit_constant_insn (cond,
4943 gen_rtx_SET (sub,
4944 gen_rtx_NOT (mode, source)));
4945 source = sub;
4946 if (subtargets)
4947 sub = gen_reg_rtx (mode);
4948 emit_constant_insn (cond,
4949 gen_rtx_SET (sub,
4950 gen_rtx_AND (mode, source,
4951 GEN_INT (temp1))));
4952 emit_constant_insn (cond,
4953 gen_rtx_SET (target,
4954 gen_rtx_NOT (mode, sub)));
4956 return 3;
4958 break;
4960 case AND:
4961 /* See if two shifts will do 2 or more insn's worth of work. */
4962 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4964 HOST_WIDE_INT shift_mask = ((0xffffffff
4965 << (32 - clear_sign_bit_copies))
4966 & 0xffffffff);
4968 if ((remainder | shift_mask) != 0xffffffff)
4970 HOST_WIDE_INT new_val
4971 = ARM_SIGN_EXTEND (remainder | shift_mask);
4973 if (generate)
4975 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4976 insns = arm_gen_constant (AND, SImode, cond, new_val,
4977 new_src, source, subtargets, 1);
4978 source = new_src;
4980 else
4982 rtx targ = subtargets ? NULL_RTX : target;
4983 insns = arm_gen_constant (AND, mode, cond, new_val,
4984 targ, source, subtargets, 0);
4988 if (generate)
4990 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4991 rtx shift = GEN_INT (clear_sign_bit_copies);
4993 emit_insn (gen_ashlsi3 (new_src, source, shift));
4994 emit_insn (gen_lshrsi3 (target, new_src, shift));
4997 return insns + 2;
5000 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5002 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5004 if ((remainder | shift_mask) != 0xffffffff)
5006 HOST_WIDE_INT new_val
5007 = ARM_SIGN_EXTEND (remainder | shift_mask);
5008 if (generate)
5010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5012 insns = arm_gen_constant (AND, mode, cond, new_val,
5013 new_src, source, subtargets, 1);
5014 source = new_src;
5016 else
5018 rtx targ = subtargets ? NULL_RTX : target;
5020 insns = arm_gen_constant (AND, mode, cond, new_val,
5021 targ, source, subtargets, 0);
5025 if (generate)
5027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5028 rtx shift = GEN_INT (clear_zero_bit_copies);
5030 emit_insn (gen_lshrsi3 (new_src, source, shift));
5031 emit_insn (gen_ashlsi3 (target, new_src, shift));
5034 return insns + 2;
5037 break;
5039 default:
5040 break;
5043 /* Calculate what the instruction sequences would be if we generated it
5044 normally, negated, or inverted. */
5045 if (code == AND)
5046 /* AND cannot be split into multiple insns, so invert and use BIC. */
5047 insns = 99;
5048 else
5049 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5051 if (can_negate)
5052 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5053 &neg_immediates);
5054 else
5055 neg_insns = 99;
5057 if (can_invert || final_invert)
5058 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5059 &inv_immediates);
5060 else
5061 inv_insns = 99;
5063 immediates = &pos_immediates;
5065 /* Is the negated immediate sequence more efficient? */
5066 if (neg_insns < insns && neg_insns <= inv_insns)
5068 insns = neg_insns;
5069 immediates = &neg_immediates;
5071 else
5072 can_negate = 0;
5074 /* Is the inverted immediate sequence more efficient?
5075 We must allow for an extra NOT instruction for XOR operations, although
5076 there is some chance that the final 'mvn' will get optimized later. */
5077 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5079 insns = inv_insns;
5080 immediates = &inv_immediates;
5082 else
5084 can_invert = 0;
5085 final_invert = 0;
5088 /* Now output the chosen sequence as instructions. */
5089 if (generate)
5091 for (i = 0; i < insns; i++)
5093 rtx new_src, temp1_rtx;
5095 temp1 = immediates->i[i];
5097 if (code == SET || code == MINUS)
5098 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5099 else if ((final_invert || i < (insns - 1)) && subtargets)
5100 new_src = gen_reg_rtx (mode);
5101 else
5102 new_src = target;
5104 if (can_invert)
5105 temp1 = ~temp1;
5106 else if (can_negate)
5107 temp1 = -temp1;
5109 temp1 = trunc_int_for_mode (temp1, mode);
5110 temp1_rtx = GEN_INT (temp1);
5112 if (code == SET)
5114 else if (code == MINUS)
5115 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5116 else
5117 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5119 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5120 source = new_src;
5122 if (code == SET)
5124 can_negate = can_invert;
5125 can_invert = 0;
5126 code = PLUS;
5128 else if (code == MINUS)
5129 code = PLUS;
5133 if (final_invert)
5135 if (generate)
5136 emit_constant_insn (cond, gen_rtx_SET (target,
5137 gen_rtx_NOT (mode, source)));
5138 insns++;
5141 return insns;
5144 /* Canonicalize a comparison so that we are more likely to recognize it.
5145 This can be done for a few constant compares, where we can make the
5146 immediate value easier to load. */
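/* A typical SImode example (illustration only): the constant in
   (GT reg 4095) cannot be encoded directly, but 4096 can, so the
   comparison is rewritten below as (GE reg 4096).  */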
5148 static void
5149 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5150 bool op0_preserve_value)
5152 machine_mode mode;
5153 unsigned HOST_WIDE_INT i, maxval;
5155 mode = GET_MODE (*op0);
5156 if (mode == VOIDmode)
5157 mode = GET_MODE (*op1);
5159 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5161 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5162 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5163 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5164 for GTU/LEU in Thumb mode. */
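/* Illustration only: (GT:DI reg 255) is rewritten as (GE:DI reg 256),
   since only GE/LT (and GEU/LTU) are provided for DImode and both
   32-bit halves of 256 are valid immediates.  */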
5165 if (mode == DImode)
5168 if (*code == GT || *code == LE
5169 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5171 /* Missing comparison. First try to use an available
5172 comparison. */
5173 if (CONST_INT_P (*op1))
5175 i = INTVAL (*op1);
5176 switch (*code)
5178 case GT:
5179 case LE:
5180 if (i != maxval
5181 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5183 *op1 = GEN_INT (i + 1);
5184 *code = *code == GT ? GE : LT;
5185 return;
5187 break;
5188 case GTU:
5189 case LEU:
5190 if (i != ~((unsigned HOST_WIDE_INT) 0)
5191 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5193 *op1 = GEN_INT (i + 1);
5194 *code = *code == GTU ? GEU : LTU;
5195 return;
5197 break;
5198 default:
5199 gcc_unreachable ();
5203 /* If that did not work, reverse the condition. */
5204 if (!op0_preserve_value)
5206 std::swap (*op0, *op1);
5207 *code = (int)swap_condition ((enum rtx_code)*code);
5210 return;
5213 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5214 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5215 to facilitate possible combining with a cmp into 'ands'. */
5216 if (mode == SImode
5217 && GET_CODE (*op0) == ZERO_EXTEND
5218 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5219 && GET_MODE (XEXP (*op0, 0)) == QImode
5220 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5221 && subreg_lowpart_p (XEXP (*op0, 0))
5222 && *op1 == const0_rtx)
5223 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5224 GEN_INT (255));
5226 /* Comparisons smaller than DImode. Only adjust comparisons against
5227 an out-of-range constant. */
5228 if (!CONST_INT_P (*op1)
5229 || const_ok_for_arm (INTVAL (*op1))
5230 || const_ok_for_arm (- INTVAL (*op1)))
5231 return;
5233 i = INTVAL (*op1);
5235 switch (*code)
5237 case EQ:
5238 case NE:
5239 return;
5241 case GT:
5242 case LE:
5243 if (i != maxval
5244 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5246 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5247 *code = *code == GT ? GE : LT;
5248 return;
5250 break;
5252 case GE:
5253 case LT:
5254 if (i != ~maxval
5255 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5257 *op1 = GEN_INT (i - 1);
5258 *code = *code == GE ? GT : LE;
5259 return;
5261 break;
5263 case GTU:
5264 case LEU:
5265 if (i != ~((unsigned HOST_WIDE_INT) 0)
5266 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5268 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5269 *code = *code == GTU ? GEU : LTU;
5270 return;
5272 break;
5274 case GEU:
5275 case LTU:
5276 if (i != 0
5277 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5279 *op1 = GEN_INT (i - 1);
5280 *code = *code == GEU ? GTU : LEU;
5281 return;
5283 break;
5285 default:
5286 gcc_unreachable ();
5291 /* Define how to find the value returned by a function. */
5293 static rtx
5294 arm_function_value(const_tree type, const_tree func,
5295 bool outgoing ATTRIBUTE_UNUSED)
5297 machine_mode mode;
5298 int unsignedp ATTRIBUTE_UNUSED;
5299 rtx r ATTRIBUTE_UNUSED;
5301 mode = TYPE_MODE (type);
5303 if (TARGET_AAPCS_BASED)
5304 return aapcs_allocate_return_reg (mode, type, func);
5306 /* Promote integer types. */
5307 if (INTEGRAL_TYPE_P (type))
5308 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5310 /* Promote small structs returned in a register to full-word size
5311 for big-endian AAPCS. */
5312 if (arm_return_in_msb (type))
5314 HOST_WIDE_INT size = int_size_in_bytes (type);
5315 if (size % UNITS_PER_WORD != 0)
5317 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5318 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5322 return arm_libcall_value_1 (mode);
5325 /* libcall hashtable helpers. */
5327 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5329 static inline hashval_t hash (const rtx_def *);
5330 static inline bool equal (const rtx_def *, const rtx_def *);
5331 static inline void remove (rtx_def *);
5334 inline bool
5335 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5337 return rtx_equal_p (p1, p2);
5340 inline hashval_t
5341 libcall_hasher::hash (const rtx_def *p1)
5343 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5346 typedef hash_table<libcall_hasher> libcall_table_type;
5348 static void
5349 add_libcall (libcall_table_type *htab, rtx libcall)
5351 *htab->find_slot (libcall, INSERT) = libcall;
5354 static bool
5355 arm_libcall_uses_aapcs_base (const_rtx libcall)
5357 static bool init_done = false;
5358 static libcall_table_type *libcall_htab = NULL;
5360 if (!init_done)
5362 init_done = true;
5364 libcall_htab = new libcall_table_type (31);
5365 add_libcall (libcall_htab,
5366 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5367 add_libcall (libcall_htab,
5368 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5369 add_libcall (libcall_htab,
5370 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5371 add_libcall (libcall_htab,
5372 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5374 add_libcall (libcall_htab,
5375 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5376 add_libcall (libcall_htab,
5377 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5378 add_libcall (libcall_htab,
5379 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5380 add_libcall (libcall_htab,
5381 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5383 add_libcall (libcall_htab,
5384 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5385 add_libcall (libcall_htab,
5386 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5387 add_libcall (libcall_htab,
5388 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5389 add_libcall (libcall_htab,
5390 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5391 add_libcall (libcall_htab,
5392 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5393 add_libcall (libcall_htab,
5394 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5395 add_libcall (libcall_htab,
5396 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5397 add_libcall (libcall_htab,
5398 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5400 /* Values from double-precision helper functions are returned in core
5401 registers if the selected core only supports single-precision
5402 arithmetic, even if we are using the hard-float ABI. The same is
5403 true for single-precision helpers, but we will never be using the
5404 hard-float ABI on a CPU which doesn't support single-precision
5405 operations in hardware. */
5406 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5407 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5408 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5409 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5410 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5411 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5412 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5413 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5414 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5415 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5416 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5417 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5418 SFmode));
5419 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5420 DFmode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5425 return libcall && libcall_htab->find (libcall) != NULL;
5428 static rtx
5429 arm_libcall_value_1 (machine_mode mode)
5431 if (TARGET_AAPCS_BASED)
5432 return aapcs_libcall_value (mode);
5433 else if (TARGET_IWMMXT_ABI
5434 && arm_vector_mode_supported_p (mode))
5435 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5436 else
5437 return gen_rtx_REG (mode, ARG_REGISTER (1));
5440 /* Define how to find the value returned by a library function
5441 assuming the value has mode MODE. */
5443 static rtx
5444 arm_libcall_value (machine_mode mode, const_rtx libcall)
5446 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5447 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5449 /* The following libcalls return their result in integer registers,
5450 even though they return a floating point value. */
5451 if (arm_libcall_uses_aapcs_base (libcall))
5452 return gen_rtx_REG (mode, ARG_REGISTER(1));
5456 return arm_libcall_value_1 (mode);
5459 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5461 static bool
5462 arm_function_value_regno_p (const unsigned int regno)
5464 if (regno == ARG_REGISTER (1)
5465 || (TARGET_32BIT
5466 && TARGET_AAPCS_BASED
5467 && TARGET_HARD_FLOAT
5468 && regno == FIRST_VFP_REGNUM)
5469 || (TARGET_IWMMXT_ABI
5470 && regno == FIRST_IWMMXT_REGNUM))
5471 return true;
5473 return false;
5476 /* Determine the amount of memory needed to store the possible return
5477 registers of an untyped call. */
5479 arm_apply_result_size (void)
5481 int size = 16;
5483 if (TARGET_32BIT)
5485 if (TARGET_HARD_FLOAT_ABI)
5486 size += 32;
5487 if (TARGET_IWMMXT_ABI)
5488 size += 8;
5491 return size;
5494 /* Decide whether TYPE should be returned in memory (true)
5495 or in a register (false). FNTYPE is the type of the function making
5496 the call. */
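/* A rough illustration of the AAPCS branch below (hypothetical types,
   assuming TARGET_AAPCS_BASED and 4-byte words):

     struct s1 { char c; };                  1 byte   -> returned in r0
     struct s2 { int x[5]; };                20 bytes -> returned in memory
     typedef int v4si __attribute__ ((vector_size (16)));
                                             16 bytes -> returned in r0-r3  */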
5497 static bool
5498 arm_return_in_memory (const_tree type, const_tree fntype)
5500 HOST_WIDE_INT size;
5502 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5504 if (TARGET_AAPCS_BASED)
5506 /* Simple, non-aggregate types (i.e. not including vectors and
5507 complex) are always returned in a register (or registers).
5508 We don't care about which register here, so we can short-cut
5509 some of the detail. */
5510 if (!AGGREGATE_TYPE_P (type)
5511 && TREE_CODE (type) != VECTOR_TYPE
5512 && TREE_CODE (type) != COMPLEX_TYPE)
5513 return false;
5515 /* Any return value that is no larger than one word can be
5516 returned in r0. */
5517 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5518 return false;
5520 /* Check any available co-processors to see if they accept the
5521 type as a register candidate (VFP, for example, can return
5522 some aggregates in consecutive registers). These aren't
5523 available if the call is variadic. */
5524 if (aapcs_select_return_coproc (type, fntype) >= 0)
5525 return false;
5527 /* Vector values should be returned using ARM registers, not
5528 memory (unless they're over 16 bytes, which will break since
5529 we only have four call-clobbered registers to play with). */
5530 if (TREE_CODE (type) == VECTOR_TYPE)
5531 return (size < 0 || size > (4 * UNITS_PER_WORD));
5533 /* The rest go in memory. */
5534 return true;
5537 if (TREE_CODE (type) == VECTOR_TYPE)
5538 return (size < 0 || size > (4 * UNITS_PER_WORD));
5540 if (!AGGREGATE_TYPE_P (type) &&
5541 (TREE_CODE (type) != VECTOR_TYPE))
5542 /* All simple types are returned in registers. */
5543 return false;
5545 if (arm_abi != ARM_ABI_APCS)
5547 /* ATPCS and later return aggregate types in memory only if they are
5548 larger than a word (or are variable size). */
5549 return (size < 0 || size > UNITS_PER_WORD);
5552 /* For the arm-wince targets we choose to be compatible with Microsoft's
5553 ARM and Thumb compilers, which always return aggregates in memory. */
5554 #ifndef ARM_WINCE
5555 /* All structures/unions bigger than one word are returned in memory.
5556 Also catch the case where int_size_in_bytes returns -1. In this case
5557 the aggregate is either huge or of variable size, and in either case
5558 we will want to return it via memory and not in a register. */
5559 if (size < 0 || size > UNITS_PER_WORD)
5560 return true;
5562 if (TREE_CODE (type) == RECORD_TYPE)
5564 tree field;
5566 /* For a struct the APCS says that we only return in a register
5567 if the type is 'integer like' and every addressable element
5568 has an offset of zero. For practical purposes this means
5569 that the structure can have at most one non-bit-field element
5570 and that this element must be the first one in the structure. */
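   /* Purely illustrative examples of that rule (hypothetical types,
      4-byte words assumed):

        struct a { short s; };               single field       -> register
        struct b { int x : 8; int y : 8; };  trailing bit-field -> register
        struct c { float f; };               float first field  -> memory
        struct d { char p; char q; };        second field is
                                             addressable        -> memory  */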
5572 /* Find the first field, ignoring non FIELD_DECL things which will
5573 have been created by C++. */
5574 for (field = TYPE_FIELDS (type);
5575 field && TREE_CODE (field) != FIELD_DECL;
5576 field = DECL_CHAIN (field))
5577 continue;
5579 if (field == NULL)
5580 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5582 /* Check that the first field is valid for returning in a register. */
5584 /* ... Floats are not allowed */
5585 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5586 return true;
5588 /* ... Aggregates that are not themselves valid for returning in
5589 a register are not allowed. */
5590 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5591 return true;
5593 /* Now check the remaining fields, if any. Only bitfields are allowed,
5594 since they are not addressable. */
5595 for (field = DECL_CHAIN (field);
5596 field;
5597 field = DECL_CHAIN (field))
5599 if (TREE_CODE (field) != FIELD_DECL)
5600 continue;
5602 if (!DECL_BIT_FIELD_TYPE (field))
5603 return true;
5606 return false;
5609 if (TREE_CODE (type) == UNION_TYPE)
5611 tree field;
5613 /* Unions can be returned in registers if every element is
5614 integral, or can be returned in an integer register. */
5615 for (field = TYPE_FIELDS (type);
5616 field;
5617 field = DECL_CHAIN (field))
5619 if (TREE_CODE (field) != FIELD_DECL)
5620 continue;
5622 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5623 return true;
5625 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5626 return true;
5629 return false;
5631 #endif /* not ARM_WINCE */
5633 /* Return all other types in memory. */
5634 return true;
5637 const struct pcs_attribute_arg
5639 const char *arg;
5640 enum arm_pcs value;
5641 } pcs_attribute_args[] =
5643 {"aapcs", ARM_PCS_AAPCS},
5644 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5645 #if 0
5646 /* We could recognize these, but changes would be needed elsewhere
5647 * to implement them. */
5648 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5649 {"atpcs", ARM_PCS_ATPCS},
5650 {"apcs", ARM_PCS_APCS},
5651 #endif
5652 {NULL, ARM_PCS_UNKNOWN}
5655 static enum arm_pcs
5656 arm_pcs_from_attribute (tree attr)
5658 const struct pcs_attribute_arg *ptr;
5659 const char *arg;
5661 /* Get the value of the argument. */
5662 if (TREE_VALUE (attr) == NULL_TREE
5663 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5664 return ARM_PCS_UNKNOWN;
5666 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5668 /* Check it against the list of known arguments. */
5669 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5670 if (streq (arg, ptr->arg))
5671 return ptr->value;
5673 /* An unrecognized PCS variant. */
5674 return ARM_PCS_UNKNOWN;
5677 /* Get the PCS variant to use for this call. TYPE is the function's type
5678 specification, DECL is the specific declaration. DECL may be null if
5679 the call could be indirect or if this is a library call. */
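/* For illustration only, the attribute looks like:

     double dadd (double, double) __attribute__ ((pcs ("aapcs")));

   which forces the base (soft-float) AAPCS convention for DADD even when
   the rest of the translation unit uses -mfloat-abi=hard.  */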
5680 static enum arm_pcs
5681 arm_get_pcs_model (const_tree type, const_tree decl)
5683 bool user_convention = false;
5684 enum arm_pcs user_pcs = arm_pcs_default;
5685 tree attr;
5687 gcc_assert (type);
5689 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5690 if (attr)
5692 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5693 user_convention = true;
5696 if (TARGET_AAPCS_BASED)
5698 /* Detect varargs functions. These always use the base rules
5699 (no argument is ever a candidate for a co-processor
5700 register). */
5701 bool base_rules = stdarg_p (type);
5703 if (user_convention)
5705 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5706 sorry ("non-AAPCS derived PCS variant");
5707 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5708 error ("variadic functions must use the base AAPCS variant");
5711 if (base_rules)
5712 return ARM_PCS_AAPCS;
5713 else if (user_convention)
5714 return user_pcs;
5715 else if (decl && flag_unit_at_a_time)
5717 /* Local functions never leak outside this compilation unit,
5718 so we are free to use whatever conventions are
5719 appropriate. */
5720 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5721 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5722 if (i && i->local)
5723 return ARM_PCS_AAPCS_LOCAL;
5726 else if (user_convention && user_pcs != arm_pcs_default)
5727 sorry ("PCS variant");
5729 /* For everything else we use the target's default. */
5730 return arm_pcs_default;
5734 static void
5735 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5736 const_tree fntype ATTRIBUTE_UNUSED,
5737 rtx libcall ATTRIBUTE_UNUSED,
5738 const_tree fndecl ATTRIBUTE_UNUSED)
5740 /* Record the unallocated VFP registers. */
5741 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5742 pcum->aapcs_vfp_reg_alloc = 0;
5745 /* Walk down the type tree of TYPE counting consecutive base elements.
5746 If *MODEP is VOIDmode, then set it to the first valid floating point
5747 type. If a non-floating point type is found, or if a floating point
5748 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5749 otherwise return the count in the sub-tree. */
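   /* Illustrative only (hypothetical types): for

        struct hfa { double d[2]; double e; };

      the walk records DFmode and returns 3, making the struct a
      homogeneous-aggregate candidate, whereas for

        struct mix { double d; float f; };

      the SFmode member does not match the DFmode already in *MODEP and
      -1 is returned.  */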
5750 static int
5751 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5753 machine_mode mode;
5754 HOST_WIDE_INT size;
5756 switch (TREE_CODE (type))
5758 case REAL_TYPE:
5759 mode = TYPE_MODE (type);
5760 if (mode != DFmode && mode != SFmode && mode != HFmode)
5761 return -1;
5763 if (*modep == VOIDmode)
5764 *modep = mode;
5766 if (*modep == mode)
5767 return 1;
5769 break;
5771 case COMPLEX_TYPE:
5772 mode = TYPE_MODE (TREE_TYPE (type));
5773 if (mode != DFmode && mode != SFmode)
5774 return -1;
5776 if (*modep == VOIDmode)
5777 *modep = mode;
5779 if (*modep == mode)
5780 return 2;
5782 break;
5784 case VECTOR_TYPE:
5785 /* Use V2SImode and V4SImode as representatives of all 64-bit
5786 and 128-bit vector types, whether or not those modes are
5787 supported with the present options. */
5788 size = int_size_in_bytes (type);
5789 switch (size)
5791 case 8:
5792 mode = V2SImode;
5793 break;
5794 case 16:
5795 mode = V4SImode;
5796 break;
5797 default:
5798 return -1;
5801 if (*modep == VOIDmode)
5802 *modep = mode;
5804 /* Vector modes are considered to be opaque: two vectors are
5805 equivalent for the purposes of being homogeneous aggregates
5806 if they are the same size. */
5807 if (*modep == mode)
5808 return 1;
5810 break;
5812 case ARRAY_TYPE:
5814 int count;
5815 tree index = TYPE_DOMAIN (type);
5817 /* Can't handle incomplete types nor sizes that are not
5818 fixed. */
5819 if (!COMPLETE_TYPE_P (type)
5820 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5821 return -1;
5823 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5824 if (count == -1
5825 || !index
5826 || !TYPE_MAX_VALUE (index)
5827 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5828 || !TYPE_MIN_VALUE (index)
5829 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5830 || count < 0)
5831 return -1;
5833 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5834 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5836 /* There must be no padding. */
5837 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5838 return -1;
5840 return count;
5843 case RECORD_TYPE:
5845 int count = 0;
5846 int sub_count;
5847 tree field;
5849 /* Can't handle incomplete types nor sizes that are not
5850 fixed. */
5851 if (!COMPLETE_TYPE_P (type)
5852 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5853 return -1;
5855 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5857 if (TREE_CODE (field) != FIELD_DECL)
5858 continue;
5860 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5861 if (sub_count < 0)
5862 return -1;
5863 count += sub_count;
5866 /* There must be no padding. */
5867 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5868 return -1;
5870 return count;
5873 case UNION_TYPE:
5874 case QUAL_UNION_TYPE:
5876 /* These aren't very interesting except in a degenerate case. */
5877 int count = 0;
5878 int sub_count;
5879 tree field;
5881 /* Can't handle incomplete types nor sizes that are not
5882 fixed. */
5883 if (!COMPLETE_TYPE_P (type)
5884 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5885 return -1;
5887 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5889 if (TREE_CODE (field) != FIELD_DECL)
5890 continue;
5892 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5893 if (sub_count < 0)
5894 return -1;
5895 count = count > sub_count ? count : sub_count;
5898 /* There must be no padding. */
5899 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5900 return -1;
5902 return count;
5905 default:
5906 break;
5909 return -1;
5912 /* Return true if PCS_VARIANT should use VFP registers. */
5913 static bool
5914 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5916 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5918 static bool seen_thumb1_vfp = false;
5920 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5922 sorry ("Thumb-1 hard-float VFP ABI");
5923 /* sorry() is not immediately fatal, so only display this once. */
5924 seen_thumb1_vfp = true;
5927 return true;
5930 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5931 return false;
5933 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5934 (TARGET_VFP_DOUBLE || !is_double));
5937 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5938 suitable for passing or returning in VFP registers for the PCS
5939 variant selected. If it is, then *BASE_MODE is updated to contain
5940 a machine mode describing each element of the argument's type and
5941 *COUNT to hold the number of such elements. */
5942 static bool
5943 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5944 machine_mode mode, const_tree type,
5945 machine_mode *base_mode, int *count)
5947 machine_mode new_mode = VOIDmode;
5949 /* If we have the type information, prefer that to working things
5950 out from the mode. */
5951 if (type)
5953 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5955 if (ag_count > 0 && ag_count <= 4)
5956 *count = ag_count;
5957 else
5958 return false;
5960 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5961 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5962 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5964 *count = 1;
5965 new_mode = mode;
5967 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5969 *count = 2;
5970 new_mode = (mode == DCmode ? DFmode : SFmode);
5972 else
5973 return false;
5976 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5977 return false;
5979 *base_mode = new_mode;
5980 return true;
5983 static bool
5984 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5985 machine_mode mode, const_tree type)
5987 int count ATTRIBUTE_UNUSED;
5988 machine_mode ag_mode ATTRIBUTE_UNUSED;
5990 if (!use_vfp_abi (pcs_variant, false))
5991 return false;
5992 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5993 &ag_mode, &count);
5996 static bool
5997 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5998 const_tree type)
6000 if (!use_vfp_abi (pcum->pcs_variant, false))
6001 return false;
6003 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6004 &pcum->aapcs_vfp_rmode,
6005 &pcum->aapcs_vfp_rcount);
6008 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6009 for the behaviour of this function. */
6011 static bool
6012 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6013 const_tree type ATTRIBUTE_UNUSED)
6015 int rmode_size
6016 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6017 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6018 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6019 int regno;
6021 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6022 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6024 pcum->aapcs_vfp_reg_alloc = mask << regno;
6025 if (mode == BLKmode
6026 || (mode == TImode && ! TARGET_NEON)
6027 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6029 int i;
6030 int rcount = pcum->aapcs_vfp_rcount;
6031 int rshift = shift;
6032 machine_mode rmode = pcum->aapcs_vfp_rmode;
6033 rtx par;
6034 if (!TARGET_NEON)
6036 /* Avoid using unsupported vector modes. */
6037 if (rmode == V2SImode)
6038 rmode = DImode;
6039 else if (rmode == V4SImode)
6041 rmode = DImode;
6042 rcount *= 2;
6043 rshift /= 2;
6046 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6047 for (i = 0; i < rcount; i++)
6049 rtx tmp = gen_rtx_REG (rmode,
6050 FIRST_VFP_REGNUM + regno + i * rshift);
6051 tmp = gen_rtx_EXPR_LIST
6052 (VOIDmode, tmp,
6053 GEN_INT (i * GET_MODE_SIZE (rmode)));
6054 XVECEXP (par, 0, i) = tmp;
6057 pcum->aapcs_reg = par;
6059 else
6060 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6061 return true;
6063 return false;
6066 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6067 comment there for the behaviour of this function. */
6069 static rtx
6070 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6071 machine_mode mode,
6072 const_tree type ATTRIBUTE_UNUSED)
6074 if (!use_vfp_abi (pcs_variant, false))
6075 return NULL;
6077 if (mode == BLKmode
6078 || (GET_MODE_CLASS (mode) == MODE_INT
6079 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6080 && !TARGET_NEON))
6082 int count;
6083 machine_mode ag_mode;
6084 int i;
6085 rtx par;
6086 int shift;
6088 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6089 &ag_mode, &count);
6091 if (!TARGET_NEON)
6093 if (ag_mode == V2SImode)
6094 ag_mode = DImode;
6095 else if (ag_mode == V4SImode)
6097 ag_mode = DImode;
6098 count *= 2;
6101 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6102 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6103 for (i = 0; i < count; i++)
6105 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6106 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6107 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6108 XVECEXP (par, 0, i) = tmp;
6111 return par;
6114 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6117 static void
6118 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6119 machine_mode mode ATTRIBUTE_UNUSED,
6120 const_tree type ATTRIBUTE_UNUSED)
6122 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6123 pcum->aapcs_vfp_reg_alloc = 0;
6124 return;
6127 #define AAPCS_CP(X) \
6129 aapcs_ ## X ## _cum_init, \
6130 aapcs_ ## X ## _is_call_candidate, \
6131 aapcs_ ## X ## _allocate, \
6132 aapcs_ ## X ## _is_return_candidate, \
6133 aapcs_ ## X ## _allocate_return_reg, \
6134 aapcs_ ## X ## _advance \
6137 /* Table of co-processors that can be used to pass arguments in
6138 registers. Ideally no argument should be a candidate for more than
6139 one co-processor table entry, but the table is processed in order
6140 and stops after the first match. If that entry then fails to put
6141 the argument into a co-processor register, the argument will go on
6142 the stack. */
6143 static struct
6145 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6146 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6148 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6149 BLKmode) is a candidate for this co-processor's registers; this
6150 function should ignore any position-dependent state in
6151 CUMULATIVE_ARGS and only use call-type dependent information. */
6152 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6154 /* Return true if the argument does get a co-processor register; it
6155 should set aapcs_reg to an RTX of the register allocated as is
6156 required for a return from FUNCTION_ARG. */
6157 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6159 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6160 be returned in this co-processor's registers. */
6161 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6163 /* Allocate and return an RTX element to hold the return type of a call. This
6164 routine must not fail and will only be called if is_return_candidate
6165 returned true with the same parameters. */
6166 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6168 /* Finish processing this argument and prepare to start processing
6169 the next one. */
6170 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6171 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6173 AAPCS_CP(vfp)
6176 #undef AAPCS_CP
6178 static int
6179 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6180 const_tree type)
6182 int i;
6184 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6185 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6186 return i;
6188 return -1;
6191 static int
6192 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6194 /* We aren't passed a decl, so we can't check that a call is local.
6195 However, it isn't clear that that would be a win anyway, since it
6196 might limit some tail-calling opportunities. */
6197 enum arm_pcs pcs_variant;
6199 if (fntype)
6201 const_tree fndecl = NULL_TREE;
6203 if (TREE_CODE (fntype) == FUNCTION_DECL)
6205 fndecl = fntype;
6206 fntype = TREE_TYPE (fntype);
6209 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6211 else
6212 pcs_variant = arm_pcs_default;
6214 if (pcs_variant != ARM_PCS_AAPCS)
6216 int i;
6218 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6219 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6220 TYPE_MODE (type),
6221 type))
6222 return i;
6224 return -1;
6227 static rtx
6228 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6229 const_tree fntype)
6231 /* We aren't passed a decl, so we can't check that a call is local.
6232 However, it isn't clear that that would be a win anyway, since it
6233 might limit some tail-calling opportunities. */
6234 enum arm_pcs pcs_variant;
6235 int unsignedp ATTRIBUTE_UNUSED;
6237 if (fntype)
6239 const_tree fndecl = NULL_TREE;
6241 if (TREE_CODE (fntype) == FUNCTION_DECL)
6243 fndecl = fntype;
6244 fntype = TREE_TYPE (fntype);
6247 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6249 else
6250 pcs_variant = arm_pcs_default;
6252 /* Promote integer types. */
6253 if (type && INTEGRAL_TYPE_P (type))
6254 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6256 if (pcs_variant != ARM_PCS_AAPCS)
6258 int i;
6260 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6261 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6262 type))
6263 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6264 mode, type);
6267 /* Promotes small structs returned in a register to full-word size
6268 for big-endian AAPCS. */
6269 if (type && arm_return_in_msb (type))
6271 HOST_WIDE_INT size = int_size_in_bytes (type);
6272 if (size % UNITS_PER_WORD != 0)
6274 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6275 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6279 return gen_rtx_REG (mode, R0_REGNUM);
6282 static rtx
6283 aapcs_libcall_value (machine_mode mode)
6285 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6286 && GET_MODE_SIZE (mode) <= 4)
6287 mode = SImode;
6289 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6292 /* Lay out a function argument using the AAPCS rules. The rule
6293 numbers referred to here are those in the AAPCS. */
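/* A sketch of how the rules below play out (hypothetical prototypes,
   base AAPCS variant and 4-byte words assumed):

     void f (int a, long long b, int c);
        a -> r0; C3 rounds the NCRN up so that b -> r2/r3; c -> stack.

     struct big { int x[4]; };
     void g (int a, struct big s);
        a -> r0; C5 splits s between r1-r3 and 4 bytes of stack.  */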
6294 static void
6295 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6296 const_tree type, bool named)
6298 int nregs, nregs2;
6299 int ncrn;
6301 /* We only need to do this once per argument. */
6302 if (pcum->aapcs_arg_processed)
6303 return;
6305 pcum->aapcs_arg_processed = true;
6307 /* Special case: if named is false then we are handling an incoming
6308 anonymous argument which is on the stack. */
6309 if (!named)
6310 return;
6312 /* Is this a potential co-processor register candidate? */
6313 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6315 int slot = aapcs_select_call_coproc (pcum, mode, type);
6316 pcum->aapcs_cprc_slot = slot;
6318 /* We don't have to apply any of the rules from part B of the
6319 preparation phase, these are handled elsewhere in the
6320 compiler. */
6322 if (slot >= 0)
6324 /* A co-processor register candidate goes either in its own
6325 class of registers or on the stack. */
6326 if (!pcum->aapcs_cprc_failed[slot])
6328 /* C1.cp - Try to allocate the argument to co-processor
6329 registers. */
6330 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6331 return;
6333 /* C2.cp - Put the argument on the stack and note that we
6334 can't assign any more candidates in this slot. We also
6335 need to note that we have allocated stack space, so that
6336 we won't later try to split a non-cprc candidate between
6337 core registers and the stack. */
6338 pcum->aapcs_cprc_failed[slot] = true;
6339 pcum->can_split = false;
6342 /* We didn't get a register, so this argument goes on the
6343 stack. */
6344 gcc_assert (pcum->can_split == false);
6345 return;
6349 /* C3 - For double-word aligned arguments, round the NCRN up to the
6350 next even number. */
6351 ncrn = pcum->aapcs_ncrn;
6352 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6353 ncrn++;
6355 nregs = ARM_NUM_REGS2(mode, type);
6357 /* Sigh, this test should really assert that nregs > 0, but a GCC
6358 extension allows empty structs and then gives them empty size; it
6359 then allows such a structure to be passed by value. For some of
6360 the code below we have to pretend that such an argument has
6361 non-zero size so that we 'locate' it correctly either in
6362 registers or on the stack. */
6363 gcc_assert (nregs >= 0);
6365 nregs2 = nregs ? nregs : 1;
6367 /* C4 - Argument fits entirely in core registers. */
6368 if (ncrn + nregs2 <= NUM_ARG_REGS)
6370 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6371 pcum->aapcs_next_ncrn = ncrn + nregs;
6372 return;
6375 /* C5 - Some core registers left and there are no arguments already
6376 on the stack: split this argument between the remaining core
6377 registers and the stack. */
6378 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6380 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6381 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6382 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6383 return;
6386 /* C6 - NCRN is set to 4. */
6387 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6389 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6390 return;
6393 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6394 for a call to a function whose data type is FNTYPE.
6395 For a library call, FNTYPE is NULL. */
6396 void
6397 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6398 rtx libname,
6399 tree fndecl ATTRIBUTE_UNUSED)
6401 /* Long call handling. */
6402 if (fntype)
6403 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6404 else
6405 pcum->pcs_variant = arm_pcs_default;
6407 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6409 if (arm_libcall_uses_aapcs_base (libname))
6410 pcum->pcs_variant = ARM_PCS_AAPCS;
6412 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6413 pcum->aapcs_reg = NULL_RTX;
6414 pcum->aapcs_partial = 0;
6415 pcum->aapcs_arg_processed = false;
6416 pcum->aapcs_cprc_slot = -1;
6417 pcum->can_split = true;
6419 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6421 int i;
6423 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6425 pcum->aapcs_cprc_failed[i] = false;
6426 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6429 return;
6432 /* Legacy ABIs */
6434 /* On the ARM, the offset starts at 0. */
6435 pcum->nregs = 0;
6436 pcum->iwmmxt_nregs = 0;
6437 pcum->can_split = true;
6439 /* Varargs vectors are treated the same as long long.
6440 named_count avoids having to change the way arm handles 'named'. */
6441 pcum->named_count = 0;
6442 pcum->nargs = 0;
6444 if (TARGET_REALLY_IWMMXT && fntype)
6446 tree fn_arg;
6448 for (fn_arg = TYPE_ARG_TYPES (fntype);
6449 fn_arg;
6450 fn_arg = TREE_CHAIN (fn_arg))
6451 pcum->named_count += 1;
6453 if (! pcum->named_count)
6454 pcum->named_count = INT_MAX;
6458 /* Return true if mode/type need doubleword alignment. */
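/* For illustration: under the AAPCS a double or long long argument
   answers true here and is therefore placed in an even-numbered register
   pair (r0/r1 or r2/r3) or at an 8-byte aligned stack slot, whereas int
   and float arguments do not.  */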
6459 static bool
6460 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6462 if (!type)
6463 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6465 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6466 if (!AGGREGATE_TYPE_P (type))
6467 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6469 /* Array types: Use member alignment of element type. */
6470 if (TREE_CODE (type) == ARRAY_TYPE)
6471 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6473 /* Record/aggregate types: Use greatest member alignment of any member. */
6474 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6475 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6476 return true;
6478 return false;
6482 /* Determine where to put an argument to a function.
6483 Value is zero to push the argument on the stack,
6484 or a hard register in which to store the argument.
6486 MODE is the argument's machine mode.
6487 TYPE is the data type of the argument (as a tree).
6488 This is null for libcalls where that information may
6489 not be available.
6490 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6491 the preceding args and about the function being called.
6492 NAMED is nonzero if this argument is a named parameter
6493 (otherwise it is an extra parameter matching an ellipsis).
6495 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6496 other arguments are passed on the stack. If (NAMED == 0) (which happens
6497 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6498 defined), say it is passed on the stack (function_prologue will
6499 indeed make it pass on the stack if necessary). */
6501 static rtx
6502 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6503 const_tree type, bool named)
6505 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6506 int nregs;
6508 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6509 a call insn (op3 of a call_value insn). */
6510 if (mode == VOIDmode)
6511 return const0_rtx;
6513 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6515 aapcs_layout_arg (pcum, mode, type, named);
6516 return pcum->aapcs_reg;
6519 /* Varargs vectors are treated the same as long long.
6520 named_count avoids having to change the way arm handles 'named'. */
6521 if (TARGET_IWMMXT_ABI
6522 && arm_vector_mode_supported_p (mode)
6523 && pcum->named_count > pcum->nargs + 1)
6525 if (pcum->iwmmxt_nregs <= 9)
6526 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6527 else
6529 pcum->can_split = false;
6530 return NULL_RTX;
6534 /* Put doubleword aligned quantities in even register pairs. */
6535 if (pcum->nregs & 1
6536 && ARM_DOUBLEWORD_ALIGN
6537 && arm_needs_doubleword_align (mode, type))
6538 pcum->nregs++;
6540 /* Only allow splitting an arg between regs and memory if all preceding
6541 args were allocated to regs. For args passed by reference we only count
6542 the reference pointer. */
6543 if (pcum->can_split)
6544 nregs = 1;
6545 else
6546 nregs = ARM_NUM_REGS2 (mode, type);
6548 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6549 return NULL_RTX;
6551 return gen_rtx_REG (mode, pcum->nregs);
6554 static unsigned int
6555 arm_function_arg_boundary (machine_mode mode, const_tree type)
6557 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6558 ? DOUBLEWORD_ALIGNMENT
6559 : PARM_BOUNDARY);
6562 static int
6563 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6564 tree type, bool named)
6566 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6567 int nregs = pcum->nregs;
6569 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6571 aapcs_layout_arg (pcum, mode, type, named);
6572 return pcum->aapcs_partial;
6575 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6576 return 0;
6578 if (NUM_ARG_REGS > nregs
6579 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6580 && pcum->can_split)
6581 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6583 return 0;
6586 /* Update the data in PCUM to advance over an argument
6587 of mode MODE and data type TYPE.
6588 (TYPE is null for libcalls where that information may not be available.) */
6590 static void
6591 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6592 const_tree type, bool named)
6594 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6596 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6598 aapcs_layout_arg (pcum, mode, type, named);
6600 if (pcum->aapcs_cprc_slot >= 0)
6602 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6603 type);
6604 pcum->aapcs_cprc_slot = -1;
6607 /* Generic stuff. */
6608 pcum->aapcs_arg_processed = false;
6609 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6610 pcum->aapcs_reg = NULL_RTX;
6611 pcum->aapcs_partial = 0;
6613 else
6615 pcum->nargs += 1;
6616 if (arm_vector_mode_supported_p (mode)
6617 && pcum->named_count > pcum->nargs
6618 && TARGET_IWMMXT_ABI)
6619 pcum->iwmmxt_nregs += 1;
6620 else
6621 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6625 /* Variable sized types are passed by reference. This is a GCC
6626 extension to the ARM ABI. */
6628 static bool
6629 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6630 machine_mode mode ATTRIBUTE_UNUSED,
6631 const_tree type, bool named ATTRIBUTE_UNUSED)
6633 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6636 /* Encode the current state of the #pragma [no_]long_calls. */
6637 typedef enum
6639 OFF, /* No #pragma [no_]long_calls is in effect. */
6640 LONG, /* #pragma long_calls is in effect. */
6641 SHORT /* #pragma no_long_calls is in effect. */
6642 } arm_pragma_enum;
6644 static arm_pragma_enum arm_pragma_long_calls = OFF;
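/* Typical usage of these pragmas (illustrative only):

     #pragma long_calls
     extern void far_away (void);     calls use a long-call sequence
     #pragma no_long_calls
     extern void nearby (void);       calls are forced to be short
     #pragma long_calls_off
                                      back to the command-line default  */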
6646 void
6647 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6649 arm_pragma_long_calls = LONG;
6652 void
6653 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6655 arm_pragma_long_calls = SHORT;
6658 void
6659 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6661 arm_pragma_long_calls = OFF;
6664 /* Handle an attribute requiring a FUNCTION_DECL;
6665 arguments as in struct attribute_spec.handler. */
6666 static tree
6667 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6668 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6670 if (TREE_CODE (*node) != FUNCTION_DECL)
6672 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6673 name);
6674 *no_add_attrs = true;
6677 return NULL_TREE;
6680 /* Handle an "interrupt" or "isr" attribute;
6681 arguments as in struct attribute_spec.handler. */
6682 static tree
6683 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6684 bool *no_add_attrs)
6686 if (DECL_P (*node))
6688 if (TREE_CODE (*node) != FUNCTION_DECL)
6690 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6691 name);
6692 *no_add_attrs = true;
6694 /* FIXME: the argument if any is checked for type attributes;
6695 should it be checked for decl ones? */
6697 else
6699 if (TREE_CODE (*node) == FUNCTION_TYPE
6700 || TREE_CODE (*node) == METHOD_TYPE)
6702 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6704 warning (OPT_Wattributes, "%qE attribute ignored",
6705 name);
6706 *no_add_attrs = true;
6709 else if (TREE_CODE (*node) == POINTER_TYPE
6710 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6711 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6712 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6714 *node = build_variant_type_copy (*node);
6715 TREE_TYPE (*node) = build_type_attribute_variant
6716 (TREE_TYPE (*node),
6717 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6718 *no_add_attrs = true;
6720 else
6722 /* Possibly pass this attribute on from the type to a decl. */
6723 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6724 | (int) ATTR_FLAG_FUNCTION_NEXT
6725 | (int) ATTR_FLAG_ARRAY_NEXT))
6727 *no_add_attrs = true;
6728 return tree_cons (name, args, NULL_TREE);
6730 else
6732 warning (OPT_Wattributes, "%qE attribute ignored",
6733 name);
6738 return NULL_TREE;
6741 /* Handle a "pcs" attribute; arguments as in struct
6742 attribute_spec.handler. */
6743 static tree
6744 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6745 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6747 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6749 warning (OPT_Wattributes, "%qE attribute ignored", name);
6750 *no_add_attrs = true;
6752 return NULL_TREE;
6755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6756 /* Handle the "notshared" attribute. This attribute is another way of
6757 requesting hidden visibility. ARM's compiler supports
6758 "__declspec(notshared)"; we support the same thing via an
6759 attribute. */
6761 static tree
6762 arm_handle_notshared_attribute (tree *node,
6763 tree name ATTRIBUTE_UNUSED,
6764 tree args ATTRIBUTE_UNUSED,
6765 int flags ATTRIBUTE_UNUSED,
6766 bool *no_add_attrs)
6768 tree decl = TYPE_NAME (*node);
6770 if (decl)
6772 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6773 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6774 *no_add_attrs = false;
6776 return NULL_TREE;
6778 #endif
6780 /* This function returns true if a function with declaration FNDECL and type
6781 FNTYPE uses the stack to pass arguments or return variables and false
6782 otherwise. This is used for functions with the attributes
6783 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6784 diagnostic messages if the stack is used. NAME is the name of the attribute
6785 used. */
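/* For example (hypothetical declarations, compiled with -mcmse):

     int __attribute__ ((cmse_nonsecure_entry)) get_status (int idx);
        accepted: argument and return value fit in core registers.

     struct big { int x[6]; };
     struct big __attribute__ ((cmse_nonsecure_entry)) get_blob (void);
        rejected: the 24-byte return value would be passed on the stack.  */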
6787 static bool
6788 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6790 function_args_iterator args_iter;
6791 CUMULATIVE_ARGS args_so_far_v;
6792 cumulative_args_t args_so_far;
6793 bool first_param = true;
6794 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6796 /* Error out if any argument is passed on the stack. */
6797 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6798 args_so_far = pack_cumulative_args (&args_so_far_v);
6799 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6801 rtx arg_rtx;
6802 machine_mode arg_mode = TYPE_MODE (arg_type);
6804 prev_arg_type = arg_type;
6805 if (VOID_TYPE_P (arg_type))
6806 continue;
6808 if (!first_param)
6809 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6810 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6811 if (!arg_rtx
6812 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6814 error ("%qE attribute not available to functions with arguments "
6815 "passed on the stack", name);
6816 return true;
6818 first_param = false;
6821 /* Error out for variadic functions since we cannot control how many
6822 arguments will be passed and thus the stack could be used. stdarg_p () is not
6823 used for the checking to avoid browsing arguments twice. */
6824 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6826 error ("%qE attribute not available to functions with variable number "
6827 "of arguments", name);
6828 return true;
6831 /* Error out if return value is passed on the stack. */
6832 ret_type = TREE_TYPE (fntype);
6833 if (arm_return_in_memory (ret_type, fntype))
6835 error ("%qE attribute not available to functions that return value on "
6836 "the stack", name);
6837 return true;
6839 return false;
6842 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6843 function will check whether the attribute is allowed here and will add the
6844 attribute to the function declaration tree or otherwise issue a warning. */
6846 static tree
6847 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6848 tree /* args */,
6849 int /* flags */,
6850 bool *no_add_attrs)
6852 tree fndecl;
6854 if (!use_cmse)
6856 *no_add_attrs = true;
6857 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6858 name);
6859 return NULL_TREE;
6862 /* Ignore attribute for function types. */
6863 if (TREE_CODE (*node) != FUNCTION_DECL)
6865 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6866 name);
6867 *no_add_attrs = true;
6868 return NULL_TREE;
6871 fndecl = *node;
6873 /* Warn for static linkage functions. */
6874 if (!TREE_PUBLIC (fndecl))
6876 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6877 "with static linkage", name);
6878 *no_add_attrs = true;
6879 return NULL_TREE;
6882 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6883 TREE_TYPE (fndecl));
6884 return NULL_TREE;
6888 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6889 function will check whether the attribute is allowed here and will add the
6890 attribute to the function type tree or otherwise issue a diagnostic. The
6891 reason we check this at declaration time is to only allow the use of the
6892 attribute with declarations of function pointers and not function
6893 declarations. This function checks NODE is of the expected type and issues
6894 diagnostics otherwise using NAME. If it is not of the expected type
6895 *NO_ADD_ATTRS will be set to true. */
6897 static tree
6898 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6899 tree /* args */,
6900 int /* flags */,
6901 bool *no_add_attrs)
6903 tree decl = NULL_TREE, fntype = NULL_TREE;
6904 tree type;
6906 if (!use_cmse)
6908 *no_add_attrs = true;
6909 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
6910 name);
6911 return NULL_TREE;
6914 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6916 decl = *node;
6917 fntype = TREE_TYPE (decl);
6920 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6921 fntype = TREE_TYPE (fntype);
6923 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6925 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6926 "function pointer", name);
6927 *no_add_attrs = true;
6928 return NULL_TREE;
6931 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
6933 if (*no_add_attrs)
6934 return NULL_TREE;
6936 /* Prevent trees being shared among function types with and without
6937 cmse_nonsecure_call attribute. */
6938 type = TREE_TYPE (decl);
6940 type = build_distinct_type_copy (type);
6941 TREE_TYPE (decl) = type;
6942 fntype = type;
6944 while (TREE_CODE (fntype) != FUNCTION_TYPE)
6946 type = fntype;
6947 fntype = TREE_TYPE (fntype);
6948 fntype = build_distinct_type_copy (fntype);
6949 TREE_TYPE (type) = fntype;
6952 /* Construct a type attribute and add it to the function type. */
6953 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
6954 TYPE_ATTRIBUTES (fntype));
6955 TYPE_ATTRIBUTES (fntype) = attrs;
6956 return NULL_TREE;
6959 /* Return 0 if the attributes for two types are incompatible, 1 if they
6960 are compatible, and 2 if they are nearly compatible (which causes a
6961 warning to be generated). */
6962 static int
6963 arm_comp_type_attributes (const_tree type1, const_tree type2)
6965 int l1, l2, s1, s2;
6967 /* Check for mismatch of non-default calling convention. */
6968 if (TREE_CODE (type1) != FUNCTION_TYPE)
6969 return 1;
6971 /* Check for mismatched call attributes. */
6972 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6973 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6974 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6975 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6977 /* Only bother to check if an attribute is defined. */
6978 if (l1 | l2 | s1 | s2)
6980 /* If one type has an attribute, the other must have the same attribute. */
6981 if ((l1 != l2) || (s1 != s2))
6982 return 0;
6984 /* Disallow mixed attributes. */
6985 if ((l1 & s2) || (l2 & s1))
6986 return 0;
6989 /* Check for mismatched ISR attribute. */
6990 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6991 if (! l1)
6992 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6993 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6994 if (! l2)
6995 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6996 if (l1 != l2)
6997 return 0;
6999 l1 = lookup_attribute ("cmse_nonsecure_call",
7000 TYPE_ATTRIBUTES (type1)) != NULL;
7001 l2 = lookup_attribute ("cmse_nonsecure_call",
7002 TYPE_ATTRIBUTES (type2)) != NULL;
7004 if (l1 != l2)
7005 return 0;
7007 return 1;
7010 /* Assigns default attributes to newly defined type. This is used to
7011 set short_call/long_call attributes for function types of
7012 functions defined inside corresponding #pragma scopes. */
7013 static void
7014 arm_set_default_type_attributes (tree type)
7016 /* Add __attribute__ ((long_call)) to all functions, when
7017 inside #pragma long_calls or __attribute__ ((short_call)),
7018 when inside #pragma no_long_calls. */
7019 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7021 tree type_attr_list, attr_name;
7022 type_attr_list = TYPE_ATTRIBUTES (type);
7024 if (arm_pragma_long_calls == LONG)
7025 attr_name = get_identifier ("long_call");
7026 else if (arm_pragma_long_calls == SHORT)
7027 attr_name = get_identifier ("short_call");
7028 else
7029 return;
7031 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7032 TYPE_ATTRIBUTES (type) = type_attr_list;
7036 /* Return true if DECL is known to be linked into section SECTION. */
7038 static bool
7039 arm_function_in_section_p (tree decl, section *section)
7041 /* We can only be certain about the prevailing symbol definition. */
7042 if (!decl_binds_to_current_def_p (decl))
7043 return false;
7045 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7046 if (!DECL_SECTION_NAME (decl))
7048 /* Make sure that we will not create a unique section for DECL. */
7049 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7050 return false;
7053 return function_section (decl) == section;
7056 /* Return nonzero if a 32-bit "long_call" should be generated for
7057 a call from the current function to DECL. We generate a long_call
7058 if the function:
7060 a. has an __attribute__ ((long_call))
7061 or b. is within the scope of a #pragma long_calls
7062 or c. the -mlong-calls command line switch has been specified
7064 However we do not generate a long call if the function:
7066 d. has an __attribute__ ((short_call))
7067 or e. is inside the scope of a #pragma no_long_calls
7068 or f. is defined in the same section as the current function. */
7070 bool
7071 arm_is_long_call_p (tree decl)
7073 tree attrs;
7075 if (!decl)
7076 return TARGET_LONG_CALLS;
7078 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7079 if (lookup_attribute ("short_call", attrs))
7080 return false;
7082 /* For "f", be conservative, and only cater for cases in which the
7083 whole of the current function is placed in the same section. */
7084 if (!flag_reorder_blocks_and_partition
7085 && TREE_CODE (decl) == FUNCTION_DECL
7086 && arm_function_in_section_p (decl, current_function_section ()))
7087 return false;
7089 if (lookup_attribute ("long_call", attrs))
7090 return true;
7092 return TARGET_LONG_CALLS;
7095 /* Return nonzero if it is ok to make a tail-call to DECL. */
7096 static bool
7097 arm_function_ok_for_sibcall (tree decl, tree exp)
7099 unsigned long func_type;
7101 if (cfun->machine->sibcall_blocked)
7102 return false;
7104 /* Never tailcall something if we are generating code for Thumb-1. */
7105 if (TARGET_THUMB1)
7106 return false;
7108 /* The PIC register is live on entry to VxWorks PLT entries, so we
7109 must make the call before restoring the PIC register. */
7110 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7111 return false;
7113 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7114 may be used both as target of the call and base register for restoring
7115 the VFP registers. */
7116 if (TARGET_APCS_FRAME && TARGET_ARM
7117 && TARGET_HARD_FLOAT
7118 && decl && arm_is_long_call_p (decl))
7119 return false;
7121 /* If we are interworking and the function is not declared static
7122 then we can't tail-call it unless we know that it exists in this
7123 compilation unit (since it might be a Thumb routine). */
7124 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7125 && !TREE_ASM_WRITTEN (decl))
7126 return false;
7128 func_type = arm_current_func_type ();
7129 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7130 if (IS_INTERRUPT (func_type))
7131 return false;
7133 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7134 generated for entry functions themselves. */
7135 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7136 return false;
7138 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7139 this would complicate matters for later code generation. */
7140 if (TREE_CODE (exp) == CALL_EXPR)
7142 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7143 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7144 return false;
7147 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7149 /* Check that the return value locations are the same. For
7150 example that we aren't returning a value from the sibling in
7151 a VFP register but then need to transfer it to a core
7152 register. */
7153 rtx a, b;
7154 tree decl_or_type = decl;
7156 /* If it is an indirect function pointer, get the function type. */
7157 if (!decl)
7158 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7160 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7161 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7162 cfun->decl, false);
7163 if (!rtx_equal_p (a, b))
7164 return false;
7167 /* Never tailcall if function may be called with a misaligned SP. */
7168 if (IS_STACKALIGN (func_type))
7169 return false;
7171 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7172 references should become a NOP. Don't convert such calls into
7173 sibling calls. */
7174 if (TARGET_AAPCS_BASED
7175 && arm_abi == ARM_ABI_AAPCS
7176 && decl
7177 && DECL_WEAK (decl))
7178 return false;
7180 /* We cannot do a tailcall for an indirect call by descriptor if all the
7181 argument registers are used because the only register left to load the
7182 address is IP and it will already contain the static chain. */
7183 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7185 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7186 CUMULATIVE_ARGS cum;
7187 cumulative_args_t cum_v;
7189 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7190 cum_v = pack_cumulative_args (&cum);
7192 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7194 tree type = TREE_VALUE (t);
7195 if (!VOID_TYPE_P (type))
7196 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7199 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7200 return false;
7203 /* Everything else is ok. */
7204 return true;
7208 /* Addressing mode support functions. */
7210 /* Return nonzero if X is a legitimate immediate operand when compiling
7211 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7213 legitimate_pic_operand_p (rtx x)
7215 if (GET_CODE (x) == SYMBOL_REF
7216 || (GET_CODE (x) == CONST
7217 && GET_CODE (XEXP (x, 0)) == PLUS
7218 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7219 return 0;
7221 return 1;
7224 /* Record that the current function needs a PIC register. Initialize
7225 cfun->machine->pic_reg if we have not already done so. */
7227 static void
7228 require_pic_register (void)
7230 /* A lot of the logic here is made obscure by the fact that this
7231 routine gets called as part of the rtx cost estimation process.
7232 We don't want those calls to affect any assumptions about the real
7233 function; and further, we can't call entry_of_function() until we
7234 start the real expansion process. */
7235 if (!crtl->uses_pic_offset_table)
7237 gcc_assert (can_create_pseudo_p ());
7238 if (arm_pic_register != INVALID_REGNUM
7239 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7241 if (!cfun->machine->pic_reg)
7242 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7244 /* Play games to avoid marking the function as needing pic
7245 if we are being called as part of the cost-estimation
7246 process. */
7247 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7248 crtl->uses_pic_offset_table = 1;
7250 else
7252 rtx_insn *seq, *insn;
7254 if (!cfun->machine->pic_reg)
7255 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7257 /* Play games to avoid marking the function as needing pic
7258 if we are being called as part of the cost-estimation
7259 process. */
7260 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7262 crtl->uses_pic_offset_table = 1;
7263 start_sequence ();
7265 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7266 && arm_pic_register > LAST_LO_REGNUM)
7267 emit_move_insn (cfun->machine->pic_reg,
7268 gen_rtx_REG (Pmode, arm_pic_register));
7269 else
7270 arm_load_pic_register (0UL);
7272 seq = get_insns ();
7273 end_sequence ();
7275 for (insn = seq; insn; insn = NEXT_INSN (insn))
7276 if (INSN_P (insn))
7277 INSN_LOCATION (insn) = prologue_location;
7279 /* We can be called during expansion of PHI nodes, where
7280 we can't yet emit instructions directly in the final
7281 insn stream. Queue the insns on the entry edge; they will
7282 be committed after everything else is expanded. */
7283 insert_insn_on_edge (seq,
7284 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7291 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7293 if (GET_CODE (orig) == SYMBOL_REF
7294 || GET_CODE (orig) == LABEL_REF)
7296 if (reg == 0)
7298 gcc_assert (can_create_pseudo_p ());
7299 reg = gen_reg_rtx (Pmode);
7302 /* VxWorks does not impose a fixed gap between segments; the run-time
7303 gap can be different from the object-file gap. We therefore can't
7304 use GOTOFF unless we are absolutely sure that the symbol is in the
7305 same segment as the GOT. Unfortunately, the flexibility of linker
7306 scripts means that we can't be sure of that in general, so assume
7307 that GOTOFF is never valid on VxWorks. */
7308 /* References to weak symbols cannot be resolved locally: they
7309 may be overridden by a non-weak definition at link time. */
7310 rtx_insn *insn;
7311 if ((GET_CODE (orig) == LABEL_REF
7312 || (GET_CODE (orig) == SYMBOL_REF
7313 && SYMBOL_REF_LOCAL_P (orig)
7314 && (SYMBOL_REF_DECL (orig)
7315 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7316 && NEED_GOT_RELOC
7317 && arm_pic_data_is_text_relative)
7318 insn = arm_pic_static_addr (orig, reg);
7319 else
7321 rtx pat;
7322 rtx mem;
7324 /* If this function doesn't have a pic register, create one now. */
7325 require_pic_register ();
7327 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7329 /* Make the MEM as close to a constant as possible. */
7330 mem = SET_SRC (pat);
7331 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7332 MEM_READONLY_P (mem) = 1;
7333 MEM_NOTRAP_P (mem) = 1;
7335 insn = emit_insn (pat);
7338 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7339 by the loop optimizer. */
7340 set_unique_reg_note (insn, REG_EQUAL, orig);
7342 return reg;
7344 else if (GET_CODE (orig) == CONST)
7346 rtx base, offset;
7348 if (GET_CODE (XEXP (orig, 0)) == PLUS
7349 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7350 return orig;
7352 /* Handle the case where we have: const (UNSPEC_TLS). */
7353 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7354 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7355 return orig;
7357 /* Handle the case where we have:
7358 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7359 CONST_INT. */
7360 if (GET_CODE (XEXP (orig, 0)) == PLUS
7361 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7362 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7364 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7365 return orig;
7368 if (reg == 0)
7370 gcc_assert (can_create_pseudo_p ());
7371 reg = gen_reg_rtx (Pmode);
7374 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7376 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7377 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7378 base == reg ? 0 : reg);
7380 if (CONST_INT_P (offset))
7382 /* The base register doesn't really matter; we only want to
7383 test the index for the appropriate mode. */
7384 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7386 gcc_assert (can_create_pseudo_p ());
7387 offset = force_reg (Pmode, offset);
7390 if (CONST_INT_P (offset))
7391 return plus_constant (Pmode, base, INTVAL (offset));
7394 if (GET_MODE_SIZE (mode) > 4
7395 && (GET_MODE_CLASS (mode) == MODE_INT
7396 || TARGET_SOFT_FLOAT))
7398 emit_insn (gen_addsi3 (reg, base, offset));
7399 return reg;
7402 return gen_rtx_PLUS (Pmode, base, offset);
7405 return orig;
7409 /* Find a spare register to use during the prolog of a function. */
7411 static int
7412 thumb_find_work_register (unsigned long pushed_regs_mask)
7414 int reg;
7416 /* Check the argument registers first as these are call-used. The
7417 register allocation order means that sometimes r3 might be used
7418 but earlier argument registers might not, so check them all. */
7419 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7420 if (!df_regs_ever_live_p (reg))
7421 return reg;
7423 /* Before going on to check the call-saved registers we can try a couple
7424 more ways of deducing that r3 is available. The first is when we are
7425 pushing anonymous arguments onto the stack and we have fewer than 4
7426 registers' worth of fixed arguments(*). In this case r3 will be part of
7427 the variable argument list and so we can be sure that it will be
7428 pushed right at the start of the function. Hence it will be available
7429 for the rest of the prologue.
7430 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
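/* Illustrative example (an assumption for exposition, not from the original
   comment): for a variadic function such as "int f (int a, ...)" only r0
   carries a fixed argument, so r1-r3 belong to the anonymous part and are
   pushed as pretend args (crtl->args.pretend_args_size == 12); r3 is then
   free for use as a work register during the rest of the prologue.  */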
7431 if (cfun->machine->uses_anonymous_args
7432 && crtl->args.pretend_args_size > 0)
7433 return LAST_ARG_REGNUM;
7435 /* The other case is when we have fixed arguments but fewer than 4 registers'
7436 worth. In this case r3 might be used in the body of the function, but
7437 it is not being used to convey an argument into the function. In theory
7438 we could just check crtl->args.size to see how many bytes are
7439 being passed in argument registers, but it seems that it is unreliable.
7440 Sometimes it will have the value 0 when in fact arguments are being
7441 passed. (See testcase execute/20021111-1.c for an example). So we also
7442 check the args_info.nregs field as well. The problem with this field is
7443 that it makes no allowances for arguments that are passed to the
7444 function but which are not used. Hence we could miss an opportunity
7445 when a function has an unused argument in r3. But it is better to be
7446 safe than to be sorry. */
7447 if (! cfun->machine->uses_anonymous_args
7448 && crtl->args.size >= 0
7449 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7450 && (TARGET_AAPCS_BASED
7451 ? crtl->args.info.aapcs_ncrn < 4
7452 : crtl->args.info.nregs < 4))
7453 return LAST_ARG_REGNUM;
7455 /* Otherwise look for a call-saved register that is going to be pushed. */
7456 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7457 if (pushed_regs_mask & (1 << reg))
7458 return reg;
7460 if (TARGET_THUMB2)
7462 /* Thumb-2 can use high regs. */
7463 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7464 if (pushed_regs_mask & (1 << reg))
7465 return reg;
7467 /* Something went wrong - thumb_compute_save_reg_mask()
7468 should have arranged for a suitable register to be pushed. */
7469 gcc_unreachable ();
7472 static GTY(()) int pic_labelno;
7474 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7475 low register. */
7477 void
7478 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7480 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7482 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7483 return;
7485 gcc_assert (flag_pic);
7487 pic_reg = cfun->machine->pic_reg;
7488 if (TARGET_VXWORKS_RTP)
7490 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7491 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7492 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7494 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7496 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7497 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7499 else
7501 /* We use an UNSPEC rather than a LABEL_REF because this label
7502 never appears in the code stream. */
7504 labelno = GEN_INT (pic_labelno++);
7505 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7506 l1 = gen_rtx_CONST (VOIDmode, l1);
7508 /* On the ARM the PC register contains 'dot + 8' at the time of the
7509 addition, on the Thumb it is 'dot + 4'. */
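/* Illustrative sketch of the sequence this builds (an assumption for
   exposition, not part of the original comment):
       ldr   rPIC, .Loffset      @ .Loffset holds GOT_BASE - (.LPIC0 + 8)
   .LPIC0:
       add   rPIC, pc, rPIC      @ in ARM state pc reads as .LPIC0 + 8
   Adding the pipeline offset of 8 (or 4 for Thumb) here lets the
   UNSPEC_GOTSYM_OFF constant cancel it out at run time.  */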
7510 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7511 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7512 UNSPEC_GOTSYM_OFF);
7513 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7515 if (TARGET_32BIT)
7517 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7519 else /* TARGET_THUMB1 */
7521 if (arm_pic_register != INVALID_REGNUM
7522 && REGNO (pic_reg) > LAST_LO_REGNUM)
7524 /* We will have pushed the pic register, so we should always be
7525 able to find a work register. */
7526 pic_tmp = gen_rtx_REG (SImode,
7527 thumb_find_work_register (saved_regs));
7528 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7529 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7530 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7532 else if (arm_pic_register != INVALID_REGNUM
7533 && arm_pic_register > LAST_LO_REGNUM
7534 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7536 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7537 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7538 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7540 else
7541 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7545 /* Need to emit this whether or not we obey regdecls,
7546 since setjmp/longjmp can corrupt the liveness information. */
7547 emit_use (pic_reg);
7550 /* Generate code to load the address of a static var when flag_pic is set. */
7551 static rtx_insn *
7552 arm_pic_static_addr (rtx orig, rtx reg)
7554 rtx l1, labelno, offset_rtx;
7556 gcc_assert (flag_pic);
7558 /* We use an UNSPEC rather than a LABEL_REF because this label
7559 never appears in the code stream. */
7560 labelno = GEN_INT (pic_labelno++);
7561 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7562 l1 = gen_rtx_CONST (VOIDmode, l1);
7564 /* On the ARM the PC register contains 'dot + 8' at the time of the
7565 addition, on the Thumb it is 'dot + 4'. */
7566 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7567 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7568 UNSPEC_SYMBOL_OFFSET);
7569 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7571 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7574 /* Return nonzero if X is valid as an ARM state addressing register. */
7575 static int
7576 arm_address_register_rtx_p (rtx x, int strict_p)
7578 int regno;
7580 if (!REG_P (x))
7581 return 0;
7583 regno = REGNO (x);
7585 if (strict_p)
7586 return ARM_REGNO_OK_FOR_BASE_P (regno);
7588 return (regno <= LAST_ARM_REGNUM
7589 || regno >= FIRST_PSEUDO_REGISTER
7590 || regno == FRAME_POINTER_REGNUM
7591 || regno == ARG_POINTER_REGNUM);
7594 /* Return TRUE if this rtx is the difference of a symbol and a label,
7595 and will reduce to a PC-relative relocation in the object file.
7596 Expressions like this can be left alone when generating PIC, rather
7597 than forced through the GOT. */
7598 static int
7599 pcrel_constant_p (rtx x)
7601 if (GET_CODE (x) == MINUS)
7602 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7604 return FALSE;
7607 /* Return true if X will surely end up in an index register after the
7608 next splitting pass. */
7609 static bool
7610 will_be_in_index_register (const_rtx x)
7612 /* arm.md: calculate_pic_address will split this into a register. */
7613 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7616 /* Return nonzero if X is a valid ARM state address operand. */
7618 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7619 int strict_p)
7621 bool use_ldrd;
7622 enum rtx_code code = GET_CODE (x);
7624 if (arm_address_register_rtx_p (x, strict_p))
7625 return 1;
7627 use_ldrd = (TARGET_LDRD
7628 && (mode == DImode || mode == DFmode));
7630 if (code == POST_INC || code == PRE_DEC
7631 || ((code == PRE_INC || code == POST_DEC)
7632 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7633 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7635 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7636 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7637 && GET_CODE (XEXP (x, 1)) == PLUS
7638 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7640 rtx addend = XEXP (XEXP (x, 1), 1);
7642 /* Don't allow ldrd post-increment by register because it's hard
7643 to fix up invalid register choices. */
7644 if (use_ldrd
7645 && GET_CODE (x) == POST_MODIFY
7646 && REG_P (addend))
7647 return 0;
7649 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7650 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7653 /* After reload, constants split into minipools will have addresses
7654 from a LABEL_REF. */
7655 else if (reload_completed
7656 && (code == LABEL_REF
7657 || (code == CONST
7658 && GET_CODE (XEXP (x, 0)) == PLUS
7659 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7660 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7661 return 1;
7663 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7664 return 0;
7666 else if (code == PLUS)
7668 rtx xop0 = XEXP (x, 0);
7669 rtx xop1 = XEXP (x, 1);
7671 return ((arm_address_register_rtx_p (xop0, strict_p)
7672 && ((CONST_INT_P (xop1)
7673 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7674 || (!strict_p && will_be_in_index_register (xop1))))
7675 || (arm_address_register_rtx_p (xop1, strict_p)
7676 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7679 #if 0
7680 /* Reload currently can't handle MINUS, so disable this for now */
7681 else if (GET_CODE (x) == MINUS)
7683 rtx xop0 = XEXP (x, 0);
7684 rtx xop1 = XEXP (x, 1);
7686 return (arm_address_register_rtx_p (xop0, strict_p)
7687 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7689 #endif
7691 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7692 && code == SYMBOL_REF
7693 && CONSTANT_POOL_ADDRESS_P (x)
7694 && ! (flag_pic
7695 && symbol_mentioned_p (get_pool_constant (x))
7696 && ! pcrel_constant_p (get_pool_constant (x))))
7697 return 1;
7699 return 0;
7702 /* Return nonzero if X is a valid Thumb-2 address operand. */
7703 static int
7704 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7706 bool use_ldrd;
7707 enum rtx_code code = GET_CODE (x);
7709 if (arm_address_register_rtx_p (x, strict_p))
7710 return 1;
7712 use_ldrd = (TARGET_LDRD
7713 && (mode == DImode || mode == DFmode));
7715 if (code == POST_INC || code == PRE_DEC
7716 || ((code == PRE_INC || code == POST_DEC)
7717 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7718 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7720 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7721 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7722 && GET_CODE (XEXP (x, 1)) == PLUS
7723 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7725 /* Thumb-2 only has autoincrement by constant. */
7726 rtx addend = XEXP (XEXP (x, 1), 1);
7727 HOST_WIDE_INT offset;
7729 if (!CONST_INT_P (addend))
7730 return 0;
7732 offset = INTVAL (addend);
7733 if (GET_MODE_SIZE (mode) <= 4)
7734 return (offset > -256 && offset < 256);
7736 return (use_ldrd && offset > -1024 && offset < 1024
7737 && (offset & 3) == 0);
7740 /* After reload, constants split into minipools will have addresses
7741 from a LABEL_REF. */
7742 else if (reload_completed
7743 && (code == LABEL_REF
7744 || (code == CONST
7745 && GET_CODE (XEXP (x, 0)) == PLUS
7746 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7747 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7748 return 1;
7750 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7751 return 0;
7753 else if (code == PLUS)
7755 rtx xop0 = XEXP (x, 0);
7756 rtx xop1 = XEXP (x, 1);
7758 return ((arm_address_register_rtx_p (xop0, strict_p)
7759 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7760 || (!strict_p && will_be_in_index_register (xop1))))
7761 || (arm_address_register_rtx_p (xop1, strict_p)
7762 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7765 /* Normally we can assign constant values to target registers without
7766 the help of a constant pool. But there are cases where we have to use a
7767 constant pool, such as:
7768 1) assigning a label to a register;
7769 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7771 A constant pool access of the form:
7772 (set (reg r0) (mem (symbol_ref (".LC0"))))
7773 will cause the use of the literal pool (later in function arm_reorg).
7774 So here we mark such a format as invalid; the compiler will then
7775 adjust it into:
7776 (set (reg r0) (symbol_ref (".LC0")))
7777 (set (reg r0) (mem (reg r0))).
7778 No extra register is required, and (mem (reg r0)) won't cause the use
7779 of literal pools. */
7780 else if (arm_disable_literal_pool && code == SYMBOL_REF
7781 && CONSTANT_POOL_ADDRESS_P (x))
7782 return 0;
7784 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7785 && code == SYMBOL_REF
7786 && CONSTANT_POOL_ADDRESS_P (x)
7787 && ! (flag_pic
7788 && symbol_mentioned_p (get_pool_constant (x))
7789 && ! pcrel_constant_p (get_pool_constant (x))))
7790 return 1;
7792 return 0;
7795 /* Return nonzero if INDEX is valid for an address index operand in
7796 ARM state. */
7797 static int
7798 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7799 int strict_p)
7801 HOST_WIDE_INT range;
7802 enum rtx_code code = GET_CODE (index);
7804 /* Standard coprocessor addressing modes. */
7805 if (TARGET_HARD_FLOAT
7806 && (mode == SFmode || mode == DFmode))
7807 return (code == CONST_INT && INTVAL (index) < 1024
7808 && INTVAL (index) > -1024
7809 && (INTVAL (index) & 3) == 0);
7811 /* For quad modes, we restrict the constant offset to be slightly less
7812 than what the instruction format permits. We do this because for
7813 quad mode moves, we will actually decompose them into two separate
7814 double-mode reads or writes. INDEX must therefore be a valid
7815 (double-mode) offset and so should INDEX+8. */
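/* Worked example (illustrative): an offset of 1012 is accepted because both
   1012 and 1012 + 8 == 1020 fit the double-mode range below, whereas 1016
   would push the second access to 1024 and out of range.  */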
7816 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7817 return (code == CONST_INT
7818 && INTVAL (index) < 1016
7819 && INTVAL (index) > -1024
7820 && (INTVAL (index) & 3) == 0);
7822 /* We have no such constraint on double mode offsets, so we permit the
7823 full range of the instruction format. */
7824 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7825 return (code == CONST_INT
7826 && INTVAL (index) < 1024
7827 && INTVAL (index) > -1024
7828 && (INTVAL (index) & 3) == 0);
7830 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7831 return (code == CONST_INT
7832 && INTVAL (index) < 1024
7833 && INTVAL (index) > -1024
7834 && (INTVAL (index) & 3) == 0);
7836 if (arm_address_register_rtx_p (index, strict_p)
7837 && (GET_MODE_SIZE (mode) <= 4))
7838 return 1;
7840 if (mode == DImode || mode == DFmode)
7842 if (code == CONST_INT)
7844 HOST_WIDE_INT val = INTVAL (index);
7846 if (TARGET_LDRD)
7847 return val > -256 && val < 256;
7848 else
7849 return val > -4096 && val < 4092;
7852 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7855 if (GET_MODE_SIZE (mode) <= 4
7856 && ! (arm_arch4
7857 && (mode == HImode
7858 || mode == HFmode
7859 || (mode == QImode && outer == SIGN_EXTEND))))
7861 if (code == MULT)
7863 rtx xiop0 = XEXP (index, 0);
7864 rtx xiop1 = XEXP (index, 1);
7866 return ((arm_address_register_rtx_p (xiop0, strict_p)
7867 && power_of_two_operand (xiop1, SImode))
7868 || (arm_address_register_rtx_p (xiop1, strict_p)
7869 && power_of_two_operand (xiop0, SImode)));
7871 else if (code == LSHIFTRT || code == ASHIFTRT
7872 || code == ASHIFT || code == ROTATERT)
7874 rtx op = XEXP (index, 1);
7876 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7877 && CONST_INT_P (op)
7878 && INTVAL (op) > 0
7879 && INTVAL (op) <= 31);
7883 /* For ARM v4 we may be doing a sign-extend operation during the
7884 load. */
7885 if (arm_arch4)
7887 if (mode == HImode
7888 || mode == HFmode
7889 || (outer == SIGN_EXTEND && mode == QImode))
7890 range = 256;
7891 else
7892 range = 4096;
7894 else
7895 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7897 return (code == CONST_INT
7898 && INTVAL (index) < range
7899 && INTVAL (index) > -range);
7902 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7903 index operand, i.e. 1, 2, 4 or 8. */
7904 static bool
7905 thumb2_index_mul_operand (rtx op)
7907 HOST_WIDE_INT val;
7909 if (!CONST_INT_P (op))
7910 return false;
7912 val = INTVAL (op);
7913 return (val == 1 || val == 2 || val == 4 || val == 8);
7916 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7917 static int
7918 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7920 enum rtx_code code = GET_CODE (index);
7922 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7923 /* Standard coprocessor addressing modes. */
7924 if (TARGET_HARD_FLOAT
7925 && (mode == SFmode || mode == DFmode))
7926 return (code == CONST_INT && INTVAL (index) < 1024
7927 /* Thumb-2 allows only a > -256 index range for its core register
7928 load/stores. Since we allow SF/DF in core registers, we have
7929 to use the intersection between -256~4096 (core) and -1024~1024
7930 (coprocessor). */
7931 && INTVAL (index) > -256
7932 && (INTVAL (index) & 3) == 0);
7934 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7936 /* For DImode assume values will usually live in core regs
7937 and only allow LDRD addressing modes. */
7938 if (!TARGET_LDRD || mode != DImode)
7939 return (code == CONST_INT
7940 && INTVAL (index) < 1024
7941 && INTVAL (index) > -1024
7942 && (INTVAL (index) & 3) == 0);
7945 /* For quad modes, we restrict the constant offset to be slightly less
7946 than what the instruction format permits. We do this because for
7947 quad mode moves, we will actually decompose them into two separate
7948 double-mode reads or writes. INDEX must therefore be a valid
7949 (double-mode) offset and so should INDEX+8. */
7950 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7951 return (code == CONST_INT
7952 && INTVAL (index) < 1016
7953 && INTVAL (index) > -1024
7954 && (INTVAL (index) & 3) == 0);
7956 /* We have no such constraint on double mode offsets, so we permit the
7957 full range of the instruction format. */
7958 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7959 return (code == CONST_INT
7960 && INTVAL (index) < 1024
7961 && INTVAL (index) > -1024
7962 && (INTVAL (index) & 3) == 0);
7964 if (arm_address_register_rtx_p (index, strict_p)
7965 && (GET_MODE_SIZE (mode) <= 4))
7966 return 1;
7968 if (mode == DImode || mode == DFmode)
7970 if (code == CONST_INT)
7972 HOST_WIDE_INT val = INTVAL (index);
7973 /* ??? Can we assume ldrd for thumb2? */
7974 /* Thumb-2 ldrd only has reg+const addressing modes. */
7975 /* ldrd supports offsets of +-1020.
7976 However the ldr fallback does not. */
7977 return val > -256 && val < 256 && (val & 3) == 0;
7979 else
7980 return 0;
7983 if (code == MULT)
7985 rtx xiop0 = XEXP (index, 0);
7986 rtx xiop1 = XEXP (index, 1);
7988 return ((arm_address_register_rtx_p (xiop0, strict_p)
7989 && thumb2_index_mul_operand (xiop1))
7990 || (arm_address_register_rtx_p (xiop1, strict_p)
7991 && thumb2_index_mul_operand (xiop0)));
7993 else if (code == ASHIFT)
7995 rtx op = XEXP (index, 1);
7997 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7998 && CONST_INT_P (op)
7999 && INTVAL (op) > 0
8000 && INTVAL (op) <= 3);
8003 return (code == CONST_INT
8004 && INTVAL (index) < 4096
8005 && INTVAL (index) > -256);
8008 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8009 static int
8010 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8012 int regno;
8014 if (!REG_P (x))
8015 return 0;
8017 regno = REGNO (x);
8019 if (strict_p)
8020 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8022 return (regno <= LAST_LO_REGNUM
8023 || regno > LAST_VIRTUAL_REGISTER
8024 || regno == FRAME_POINTER_REGNUM
8025 || (GET_MODE_SIZE (mode) >= 4
8026 && (regno == STACK_POINTER_REGNUM
8027 || regno >= FIRST_PSEUDO_REGISTER
8028 || x == hard_frame_pointer_rtx
8029 || x == arg_pointer_rtx)));
8032 /* Return nonzero if x is a legitimate index register. This is the case
8033 for any base register that can access a QImode object. */
8034 inline static int
8035 thumb1_index_register_rtx_p (rtx x, int strict_p)
8037 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8040 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8042 The AP may be eliminated to either the SP or the FP, so we use the
8043 least common denominator, e.g. SImode, and offsets from 0 to 64.
8045 ??? Verify whether the above is the right approach.
8047 ??? Also, the FP may be eliminated to the SP, so perhaps that
8048 needs special handling also.
8050 ??? Look at how the mips16 port solves this problem. It probably uses
8051 better ways to solve some of these problems.
8053 Although it is not incorrect, we don't accept QImode and HImode
8054 addresses based on the frame pointer or arg pointer until the
8055 reload pass starts. This is so that eliminating such addresses
8056 into stack based ones won't produce impossible code. */
8058 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8060 /* ??? Not clear if this is right. Experiment. */
8061 if (GET_MODE_SIZE (mode) < 4
8062 && !(reload_in_progress || reload_completed)
8063 && (reg_mentioned_p (frame_pointer_rtx, x)
8064 || reg_mentioned_p (arg_pointer_rtx, x)
8065 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8066 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8067 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8068 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8069 return 0;
8071 /* Accept any base register. SP only in SImode or larger. */
8072 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8073 return 1;
8075 /* This is PC relative data before arm_reorg runs. */
8076 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8077 && GET_CODE (x) == SYMBOL_REF
8078 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8079 return 1;
8081 /* This is PC relative data after arm_reorg runs. */
8082 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8083 && reload_completed
8084 && (GET_CODE (x) == LABEL_REF
8085 || (GET_CODE (x) == CONST
8086 && GET_CODE (XEXP (x, 0)) == PLUS
8087 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8088 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8089 return 1;
8091 /* Post-inc indexing only supported for SImode and larger. */
8092 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8093 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8094 return 1;
8096 else if (GET_CODE (x) == PLUS)
8098 /* REG+REG address can be any two index registers. */
8099 /* We disallow FRAME+REG addressing since we know that FRAME
8100 will be replaced with STACK, and SP relative addressing only
8101 permits SP+OFFSET. */
8102 if (GET_MODE_SIZE (mode) <= 4
8103 && XEXP (x, 0) != frame_pointer_rtx
8104 && XEXP (x, 1) != frame_pointer_rtx
8105 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8106 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8107 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8108 return 1;
8110 /* REG+const has 5-7 bit offset for non-SP registers. */
8111 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8112 || XEXP (x, 0) == arg_pointer_rtx)
8113 && CONST_INT_P (XEXP (x, 1))
8114 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8115 return 1;
8117 /* REG+const has 10-bit offset for SP, but only SImode and
8118 larger is supported. */
8119 /* ??? Should probably check for DI/DFmode overflow here
8120 just like GO_IF_LEGITIMATE_OFFSET does. */
8121 else if (REG_P (XEXP (x, 0))
8122 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8123 && GET_MODE_SIZE (mode) >= 4
8124 && CONST_INT_P (XEXP (x, 1))
8125 && INTVAL (XEXP (x, 1)) >= 0
8126 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8127 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8128 return 1;
8130 else if (REG_P (XEXP (x, 0))
8131 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8132 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8133 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8134 && REGNO (XEXP (x, 0))
8135 <= LAST_VIRTUAL_POINTER_REGISTER))
8136 && GET_MODE_SIZE (mode) >= 4
8137 && CONST_INT_P (XEXP (x, 1))
8138 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8139 return 1;
8142 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8143 && GET_MODE_SIZE (mode) == 4
8144 && GET_CODE (x) == SYMBOL_REF
8145 && CONSTANT_POOL_ADDRESS_P (x)
8146 && ! (flag_pic
8147 && symbol_mentioned_p (get_pool_constant (x))
8148 && ! pcrel_constant_p (get_pool_constant (x))))
8149 return 1;
8151 return 0;
8154 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8155 instruction of mode MODE. */
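/* Roughly (an illustrative summary of the checks below): byte accesses allow
   offsets 0..31, halfword accesses even offsets 0..62, and word or larger
   accesses word-aligned offsets with offset + size <= 128, i.e. 0..124 for
   SImode and 0..120 for DImode.  */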
8157 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8159 switch (GET_MODE_SIZE (mode))
8161 case 1:
8162 return val >= 0 && val < 32;
8164 case 2:
8165 return val >= 0 && val < 64 && (val & 1) == 0;
8167 default:
8168 return (val >= 0
8169 && (val + GET_MODE_SIZE (mode)) <= 128
8170 && (val & 3) == 0);
8174 bool
8175 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8177 if (TARGET_ARM)
8178 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8179 else if (TARGET_THUMB2)
8180 return thumb2_legitimate_address_p (mode, x, strict_p);
8181 else /* if (TARGET_THUMB1) */
8182 return thumb1_legitimate_address_p (mode, x, strict_p);
8185 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8187 Given an rtx X being reloaded into a reg required to be
8188 in class CLASS, return the class of reg to actually use.
8189 In general this is just CLASS, but for the Thumb core registers and
8190 immediate constants we prefer a LO_REGS class or a subset. */
8192 static reg_class_t
8193 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8195 if (TARGET_32BIT)
8196 return rclass;
8197 else
8199 if (rclass == GENERAL_REGS)
8200 return LO_REGS;
8201 else
8202 return rclass;
8206 /* Build the SYMBOL_REF for __tls_get_addr. */
8208 static GTY(()) rtx tls_get_addr_libfunc;
8210 static rtx
8211 get_tls_get_addr (void)
8213 if (!tls_get_addr_libfunc)
8214 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8215 return tls_get_addr_libfunc;
8219 arm_load_tp (rtx target)
8221 if (!target)
8222 target = gen_reg_rtx (SImode);
8224 if (TARGET_HARD_TP)
8226 /* Can return in any reg. */
8227 emit_insn (gen_load_tp_hard (target));
8229 else
8231 /* Always returned in r0. Immediately copy the result into a pseudo,
8232 otherwise other uses of r0 (e.g. setting up function arguments) may
8233 clobber the value. */
8235 rtx tmp;
8237 emit_insn (gen_load_tp_soft ());
8239 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8240 emit_move_insn (target, tmp);
8242 return target;
8245 static rtx
8246 load_tls_operand (rtx x, rtx reg)
8248 rtx tmp;
8250 if (reg == NULL_RTX)
8251 reg = gen_reg_rtx (SImode);
8253 tmp = gen_rtx_CONST (SImode, x);
8255 emit_move_insn (reg, tmp);
8257 return reg;
8260 static rtx_insn *
8261 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8263 rtx label, labelno, sum;
8265 gcc_assert (reloc != TLS_DESCSEQ);
8266 start_sequence ();
8268 labelno = GEN_INT (pic_labelno++);
8269 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8270 label = gen_rtx_CONST (VOIDmode, label);
8272 sum = gen_rtx_UNSPEC (Pmode,
8273 gen_rtvec (4, x, GEN_INT (reloc), label,
8274 GEN_INT (TARGET_ARM ? 8 : 4)),
8275 UNSPEC_TLS);
8276 reg = load_tls_operand (sum, reg);
8278 if (TARGET_ARM)
8279 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8280 else
8281 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8283 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8284 LCT_PURE, /* LCT_CONST? */
8285 Pmode, 1, reg, Pmode);
8287 rtx_insn *insns = get_insns ();
8288 end_sequence ();
8290 return insns;
8293 static rtx
8294 arm_tls_descseq_addr (rtx x, rtx reg)
8296 rtx labelno = GEN_INT (pic_labelno++);
8297 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8298 rtx sum = gen_rtx_UNSPEC (Pmode,
8299 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8300 gen_rtx_CONST (VOIDmode, label),
8301 GEN_INT (!TARGET_ARM)),
8302 UNSPEC_TLS);
8303 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8305 emit_insn (gen_tlscall (x, labelno));
8306 if (!reg)
8307 reg = gen_reg_rtx (SImode);
8308 else
8309 gcc_assert (REGNO (reg) != R0_REGNUM);
8311 emit_move_insn (reg, reg0);
8313 return reg;
8317 legitimize_tls_address (rtx x, rtx reg)
8319 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8320 rtx_insn *insns;
8321 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8323 switch (model)
8325 case TLS_MODEL_GLOBAL_DYNAMIC:
8326 if (TARGET_GNU2_TLS)
8328 reg = arm_tls_descseq_addr (x, reg);
8330 tp = arm_load_tp (NULL_RTX);
8332 dest = gen_rtx_PLUS (Pmode, tp, reg);
8334 else
8336 /* Original scheme */
8337 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8338 dest = gen_reg_rtx (Pmode);
8339 emit_libcall_block (insns, dest, ret, x);
8341 return dest;
8343 case TLS_MODEL_LOCAL_DYNAMIC:
8344 if (TARGET_GNU2_TLS)
8346 reg = arm_tls_descseq_addr (x, reg);
8348 tp = arm_load_tp (NULL_RTX);
8350 dest = gen_rtx_PLUS (Pmode, tp, reg);
8352 else
8354 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8356 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8357 share the LDM result with other LD model accesses. */
8358 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8359 UNSPEC_TLS);
8360 dest = gen_reg_rtx (Pmode);
8361 emit_libcall_block (insns, dest, ret, eqv);
8363 /* Load the addend. */
8364 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8365 GEN_INT (TLS_LDO32)),
8366 UNSPEC_TLS);
8367 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8368 dest = gen_rtx_PLUS (Pmode, dest, addend);
8370 return dest;
8372 case TLS_MODEL_INITIAL_EXEC:
8373 labelno = GEN_INT (pic_labelno++);
8374 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8375 label = gen_rtx_CONST (VOIDmode, label);
8376 sum = gen_rtx_UNSPEC (Pmode,
8377 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8378 GEN_INT (TARGET_ARM ? 8 : 4)),
8379 UNSPEC_TLS);
8380 reg = load_tls_operand (sum, reg);
8382 if (TARGET_ARM)
8383 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8384 else if (TARGET_THUMB2)
8385 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8386 else
8388 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8389 emit_move_insn (reg, gen_const_mem (SImode, reg));
8392 tp = arm_load_tp (NULL_RTX);
8394 return gen_rtx_PLUS (Pmode, tp, reg);
8396 case TLS_MODEL_LOCAL_EXEC:
8397 tp = arm_load_tp (NULL_RTX);
8399 reg = gen_rtx_UNSPEC (Pmode,
8400 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8401 UNSPEC_TLS);
8402 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8404 return gen_rtx_PLUS (Pmode, tp, reg);
8406 default:
8407 abort ();
8411 /* Try machine-dependent ways of modifying an illegitimate address
8412 to be legitimate. If we find one, return the new, valid address. */
8414 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8416 if (arm_tls_referenced_p (x))
8418 rtx addend = NULL;
8420 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8422 addend = XEXP (XEXP (x, 0), 1);
8423 x = XEXP (XEXP (x, 0), 0);
8426 if (GET_CODE (x) != SYMBOL_REF)
8427 return x;
8429 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8431 x = legitimize_tls_address (x, NULL_RTX);
8433 if (addend)
8435 x = gen_rtx_PLUS (SImode, x, addend);
8436 orig_x = x;
8438 else
8439 return x;
8442 if (!TARGET_ARM)
8444 /* TODO: legitimize_address for Thumb2. */
8445 if (TARGET_THUMB2)
8446 return x;
8447 return thumb_legitimize_address (x, orig_x, mode);
8450 if (GET_CODE (x) == PLUS)
8452 rtx xop0 = XEXP (x, 0);
8453 rtx xop1 = XEXP (x, 1);
8455 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8456 xop0 = force_reg (SImode, xop0);
8458 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8459 && !symbol_mentioned_p (xop1))
8460 xop1 = force_reg (SImode, xop1);
8462 if (ARM_BASE_REGISTER_RTX_P (xop0)
8463 && CONST_INT_P (xop1))
8465 HOST_WIDE_INT n, low_n;
8466 rtx base_reg, val;
8467 n = INTVAL (xop1);
8469 /* VFP addressing modes actually allow greater offsets, but for
8470 now we just stick with the lowest common denominator. */
8471 if (mode == DImode || mode == DFmode)
8473 low_n = n & 0x0f;
8474 n &= ~0x0f;
8475 if (low_n > 4)
8477 n += 16;
8478 low_n -= 16;
8481 else
8483 low_n = ((mode) == TImode ? 0
8484 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8485 n -= low_n;
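/* Worked example (illustrative, not from the original sources): for a DFmode
   access at xop0 + 0x108 the code above gives low_n = 8, which is greater
   than 4, so n becomes 0x110 and low_n becomes -8; the add below then
   materialises xop0 + 0x110 in base_reg and the access itself uses the
   small offset -8.  */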
8488 base_reg = gen_reg_rtx (SImode);
8489 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8490 emit_move_insn (base_reg, val);
8491 x = plus_constant (Pmode, base_reg, low_n);
8493 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8494 x = gen_rtx_PLUS (SImode, xop0, xop1);
8497 /* XXX We don't allow MINUS any more -- see comment in
8498 arm_legitimate_address_outer_p (). */
8499 else if (GET_CODE (x) == MINUS)
8501 rtx xop0 = XEXP (x, 0);
8502 rtx xop1 = XEXP (x, 1);
8504 if (CONSTANT_P (xop0))
8505 xop0 = force_reg (SImode, xop0);
8507 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8508 xop1 = force_reg (SImode, xop1);
8510 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8511 x = gen_rtx_MINUS (SImode, xop0, xop1);
8514 /* Make sure to take full advantage of the pre-indexed addressing mode
8515 with absolute addresses which often allows for the base register to
8516 be factorized for multiple adjacent memory references, and it might
8517 even allow for the minipool to be avoided entirely. */
8518 else if (CONST_INT_P (x) && optimize > 0)
8520 unsigned int bits;
8521 HOST_WIDE_INT mask, base, index;
8522 rtx base_reg;
8524 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8525 use an 8-bit index. So let's use a 12-bit index for SImode only and
8526 hope that arm_gen_constant will enable ldrb to use more bits. */
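/* Worked example (illustrative): loading an SImode value from the absolute
   address 0x3004 splits into base = 0x3000 and index = 4, so a neighbouring
   access to 0x3008 can reuse the same base register; only when the base has
   many set bits does the code below flip to a negative index instead.  */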
8527 bits = (mode == SImode) ? 12 : 8;
8528 mask = (1 << bits) - 1;
8529 base = INTVAL (x) & ~mask;
8530 index = INTVAL (x) & mask;
8531 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8533 /* It'll most probably be more efficient to generate the base
8534 with more bits set and use a negative index instead. */
8535 base |= mask;
8536 index -= mask;
8538 base_reg = force_reg (SImode, GEN_INT (base));
8539 x = plus_constant (Pmode, base_reg, index);
8542 if (flag_pic)
8544 /* We need to find and carefully transform any SYMBOL and LABEL
8545 references; so go back to the original address expression. */
8546 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8548 if (new_x != orig_x)
8549 x = new_x;
8552 return x;
8556 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8557 to be legitimate. If we find one, return the new, valid address. */
8559 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8561 if (GET_CODE (x) == PLUS
8562 && CONST_INT_P (XEXP (x, 1))
8563 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8564 || INTVAL (XEXP (x, 1)) < 0))
8566 rtx xop0 = XEXP (x, 0);
8567 rtx xop1 = XEXP (x, 1);
8568 HOST_WIDE_INT offset = INTVAL (xop1);
8570 /* Try and fold the offset into a biasing of the base register and
8571 then offsetting that. Don't do this when optimizing for space
8572 since it can cause too many CSEs. */
8573 if (optimize_size && offset >= 0
8574 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8576 HOST_WIDE_INT delta;
8578 if (offset >= 256)
8579 delta = offset - (256 - GET_MODE_SIZE (mode));
8580 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8581 delta = 31 * GET_MODE_SIZE (mode);
8582 else
8583 delta = offset & (~31 * GET_MODE_SIZE (mode));
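/* Worked example (illustrative): an SImode access at base + 260, with
   optimize_size set, takes the first branch above: delta = 260 - 252 = 8,
   so the code below adds 252 to the base once and the load itself uses the
   in-range offset 8.  */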
8585 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8586 NULL_RTX);
8587 x = plus_constant (Pmode, xop0, delta);
8589 else if (offset < 0 && offset > -256)
8590 /* Small negative offsets are best done with a subtract before the
8591 dereference; forcing these into a register normally takes two
8592 instructions. */
8593 x = force_operand (x, NULL_RTX);
8594 else
8596 /* For the remaining cases, force the constant into a register. */
8597 xop1 = force_reg (SImode, xop1);
8598 x = gen_rtx_PLUS (SImode, xop0, xop1);
8601 else if (GET_CODE (x) == PLUS
8602 && s_register_operand (XEXP (x, 1), SImode)
8603 && !s_register_operand (XEXP (x, 0), SImode))
8605 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8607 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8610 if (flag_pic)
8612 /* We need to find and carefully transform any SYMBOL and LABEL
8613 references; so go back to the original address expression. */
8614 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8616 if (new_x != orig_x)
8617 x = new_x;
8620 return x;
8623 /* Return TRUE if X contains any TLS symbol references. */
8625 bool
8626 arm_tls_referenced_p (rtx x)
8628 if (! TARGET_HAVE_TLS)
8629 return false;
8631 subrtx_iterator::array_type array;
8632 FOR_EACH_SUBRTX (iter, array, x, ALL)
8634 const_rtx x = *iter;
8635 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8636 return true;
8638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8639 TLS offsets, not real symbol references. */
8640 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8641 iter.skip_subrtxes ();
8643 return false;
8646 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8648 On the ARM, allow any integer (invalid ones are removed later by insn
8649 patterns), nice doubles and symbol_refs which refer to the function's
8650 constant pool XXX.
8652 When generating pic allow anything. */
8654 static bool
8655 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8657 return flag_pic || !label_mentioned_p (x);
8660 static bool
8661 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8663 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates HIGH
8664 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
8665 for ARMv8-M Baseline or later the result is valid. */
8666 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8667 x = XEXP (x, 0);
8669 return (CONST_INT_P (x)
8670 || CONST_DOUBLE_P (x)
8671 || CONSTANT_ADDRESS_P (x)
8672 || flag_pic);
8675 static bool
8676 arm_legitimate_constant_p (machine_mode mode, rtx x)
8678 return (!arm_cannot_force_const_mem (mode, x)
8679 && (TARGET_32BIT
8680 ? arm_legitimate_constant_p_1 (mode, x)
8681 : thumb_legitimate_constant_p (mode, x)));
8684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8686 static bool
8687 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8689 rtx base, offset;
8691 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8693 split_const (x, &base, &offset);
8694 if (GET_CODE (base) == SYMBOL_REF
8695 && !offset_within_block_p (base, INTVAL (offset)))
8696 return true;
8698 return arm_tls_referenced_p (x);
8701 #define REG_OR_SUBREG_REG(X) \
8702 (REG_P (X) \
8703 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8705 #define REG_OR_SUBREG_RTX(X) \
8706 (REG_P (X) ? (X) : SUBREG_REG (X))
8708 static inline int
8709 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8711 machine_mode mode = GET_MODE (x);
8712 int total, words;
8714 switch (code)
8716 case ASHIFT:
8717 case ASHIFTRT:
8718 case LSHIFTRT:
8719 case ROTATERT:
8720 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8722 case PLUS:
8723 case MINUS:
8724 case COMPARE:
8725 case NEG:
8726 case NOT:
8727 return COSTS_N_INSNS (1);
8729 case MULT:
8730 if (CONST_INT_P (XEXP (x, 1)))
8732 int cycles = 0;
8733 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8735 while (i)
8737 i >>= 2;
8738 cycles++;
8740 return COSTS_N_INSNS (2) + cycles;
8742 return COSTS_N_INSNS (1) + 16;
8744 case SET:
8745 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8746 the mode. */
8747 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8748 return (COSTS_N_INSNS (words)
8749 + 4 * ((MEM_P (SET_SRC (x)))
8750 + MEM_P (SET_DEST (x))));
8752 case CONST_INT:
8753 if (outer == SET)
8755 if (UINTVAL (x) < 256
8756 /* 16-bit constant. */
8757 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8758 return 0;
8759 if (thumb_shiftable_const (INTVAL (x)))
8760 return COSTS_N_INSNS (2);
8761 return COSTS_N_INSNS (3);
8763 else if ((outer == PLUS || outer == COMPARE)
8764 && INTVAL (x) < 256 && INTVAL (x) > -256)
8765 return 0;
8766 else if ((outer == IOR || outer == XOR || outer == AND)
8767 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8768 return COSTS_N_INSNS (1);
8769 else if (outer == AND)
8771 int i;
8772 /* This duplicates the tests in the andsi3 expander. */
8773 for (i = 9; i <= 31; i++)
8774 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8775 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8776 return COSTS_N_INSNS (2);
8778 else if (outer == ASHIFT || outer == ASHIFTRT
8779 || outer == LSHIFTRT)
8780 return 0;
8781 return COSTS_N_INSNS (2);
8783 case CONST:
8784 case CONST_DOUBLE:
8785 case LABEL_REF:
8786 case SYMBOL_REF:
8787 return COSTS_N_INSNS (3);
8789 case UDIV:
8790 case UMOD:
8791 case DIV:
8792 case MOD:
8793 return 100;
8795 case TRUNCATE:
8796 return 99;
8798 case AND:
8799 case XOR:
8800 case IOR:
8801 /* XXX guess. */
8802 return 8;
8804 case MEM:
8805 /* XXX another guess. */
8806 /* Memory costs quite a lot for the first word, but subsequent words
8807 load at the equivalent of a single insn each. */
8808 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8809 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8810 ? 4 : 0));
8812 case IF_THEN_ELSE:
8813 /* XXX a guess. */
8814 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8815 return 14;
8816 return 2;
8818 case SIGN_EXTEND:
8819 case ZERO_EXTEND:
8820 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8821 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8823 if (mode == SImode)
8824 return total;
8826 if (arm_arch6)
8827 return total + COSTS_N_INSNS (1);
8829 /* Assume a two-shift sequence. Increase the cost slightly so
8830 we prefer actual shifts over an extend operation. */
8831 return total + 1 + COSTS_N_INSNS (2);
8833 default:
8834 return 99;
8838 /* Estimates the size cost of thumb1 instructions.
8839 For now most of the code is copied from thumb1_rtx_costs. We need more
8840 fine-grained tuning when we have more related test cases. */
8841 static inline int
8842 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8844 machine_mode mode = GET_MODE (x);
8845 int words, cost;
8847 switch (code)
8849 case ASHIFT:
8850 case ASHIFTRT:
8851 case LSHIFTRT:
8852 case ROTATERT:
8853 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8855 case PLUS:
8856 case MINUS:
8857 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8858 defined by RTL expansion, especially for the expansion of
8859 multiplication. */
8860 if ((GET_CODE (XEXP (x, 0)) == MULT
8861 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8862 || (GET_CODE (XEXP (x, 1)) == MULT
8863 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8864 return COSTS_N_INSNS (2);
8865 /* Fall through. */
8866 case COMPARE:
8867 case NEG:
8868 case NOT:
8869 return COSTS_N_INSNS (1);
8871 case MULT:
8872 if (CONST_INT_P (XEXP (x, 1)))
8874 /* The Thumb-1 mul instruction can't operate on a constant. We must
8875 load it into a register first. */
8876 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8877 /* For targets that have a very small and high-latency multiply
8878 unit, we prefer to synthesize the mult with up to 5 instructions,
8879 giving a good balance between size and performance. */
8880 if (arm_arch6m && arm_m_profile_small_mul)
8881 return COSTS_N_INSNS (5);
8882 else
8883 return COSTS_N_INSNS (1) + const_size;
8885 return COSTS_N_INSNS (1);
8887 case SET:
8888 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8889 the mode. */
8890 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8891 cost = COSTS_N_INSNS (words);
8892 if (satisfies_constraint_J (SET_SRC (x))
8893 || satisfies_constraint_K (SET_SRC (x))
8894 /* Too big an immediate for a 2-byte mov, using MOVT. */
8895 || (CONST_INT_P (SET_SRC (x))
8896 && UINTVAL (SET_SRC (x)) >= 256
8897 && TARGET_HAVE_MOVT
8898 && satisfies_constraint_j (SET_SRC (x)))
8899 /* thumb1_movdi_insn. */
8900 || ((words > 1) && MEM_P (SET_SRC (x))))
8901 cost += COSTS_N_INSNS (1);
8902 return cost;
8904 case CONST_INT:
8905 if (outer == SET)
8907 if (UINTVAL (x) < 256)
8908 return COSTS_N_INSNS (1);
8909 /* movw is 4 bytes long. */
8910 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
8911 return COSTS_N_INSNS (2);
8912 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8913 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8914 return COSTS_N_INSNS (2);
8915 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8916 if (thumb_shiftable_const (INTVAL (x)))
8917 return COSTS_N_INSNS (2);
8918 return COSTS_N_INSNS (3);
8920 else if ((outer == PLUS || outer == COMPARE)
8921 && INTVAL (x) < 256 && INTVAL (x) > -256)
8922 return 0;
8923 else if ((outer == IOR || outer == XOR || outer == AND)
8924 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8925 return COSTS_N_INSNS (1);
8926 else if (outer == AND)
8928 int i;
8929 /* This duplicates the tests in the andsi3 expander. */
8930 for (i = 9; i <= 31; i++)
8931 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8932 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8933 return COSTS_N_INSNS (2);
8935 else if (outer == ASHIFT || outer == ASHIFTRT
8936 || outer == LSHIFTRT)
8937 return 0;
8938 return COSTS_N_INSNS (2);
8940 case CONST:
8941 case CONST_DOUBLE:
8942 case LABEL_REF:
8943 case SYMBOL_REF:
8944 return COSTS_N_INSNS (3);
8946 case UDIV:
8947 case UMOD:
8948 case DIV:
8949 case MOD:
8950 return 100;
8952 case TRUNCATE:
8953 return 99;
8955 case AND:
8956 case XOR:
8957 case IOR:
8958 return COSTS_N_INSNS (1);
8960 case MEM:
8961 return (COSTS_N_INSNS (1)
8962 + COSTS_N_INSNS (1)
8963 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8964 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8965 ? COSTS_N_INSNS (1) : 0));
8967 case IF_THEN_ELSE:
8968 /* XXX a guess. */
8969 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8970 return 14;
8971 return 2;
8973 case ZERO_EXTEND:
8974 /* XXX still guessing. */
8975 switch (GET_MODE (XEXP (x, 0)))
8977 case QImode:
8978 return (1 + (mode == DImode ? 4 : 0)
8979 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8981 case HImode:
8982 return (4 + (mode == DImode ? 4 : 0)
8983 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8985 case SImode:
8986 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8988 default:
8989 return 99;
8992 default:
8993 return 99;
8997 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8998 operand, then return the operand that is being shifted. If the shift
8999 is not by a constant, then set SHIFT_REG to point to the operand.
9000 Return NULL if OP is not a shifter operand. */
9001 static rtx
9002 shifter_op_p (rtx op, rtx *shift_reg)
9004 enum rtx_code code = GET_CODE (op);
9006 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9007 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9008 return XEXP (op, 0);
9009 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9010 return XEXP (op, 0);
9011 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9012 || code == ASHIFTRT)
9014 if (!CONST_INT_P (XEXP (op, 1)))
9015 *shift_reg = XEXP (op, 1);
9016 return XEXP (op, 0);
9019 return NULL;
9022 static bool
9023 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9025 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9026 rtx_code code = GET_CODE (x);
9027 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9029 switch (XINT (x, 1))
9031 case UNSPEC_UNALIGNED_LOAD:
9032 /* We can only do unaligned loads into the integer unit, and we can't
9033 use LDM or LDRD. */
9034 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9035 if (speed_p)
9036 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9037 + extra_cost->ldst.load_unaligned);
9039 #ifdef NOT_YET
9040 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9041 ADDR_SPACE_GENERIC, speed_p);
9042 #endif
9043 return true;
9045 case UNSPEC_UNALIGNED_STORE:
9046 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9047 if (speed_p)
9048 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9049 + extra_cost->ldst.store_unaligned);
9051 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9052 #ifdef NOT_YET
9053 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9054 ADDR_SPACE_GENERIC, speed_p);
9055 #endif
9056 return true;
9058 case UNSPEC_VRINTZ:
9059 case UNSPEC_VRINTP:
9060 case UNSPEC_VRINTM:
9061 case UNSPEC_VRINTR:
9062 case UNSPEC_VRINTX:
9063 case UNSPEC_VRINTA:
9064 if (speed_p)
9065 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9067 return true;
9068 default:
9069 *cost = COSTS_N_INSNS (2);
9070 break;
9072 return true;
9075 /* Cost of a libcall. We assume one insn per argument, an amount for the
9076 call (one insn for -Os) and then one for processing the result. */
9077 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
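/* For instance (illustrative), LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */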
9079 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9080 do \
9082 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9083 if (shift_op != NULL \
9084 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9086 if (shift_reg) \
9088 if (speed_p) \
9089 *cost += extra_cost->alu.arith_shift_reg; \
9090 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9091 ASHIFT, 1, speed_p); \
9093 else if (speed_p) \
9094 *cost += extra_cost->alu.arith_shift; \
9096 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9097 ASHIFT, 0, speed_p) \
9098 + rtx_cost (XEXP (x, 1 - IDX), \
9099 GET_MODE (shift_op), \
9100 OP, 1, speed_p)); \
9101 return true; \
9104 while (0);
9106 /* RTX costs. Make an estimate of the cost of executing the operation
9107 X, which is contained with an operation with code OUTER_CODE.
9108 SPEED_P indicates whether the cost desired is the performance cost,
9109 or the size cost. The estimate is stored in COST and the return
9110 value is TRUE if the cost calculation is final, or FALSE if the
9111 caller should recurse through the operands of X to add additional
9112 costs.
9114 We currently make no attempt to model the size savings of Thumb-2
9115 16-bit instructions. At the normal points in compilation where
9116 this code is called we have no measure of whether the condition
9117 flags are live or not, and thus no realistic way to determine what
9118 the size will eventually be. */
9119 static bool
9120 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9121 const struct cpu_cost_table *extra_cost,
9122 int *cost, bool speed_p)
9124 machine_mode mode = GET_MODE (x);
9126 *cost = COSTS_N_INSNS (1);
9128 if (TARGET_THUMB1)
9130 if (speed_p)
9131 *cost = thumb1_rtx_costs (x, code, outer_code);
9132 else
9133 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9134 return true;
9137 switch (code)
9139 case SET:
9140 *cost = 0;
9141 /* SET RTXs don't have a mode so we get it from the destination. */
9142 mode = GET_MODE (SET_DEST (x));
9144 if (REG_P (SET_SRC (x))
9145 && REG_P (SET_DEST (x)))
9147 /* Assume that most copies can be done with a single insn,
9148 unless we don't have HW FP, in which case everything
9149 larger than word mode will require two insns. */
9150 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9151 && GET_MODE_SIZE (mode) > 4)
9152 || mode == DImode)
9153 ? 2 : 1);
9154 /* Conditional register moves can be encoded
9155 in 16 bits in Thumb mode. */
9156 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9157 *cost >>= 1;
9159 return true;
9162 if (CONST_INT_P (SET_SRC (x)))
9164 /* Handle CONST_INT here, since the value doesn't have a mode
9165 and we would otherwise be unable to work out the true cost. */
9166 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9167 0, speed_p);
9168 outer_code = SET;
9169 /* Slightly lower the cost of setting a core reg to a constant.
9170 This helps break up chains and allows for better scheduling. */
9171 if (REG_P (SET_DEST (x))
9172 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9173 *cost -= 1;
9174 x = SET_SRC (x);
9175 /* Immediate moves with an immediate in the range [0, 255] can be
9176 encoded in 16 bits in Thumb mode. */
9177 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9178 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9179 *cost >>= 1;
9180 goto const_int_cost;
9183 return false;
9185 case MEM:
9186 /* A memory access costs 1 insn if the mode is small, or the address is
9187 a single register, otherwise it costs one insn per word. */
9188 if (REG_P (XEXP (x, 0)))
9189 *cost = COSTS_N_INSNS (1);
9190 else if (flag_pic
9191 && GET_CODE (XEXP (x, 0)) == PLUS
9192 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9193 /* This will be split into two instructions.
9194 See arm.md:calculate_pic_address. */
9195 *cost = COSTS_N_INSNS (2);
9196 else
9197 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9199 /* For speed optimizations, add the costs of the address and
9200 accessing memory. */
9201 if (speed_p)
9202 #ifdef NOT_YET
9203 *cost += (extra_cost->ldst.load
9204 + arm_address_cost (XEXP (x, 0), mode,
9205 ADDR_SPACE_GENERIC, speed_p));
9206 #else
9207 *cost += extra_cost->ldst.load;
9208 #endif
9209 return true;
9211 case PARALLEL:
9213 /* Calculations of LDM costs are complex. We assume an initial cost
9214 (ldm_1st) which will load the number of registers mentioned in
9215 ldm_regs_per_insn_1st registers; then each additional
9216 ldm_regs_per_insn_subsequent registers cost one more insn. The
9217 formula for N regs is thus:
9219 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9220 + ldm_regs_per_insn_subsequent - 1)
9221 / ldm_regs_per_insn_subsequent).
9223 Additional costs may also be added for addressing. A similar
9224 formula is used for STM. */
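/* Worked example (the tuning values are illustrative): with
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   a 5-register LDM costs
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */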
9226 bool is_ldm = load_multiple_operation (x, SImode);
9227 bool is_stm = store_multiple_operation (x, SImode);
9229 if (is_ldm || is_stm)
9231 if (speed_p)
9233 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9234 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9235 ? extra_cost->ldst.ldm_regs_per_insn_1st
9236 : extra_cost->ldst.stm_regs_per_insn_1st;
9237 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9238 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9239 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9241 *cost += regs_per_insn_1st
9242 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9243 + regs_per_insn_sub - 1)
9244 / regs_per_insn_sub);
9245 return true;
9249 return false;
9251 case DIV:
9252 case UDIV:
9253 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9254 && (mode == SFmode || !TARGET_VFP_SINGLE))
9255 *cost += COSTS_N_INSNS (speed_p
9256 ? extra_cost->fp[mode != SFmode].div : 0);
9257 else if (mode == SImode && TARGET_IDIV)
9258 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9259 else
9260 *cost = LIBCALL_COST (2);
9261 return false; /* All arguments must be in registers. */
9263 case MOD:
9264 /* MOD by a power of 2 can be expanded as:
9265 rsbs r1, r0, #0
9266 and r0, r0, #(n - 1)
9267 and r1, r1, #(n - 1)
9268 rsbpl r0, r1, #0. */
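/* For example, x % 8 (n == 8) uses the sequence above with the masks
   #(n - 1) == #7, and is costed below as three extra insns plus, when
   optimizing for speed, two logical operations and one arithmetic
   operation.  */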
9269 if (CONST_INT_P (XEXP (x, 1))
9270 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9271 && mode == SImode)
9273 *cost += COSTS_N_INSNS (3);
9275 if (speed_p)
9276 *cost += 2 * extra_cost->alu.logical
9277 + extra_cost->alu.arith;
9278 return true;
9281 /* Fall-through. */
9282 case UMOD:
9283 *cost = LIBCALL_COST (2);
9284 return false; /* All arguments must be in registers. */
9286 case ROTATE:
9287 if (mode == SImode && REG_P (XEXP (x, 1)))
9289 *cost += (COSTS_N_INSNS (1)
9290 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9291 if (speed_p)
9292 *cost += extra_cost->alu.shift_reg;
9293 return true;
9295 /* Fall through */
9296 case ROTATERT:
9297 case ASHIFT:
9298 case LSHIFTRT:
9299 case ASHIFTRT:
9300 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9302 *cost += (COSTS_N_INSNS (2)
9303 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9304 if (speed_p)
9305 *cost += 2 * extra_cost->alu.shift;
9306 return true;
9308 else if (mode == SImode)
9310 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9311 /* Slightly disparage register shifts at -Os, but not by much. */
9312 if (!CONST_INT_P (XEXP (x, 1)))
9313 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9314 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9315 return true;
9317 else if (GET_MODE_CLASS (mode) == MODE_INT
9318 && GET_MODE_SIZE (mode) < 4)
9320 if (code == ASHIFT)
9322 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9323 /* Slightly disparage register shifts at -Os, but not by
9324 much. */
9325 if (!CONST_INT_P (XEXP (x, 1)))
9326 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9327 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9329 else if (code == LSHIFTRT || code == ASHIFTRT)
9331 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9333 /* Can use SBFX/UBFX. */
9334 if (speed_p)
9335 *cost += extra_cost->alu.bfx;
9336 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9338 else
9340 *cost += COSTS_N_INSNS (1);
9341 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9342 if (speed_p)
9344 if (CONST_INT_P (XEXP (x, 1)))
9345 *cost += 2 * extra_cost->alu.shift;
9346 else
9347 *cost += (extra_cost->alu.shift
9348 + extra_cost->alu.shift_reg);
9350 else
9351 /* Slightly disparage register shifts. */
9352 *cost += !CONST_INT_P (XEXP (x, 1));
9355 else /* Rotates. */
9357 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9358 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9359 if (speed_p)
9361 if (CONST_INT_P (XEXP (x, 1)))
9362 *cost += (2 * extra_cost->alu.shift
9363 + extra_cost->alu.log_shift);
9364 else
9365 *cost += (extra_cost->alu.shift
9366 + extra_cost->alu.shift_reg
9367 + extra_cost->alu.log_shift_reg);
9370 return true;
9373 *cost = LIBCALL_COST (2);
9374 return false;
9376 case BSWAP:
9377 if (arm_arch6)
9379 if (mode == SImode)
9381 if (speed_p)
9382 *cost += extra_cost->alu.rev;
9384 return false;
9387 else
9389 /* No rev instruction available. Look at arm_legacy_rev
9390 and thumb_legacy_rev for the form of RTL used then. */
9391 if (TARGET_THUMB)
9393 *cost += COSTS_N_INSNS (9);
9395 if (speed_p)
9397 *cost += 6 * extra_cost->alu.shift;
9398 *cost += 3 * extra_cost->alu.logical;
9401 else
9403 *cost += COSTS_N_INSNS (4);
9405 if (speed_p)
9407 *cost += 2 * extra_cost->alu.shift;
9408 *cost += extra_cost->alu.arith_shift;
9409 *cost += 2 * extra_cost->alu.logical;
9412 return true;
9414 return false;
9416 case MINUS:
9417 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9418 && (mode == SFmode || !TARGET_VFP_SINGLE))
9420 if (GET_CODE (XEXP (x, 0)) == MULT
9421 || GET_CODE (XEXP (x, 1)) == MULT)
9423 rtx mul_op0, mul_op1, sub_op;
9425 if (speed_p)
9426 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9428 if (GET_CODE (XEXP (x, 0)) == MULT)
9430 mul_op0 = XEXP (XEXP (x, 0), 0);
9431 mul_op1 = XEXP (XEXP (x, 0), 1);
9432 sub_op = XEXP (x, 1);
9434 else
9436 mul_op0 = XEXP (XEXP (x, 1), 0);
9437 mul_op1 = XEXP (XEXP (x, 1), 1);
9438 sub_op = XEXP (x, 0);
9441 /* The first operand of the multiply may be optionally
9442 negated. */
9443 if (GET_CODE (mul_op0) == NEG)
9444 mul_op0 = XEXP (mul_op0, 0);
9446 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9447 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9448 + rtx_cost (sub_op, mode, code, 0, speed_p));
9450 return true;
9453 if (speed_p)
9454 *cost += extra_cost->fp[mode != SFmode].addsub;
9455 return false;
9458 if (mode == SImode)
9460 rtx shift_by_reg = NULL;
9461 rtx shift_op;
9462 rtx non_shift_op;
9464 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9465 if (shift_op == NULL)
9467 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9468 non_shift_op = XEXP (x, 0);
9470 else
9471 non_shift_op = XEXP (x, 1);
9473 if (shift_op != NULL)
9475 if (shift_by_reg != NULL)
9477 if (speed_p)
9478 *cost += extra_cost->alu.arith_shift_reg;
9479 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9481 else if (speed_p)
9482 *cost += extra_cost->alu.arith_shift;
9484 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9485 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9486 return true;
9489 if (arm_arch_thumb2
9490 && GET_CODE (XEXP (x, 1)) == MULT)
9492 /* MLS. */
9493 if (speed_p)
9494 *cost += extra_cost->mult[0].add;
9495 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9496 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9497 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9498 return true;
9501 if (CONST_INT_P (XEXP (x, 0)))
9503 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9504 INTVAL (XEXP (x, 0)), NULL_RTX,
9505 NULL_RTX, 1, 0);
9506 *cost = COSTS_N_INSNS (insns);
9507 if (speed_p)
9508 *cost += insns * extra_cost->alu.arith;
9509 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9510 return true;
9512 else if (speed_p)
9513 *cost += extra_cost->alu.arith;
9515 return false;
9518 if (GET_MODE_CLASS (mode) == MODE_INT
9519 && GET_MODE_SIZE (mode) < 4)
9521 rtx shift_op, shift_reg;
9522 shift_reg = NULL;
9524 /* We check both sides of the MINUS for shifter operands since,
9525 unlike PLUS, it's not commutative. */
9527 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9528 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9530 /* Slightly disparage, as we might need to widen the result. */
9531 *cost += 1;
9532 if (speed_p)
9533 *cost += extra_cost->alu.arith;
9535 if (CONST_INT_P (XEXP (x, 0)))
9537 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9538 return true;
9541 return false;
9544 if (mode == DImode)
9546 *cost += COSTS_N_INSNS (1);
9548 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9550 rtx op1 = XEXP (x, 1);
9552 if (speed_p)
9553 *cost += 2 * extra_cost->alu.arith;
9555 if (GET_CODE (op1) == ZERO_EXTEND)
9556 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9557 0, speed_p);
9558 else
9559 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9560 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9561 0, speed_p);
9562 return true;
9564 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9566 if (speed_p)
9567 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9568 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9569 0, speed_p)
9570 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9571 return true;
9573 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9574 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9576 if (speed_p)
9577 *cost += (extra_cost->alu.arith
9578 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9579 ? extra_cost->alu.arith
9580 : extra_cost->alu.arith_shift));
9581 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9582 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9583 GET_CODE (XEXP (x, 1)), 0, speed_p));
9584 return true;
9587 if (speed_p)
9588 *cost += 2 * extra_cost->alu.arith;
9589 return false;
9592 /* Vector mode? */
9594 *cost = LIBCALL_COST (2);
9595 return false;
9597 case PLUS:
9598 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9599 && (mode == SFmode || !TARGET_VFP_SINGLE))
9601 if (GET_CODE (XEXP (x, 0)) == MULT)
9603 rtx mul_op0, mul_op1, add_op;
9605 if (speed_p)
9606 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9608 mul_op0 = XEXP (XEXP (x, 0), 0);
9609 mul_op1 = XEXP (XEXP (x, 0), 1);
9610 add_op = XEXP (x, 1);
9612 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9613 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9614 + rtx_cost (add_op, mode, code, 0, speed_p));
9616 return true;
9619 if (speed_p)
9620 *cost += extra_cost->fp[mode != SFmode].addsub;
9621 return false;
9623 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9625 *cost = LIBCALL_COST (2);
9626 return false;
9629 /* Narrow modes can be synthesized in SImode, but the range
9630 of useful sub-operations is limited. Check for shift operations
9631 on one of the operands. Only left shifts can be used in the
9632 narrow modes. */
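/* Illustrative case: a HImode (plus (ashift (reg) (const_int 2)) (reg))
   is costed as a single add with a shifter operand via
   HANDLE_NARROW_SHIFT_ARITH above; a right shift in the same position
   does not match and falls through to the generic narrow-mode handling
   below.  */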
9633 if (GET_MODE_CLASS (mode) == MODE_INT
9634 && GET_MODE_SIZE (mode) < 4)
9636 rtx shift_op, shift_reg;
9637 shift_reg = NULL;
9639 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9641 if (CONST_INT_P (XEXP (x, 1)))
9643 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9644 INTVAL (XEXP (x, 1)), NULL_RTX,
9645 NULL_RTX, 1, 0);
9646 *cost = COSTS_N_INSNS (insns);
9647 if (speed_p)
9648 *cost += insns * extra_cost->alu.arith;
9649 /* Slightly penalize a narrow operation as the result may
9650 need widening. */
9651 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9652 return true;
9655 /* Slightly penalize a narrow operation as the result may
9656 need widening. */
9657 *cost += 1;
9658 if (speed_p)
9659 *cost += extra_cost->alu.arith;
9661 return false;
9664 if (mode == SImode)
9666 rtx shift_op, shift_reg;
9668 if (TARGET_INT_SIMD
9669 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9670 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9672 /* UXTA[BH] or SXTA[BH]. */
9673 if (speed_p)
9674 *cost += extra_cost->alu.extend_arith;
9675 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9676 0, speed_p)
9677 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9678 return true;
9681 shift_reg = NULL;
9682 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9683 if (shift_op != NULL)
9685 if (shift_reg)
9687 if (speed_p)
9688 *cost += extra_cost->alu.arith_shift_reg;
9689 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9691 else if (speed_p)
9692 *cost += extra_cost->alu.arith_shift;
9694 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9695 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9696 return true;
9698 if (GET_CODE (XEXP (x, 0)) == MULT)
9700 rtx mul_op = XEXP (x, 0);
9702 if (TARGET_DSP_MULTIPLY
9703 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9704 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9705 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9706 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9707 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9708 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9709 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9710 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9711 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9712 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9713 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9714 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9715 == 16))))))
9717 /* SMLA[BT][BT]. */
9718 if (speed_p)
9719 *cost += extra_cost->mult[0].extend_add;
9720 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9721 SIGN_EXTEND, 0, speed_p)
9722 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9723 SIGN_EXTEND, 0, speed_p)
9724 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9725 return true;
9728 if (speed_p)
9729 *cost += extra_cost->mult[0].add;
9730 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9731 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9732 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9733 return true;
9735 if (CONST_INT_P (XEXP (x, 1)))
9737 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9738 INTVAL (XEXP (x, 1)), NULL_RTX,
9739 NULL_RTX, 1, 0);
9740 *cost = COSTS_N_INSNS (insns);
9741 if (speed_p)
9742 *cost += insns * extra_cost->alu.arith;
9743 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9744 return true;
9746 else if (speed_p)
9747 *cost += extra_cost->alu.arith;
9749 return false;
9752 if (mode == DImode)
9754 if (arm_arch3m
9755 && GET_CODE (XEXP (x, 0)) == MULT
9756 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9757 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9758 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9759 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9761 if (speed_p)
9762 *cost += extra_cost->mult[1].extend_add;
9763 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9764 ZERO_EXTEND, 0, speed_p)
9765 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9766 ZERO_EXTEND, 0, speed_p)
9767 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9768 return true;
9771 *cost += COSTS_N_INSNS (1);
9773 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9774 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9776 if (speed_p)
9777 *cost += (extra_cost->alu.arith
9778 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9779 ? extra_cost->alu.arith
9780 : extra_cost->alu.arith_shift));
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9783 0, speed_p)
9784 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9785 return true;
9788 if (speed_p)
9789 *cost += 2 * extra_cost->alu.arith;
9790 return false;
9793 /* Vector mode? */
9794 *cost = LIBCALL_COST (2);
9795 return false;
9796 case IOR:
9797 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9799 if (speed_p)
9800 *cost += extra_cost->alu.rev;
9802 return true;
9804 /* Fall through. */
9805 case AND: case XOR:
9806 if (mode == SImode)
9808 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9809 rtx op0 = XEXP (x, 0);
9810 rtx shift_op, shift_reg;
9812 if (subcode == NOT
9813 && (code == AND
9814 || (code == IOR && TARGET_THUMB2)))
9815 op0 = XEXP (op0, 0);
9817 shift_reg = NULL;
9818 shift_op = shifter_op_p (op0, &shift_reg);
9819 if (shift_op != NULL)
9821 if (shift_reg)
9823 if (speed_p)
9824 *cost += extra_cost->alu.log_shift_reg;
9825 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9827 else if (speed_p)
9828 *cost += extra_cost->alu.log_shift;
9830 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9832 return true;
9835 if (CONST_INT_P (XEXP (x, 1)))
9837 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9838 INTVAL (XEXP (x, 1)), NULL_RTX,
9839 NULL_RTX, 1, 0);
9841 *cost = COSTS_N_INSNS (insns);
9842 if (speed_p)
9843 *cost += insns * extra_cost->alu.logical;
9844 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9845 return true;
9848 if (speed_p)
9849 *cost += extra_cost->alu.logical;
9850 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9851 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9852 return true;
9855 if (mode == DImode)
9857 rtx op0 = XEXP (x, 0);
9858 enum rtx_code subcode = GET_CODE (op0);
9860 *cost += COSTS_N_INSNS (1);
9862 if (subcode == NOT
9863 && (code == AND
9864 || (code == IOR && TARGET_THUMB2)))
9865 op0 = XEXP (op0, 0);
9867 if (GET_CODE (op0) == ZERO_EXTEND)
9869 if (speed_p)
9870 *cost += 2 * extra_cost->alu.logical;
9872 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9873 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9875 return true;
9877 else if (GET_CODE (op0) == SIGN_EXTEND)
9879 if (speed_p)
9880 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9882 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9883 0, speed_p)
9884 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9885 return true;
9888 if (speed_p)
9889 *cost += 2 * extra_cost->alu.logical;
9891 return true;
9893 /* Vector mode? */
9895 *cost = LIBCALL_COST (2);
9896 return false;
9898 case MULT:
9899 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9900 && (mode == SFmode || !TARGET_VFP_SINGLE))
9902 rtx op0 = XEXP (x, 0);
9904 if (GET_CODE (op0) == NEG && !flag_rounding_math)
9905 op0 = XEXP (op0, 0);
9907 if (speed_p)
9908 *cost += extra_cost->fp[mode != SFmode].mult;
9910 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
9911 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
9912 return true;
9914 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9916 *cost = LIBCALL_COST (2);
9917 return false;
9920 if (mode == SImode)
9922 if (TARGET_DSP_MULTIPLY
9923 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9924 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9925 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9926 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9927 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9928 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9929 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9930 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9931 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9932 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9933 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9934 && (INTVAL (XEXP (XEXP (x, 1), 1))
9935 == 16))))))
9937 /* SMUL[TB][TB]. */
9938 if (speed_p)
9939 *cost += extra_cost->mult[0].extend;
9940 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
9941 SIGN_EXTEND, 0, speed_p);
9942 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
9943 SIGN_EXTEND, 1, speed_p);
9944 return true;
9946 if (speed_p)
9947 *cost += extra_cost->mult[0].simple;
9948 return false;
9951 if (mode == DImode)
9953 if (arm_arch3m
9954 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9955 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9956 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9957 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9959 if (speed_p)
9960 *cost += extra_cost->mult[1].extend;
9961 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
9962 ZERO_EXTEND, 0, speed_p)
9963 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9964 ZERO_EXTEND, 0, speed_p));
9965 return true;
9968 *cost = LIBCALL_COST (2);
9969 return false;
9972 /* Vector mode? */
9973 *cost = LIBCALL_COST (2);
9974 return false;
9976 case NEG:
9977 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9978 && (mode == SFmode || !TARGET_VFP_SINGLE))
9980 if (GET_CODE (XEXP (x, 0)) == MULT)
9982 /* VNMUL. */
9983 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
9984 return true;
9987 if (speed_p)
9988 *cost += extra_cost->fp[mode != SFmode].neg;
9990 return false;
9992 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9994 *cost = LIBCALL_COST (1);
9995 return false;
9998 if (mode == SImode)
10000 if (GET_CODE (XEXP (x, 0)) == ABS)
10002 *cost += COSTS_N_INSNS (1);
10003 /* Assume the non-flag-changing variant. */
10004 if (speed_p)
10005 *cost += (extra_cost->alu.log_shift
10006 + extra_cost->alu.arith_shift);
10007 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10008 return true;
10011 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10012 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10014 *cost += COSTS_N_INSNS (1);
10015 /* No extra cost for MOV imm and MVN imm. */
10016 /* If the comparison op is using the flags, there's no further
10017 cost, otherwise we need to add the cost of the comparison. */
10018 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10019 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10020 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10022 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10023 *cost += (COSTS_N_INSNS (1)
10024 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10025 0, speed_p)
10026 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10027 1, speed_p));
10028 if (speed_p)
10029 *cost += extra_cost->alu.arith;
10031 return true;
10034 if (speed_p)
10035 *cost += extra_cost->alu.arith;
10036 return false;
10039 if (GET_MODE_CLASS (mode) == MODE_INT
10040 && GET_MODE_SIZE (mode) < 4)
10042 /* Slightly disparage, as we might need an extend operation. */
10043 *cost += 1;
10044 if (speed_p)
10045 *cost += extra_cost->alu.arith;
10046 return false;
10049 if (mode == DImode)
10051 *cost += COSTS_N_INSNS (1);
10052 if (speed_p)
10053 *cost += 2 * extra_cost->alu.arith;
10054 return false;
10057 /* Vector mode? */
10058 *cost = LIBCALL_COST (1);
10059 return false;
10061 case NOT:
10062 if (mode == SImode)
10064 rtx shift_op;
10065 rtx shift_reg = NULL;
10067 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10069 if (shift_op)
10071 if (shift_reg != NULL)
10073 if (speed_p)
10074 *cost += extra_cost->alu.log_shift_reg;
10075 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10077 else if (speed_p)
10078 *cost += extra_cost->alu.log_shift;
10079 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10080 return true;
10083 if (speed_p)
10084 *cost += extra_cost->alu.logical;
10085 return false;
10087 if (mode == DImode)
10089 *cost += COSTS_N_INSNS (1);
10090 return false;
10093 /* Vector mode? */
10095 *cost += LIBCALL_COST (1);
10096 return false;
10098 case IF_THEN_ELSE:
10100 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10102 *cost += COSTS_N_INSNS (3);
10103 return true;
10105 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10106 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10108 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10109 /* Assume that if one arm of the if_then_else is a register,
10110 then it will be tied with the result and the conditional
10111 insn will be eliminated. */
10112 if (REG_P (XEXP (x, 1)))
10113 *cost += op2cost;
10114 else if (REG_P (XEXP (x, 2)))
10115 *cost += op1cost;
10116 else
10118 if (speed_p)
10120 if (extra_cost->alu.non_exec_costs_exec)
10121 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10122 else
10123 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10125 else
10126 *cost += op1cost + op2cost;
10129 return true;
10131 case COMPARE:
10132 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10133 *cost = 0;
10134 else
10136 machine_mode op0mode;
10137 /* We'll mostly assume that the cost of a compare is the cost of the
10138 LHS. However, there are some notable exceptions. */
10140 /* Floating point compares are never done as side-effects. */
10141 op0mode = GET_MODE (XEXP (x, 0));
10142 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10143 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10145 if (speed_p)
10146 *cost += extra_cost->fp[op0mode != SFmode].compare;
10148 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10150 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10151 return true;
10154 return false;
10156 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10158 *cost = LIBCALL_COST (2);
10159 return false;
10162 /* DImode compares normally take two insns. */
10163 if (op0mode == DImode)
10165 *cost += COSTS_N_INSNS (1);
10166 if (speed_p)
10167 *cost += 2 * extra_cost->alu.arith;
10168 return false;
10171 if (op0mode == SImode)
10173 rtx shift_op;
10174 rtx shift_reg;
10176 if (XEXP (x, 1) == const0_rtx
10177 && !(REG_P (XEXP (x, 0))
10178 || (GET_CODE (XEXP (x, 0)) == SUBREG
10179 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10181 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10183 /* Multiply operations that set the flags are often
10184 significantly more expensive. */
10185 if (speed_p
10186 && GET_CODE (XEXP (x, 0)) == MULT
10187 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10188 *cost += extra_cost->mult[0].flag_setting;
10190 if (speed_p
10191 && GET_CODE (XEXP (x, 0)) == PLUS
10192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10193 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10194 0), 1), mode))
10195 *cost += extra_cost->mult[0].flag_setting;
10196 return true;
10199 shift_reg = NULL;
10200 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10201 if (shift_op != NULL)
10203 if (shift_reg != NULL)
10205 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10206 1, speed_p);
10207 if (speed_p)
10208 *cost += extra_cost->alu.arith_shift_reg;
10210 else if (speed_p)
10211 *cost += extra_cost->alu.arith_shift;
10212 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10213 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10214 return true;
10217 if (speed_p)
10218 *cost += extra_cost->alu.arith;
10219 if (CONST_INT_P (XEXP (x, 1))
10220 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10222 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10223 return true;
10225 return false;
10228 /* Vector mode? */
10230 *cost = LIBCALL_COST (2);
10231 return false;
10233 return true;
10235 case EQ:
10236 case NE:
10237 case LT:
10238 case LE:
10239 case GT:
10240 case GE:
10241 case LTU:
10242 case LEU:
10243 case GEU:
10244 case GTU:
10245 case ORDERED:
10246 case UNORDERED:
10247 case UNEQ:
10248 case UNLE:
10249 case UNLT:
10250 case UNGE:
10251 case UNGT:
10252 case LTGT:
10253 if (outer_code == SET)
10255 /* Is it a store-flag operation? */
10256 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10257 && XEXP (x, 1) == const0_rtx)
10259 /* Thumb also needs an IT insn. */
10260 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10261 return true;
10263 if (XEXP (x, 1) == const0_rtx)
10265 switch (code)
10267 case LT:
10268 /* LSR Rd, Rn, #31. */
10269 if (speed_p)
10270 *cost += extra_cost->alu.shift;
10271 break;
10273 case EQ:
10274 /* RSBS T1, Rn, #0
10275 ADC Rd, Rn, T1. */
10277 case NE:
10278 /* SUBS T1, Rn, #1
10279 SBC Rd, Rn, T1. */
10280 *cost += COSTS_N_INSNS (1);
10281 break;
10283 case LE:
10284 /* RSBS T1, Rn, Rn, LSR #31
10285 ADC Rd, Rn, T1. */
10286 *cost += COSTS_N_INSNS (1);
10287 if (speed_p)
10288 *cost += extra_cost->alu.arith_shift;
10289 break;
10291 case GT:
10292 /* RSB Rd, Rn, Rn, ASR #1
10293 LSR Rd, Rd, #31. */
10294 *cost += COSTS_N_INSNS (1);
10295 if (speed_p)
10296 *cost += (extra_cost->alu.arith_shift
10297 + extra_cost->alu.shift);
10298 break;
10300 case GE:
10301 /* ASR Rd, Rn, #31
10302 ADD Rd, Rn, #1. */
10303 *cost += COSTS_N_INSNS (1);
10304 if (speed_p)
10305 *cost += extra_cost->alu.shift;
10306 break;
10308 default:
10309 /* Remaining cases are either meaningless or would take
10310 three insns anyway. */
10311 *cost = COSTS_N_INSNS (3);
10312 break;
10314 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10315 return true;
10317 else
10319 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10320 if (CONST_INT_P (XEXP (x, 1))
10321 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10323 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10324 return true;
10327 return false;
10330 /* Not directly inside a set. If it involves the condition code
10331 register it must be the condition for a branch, cond_exec or
10332 I_T_E operation. Since the comparison is performed elsewhere
10333 this is just the control part which has no additional
10334 cost. */
10335 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10336 && XEXP (x, 1) == const0_rtx)
10338 *cost = 0;
10339 return true;
10341 return false;
10343 case ABS:
10344 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10345 && (mode == SFmode || !TARGET_VFP_SINGLE))
10347 if (speed_p)
10348 *cost += extra_cost->fp[mode != SFmode].neg;
10350 return false;
10352 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10354 *cost = LIBCALL_COST (1);
10355 return false;
10358 if (mode == SImode)
10360 if (speed_p)
10361 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10362 return false;
10364 /* Vector mode? */
10365 *cost = LIBCALL_COST (1);
10366 return false;
10368 case SIGN_EXTEND:
10369 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10370 && MEM_P (XEXP (x, 0)))
10372 if (mode == DImode)
10373 *cost += COSTS_N_INSNS (1);
10375 if (!speed_p)
10376 return true;
10378 if (GET_MODE (XEXP (x, 0)) == SImode)
10379 *cost += extra_cost->ldst.load;
10380 else
10381 *cost += extra_cost->ldst.load_sign_extend;
10383 if (mode == DImode)
10384 *cost += extra_cost->alu.shift;
10386 return true;
10389 /* Widening from less than 32-bits requires an extend operation. */
10390 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10392 /* We have SXTB/SXTH. */
10393 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10394 if (speed_p)
10395 *cost += extra_cost->alu.extend;
10397 else if (GET_MODE (XEXP (x, 0)) != SImode)
10399 /* Needs two shifts. */
10400 *cost += COSTS_N_INSNS (1);
10401 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10402 if (speed_p)
10403 *cost += 2 * extra_cost->alu.shift;
10406 /* Widening beyond 32-bits requires one more insn. */
10407 if (mode == DImode)
10409 *cost += COSTS_N_INSNS (1);
10410 if (speed_p)
10411 *cost += extra_cost->alu.shift;
10414 return true;
10416 case ZERO_EXTEND:
10417 if ((arm_arch4
10418 || GET_MODE (XEXP (x, 0)) == SImode
10419 || GET_MODE (XEXP (x, 0)) == QImode)
10420 && MEM_P (XEXP (x, 0)))
10422 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10424 if (mode == DImode)
10425 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10427 return true;
10430 /* Widening from less than 32-bits requires an extend operation. */
10431 if (GET_MODE (XEXP (x, 0)) == QImode)
10433 /* UXTB can be a shorter instruction in Thumb2, but it might
10434 be slower than the AND Rd, Rn, #255 alternative. When
10435 optimizing for speed it should never be slower to use
10436 AND, and we don't really model 16-bit vs 32-bit insns
10437 here. */
10438 if (speed_p)
10439 *cost += extra_cost->alu.logical;
10441 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10443 /* We have UXTB/UXTH. */
10444 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10445 if (speed_p)
10446 *cost += extra_cost->alu.extend;
10448 else if (GET_MODE (XEXP (x, 0)) != SImode)
10450 /* Needs two shifts. It's marginally preferable to use
10451 shifts rather than two BIC instructions as the second
10452 shift may merge with a subsequent insn as a shifter
10453 op. */
10454 *cost = COSTS_N_INSNS (2);
10455 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10456 if (speed_p)
10457 *cost += 2 * extra_cost->alu.shift;
10460 /* Widening beyond 32-bits requires one more insn. */
10461 if (mode == DImode)
10463 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10466 return true;
10468 case CONST_INT:
10469 *cost = 0;
10470 /* CONST_INT has no mode, so we cannot tell for sure how many
10471 insns are really going to be needed. The best we can do is
10472 look at the value passed. If it fits in SImode, then assume
10473 that's the mode it will be used for. Otherwise assume it
10474 will be used in DImode. */
10475 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10476 mode = SImode;
10477 else
10478 mode = DImode;
10480 /* Avoid blowing up in arm_gen_constant (). */
10481 if (!(outer_code == PLUS
10482 || outer_code == AND
10483 || outer_code == IOR
10484 || outer_code == XOR
10485 || outer_code == MINUS))
10486 outer_code = SET;
10488 const_int_cost:
10489 if (mode == SImode)
10491 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10492 INTVAL (x), NULL, NULL,
10493 0, 0));
10494 /* Extra costs? */
10496 else
10498 *cost += COSTS_N_INSNS (arm_gen_constant
10499 (outer_code, SImode, NULL,
10500 trunc_int_for_mode (INTVAL (x), SImode),
10501 NULL, NULL, 0, 0)
10502 + arm_gen_constant (outer_code, SImode, NULL,
10503 INTVAL (x) >> 32, NULL,
10504 NULL, 0, 0));
10505 /* Extra costs? */
10508 return true;
10510 case CONST:
10511 case LABEL_REF:
10512 case SYMBOL_REF:
10513 if (speed_p)
10515 if (arm_arch_thumb2 && !flag_pic)
10516 *cost += COSTS_N_INSNS (1);
10517 else
10518 *cost += extra_cost->ldst.load;
10520 else
10521 *cost += COSTS_N_INSNS (1);
10523 if (flag_pic)
10525 *cost += COSTS_N_INSNS (1);
10526 if (speed_p)
10527 *cost += extra_cost->alu.arith;
10530 return true;
10532 case CONST_FIXED:
10533 *cost = COSTS_N_INSNS (4);
10534 /* Fixme. */
10535 return true;
10537 case CONST_DOUBLE:
10538 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10539 && (mode == SFmode || !TARGET_VFP_SINGLE))
10541 if (vfp3_const_double_rtx (x))
10543 if (speed_p)
10544 *cost += extra_cost->fp[mode == DFmode].fpconst;
10545 return true;
10548 if (speed_p)
10550 if (mode == DFmode)
10551 *cost += extra_cost->ldst.loadd;
10552 else
10553 *cost += extra_cost->ldst.loadf;
10555 else
10556 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10558 return true;
10560 *cost = COSTS_N_INSNS (4);
10561 return true;
10563 case CONST_VECTOR:
10564 /* Fixme. */
10565 if (TARGET_NEON
10566 && TARGET_HARD_FLOAT
10567 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10568 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10569 *cost = COSTS_N_INSNS (1);
10570 else
10571 *cost = COSTS_N_INSNS (4);
10572 return true;
10574 case HIGH:
10575 case LO_SUM:
10576 /* When optimizing for size, we prefer constant pool entries to
10577 MOVW/MOVT pairs, so bump the cost of these slightly. */
10578 if (!speed_p)
10579 *cost += 1;
10580 return true;
10582 case CLZ:
10583 if (speed_p)
10584 *cost += extra_cost->alu.clz;
10585 return false;
10587 case SMIN:
10588 if (XEXP (x, 1) == const0_rtx)
10590 if (speed_p)
10591 *cost += extra_cost->alu.log_shift;
10592 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10593 return true;
10595 /* Fall through. */
10596 case SMAX:
10597 case UMIN:
10598 case UMAX:
10599 *cost += COSTS_N_INSNS (1);
10600 return false;
10602 case TRUNCATE:
10603 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10604 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10605 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10606 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10607 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10608 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10609 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10610 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10611 == ZERO_EXTEND))))
10613 if (speed_p)
10614 *cost += extra_cost->mult[1].extend;
10615 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10616 ZERO_EXTEND, 0, speed_p)
10617 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10618 ZERO_EXTEND, 0, speed_p));
10619 return true;
10621 *cost = LIBCALL_COST (1);
10622 return false;
10624 case UNSPEC_VOLATILE:
10625 case UNSPEC:
10626 return arm_unspec_cost (x, outer_code, speed_p, cost);
10628 case PC:
10629 /* Reading the PC is like reading any other register. Writing it
10630 is more expensive, but we take that into account elsewhere. */
10631 *cost = 0;
10632 return true;
10634 case ZERO_EXTRACT:
10635 /* TODO: Simple zero_extract of bottom bits using AND. */
10636 /* Fall through. */
10637 case SIGN_EXTRACT:
10638 if (arm_arch6
10639 && mode == SImode
10640 && CONST_INT_P (XEXP (x, 1))
10641 && CONST_INT_P (XEXP (x, 2)))
10643 if (speed_p)
10644 *cost += extra_cost->alu.bfx;
10645 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10646 return true;
10648 /* Without UBFX/SBFX, need to resort to shift operations. */
10649 *cost += COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += 2 * extra_cost->alu.shift;
10652 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10653 return true;
10655 case FLOAT_EXTEND:
10656 if (TARGET_HARD_FLOAT)
10658 if (speed_p)
10659 *cost += extra_cost->fp[mode == DFmode].widen;
10660 if (!TARGET_FPU_ARMV8
10661 && GET_MODE (XEXP (x, 0)) == HFmode)
10663 /* Pre v8, widening HF->DF is a two-step process, first
10664 widening to SFmode. */
10665 *cost += COSTS_N_INSNS (1);
10666 if (speed_p)
10667 *cost += extra_cost->fp[0].widen;
10669 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10670 return true;
10673 *cost = LIBCALL_COST (1);
10674 return false;
10676 case FLOAT_TRUNCATE:
10677 if (TARGET_HARD_FLOAT)
10679 if (speed_p)
10680 *cost += extra_cost->fp[mode == DFmode].narrow;
10681 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10682 return true;
10683 /* Vector modes? */
10685 *cost = LIBCALL_COST (1);
10686 return false;
10688 case FMA:
10689 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10691 rtx op0 = XEXP (x, 0);
10692 rtx op1 = XEXP (x, 1);
10693 rtx op2 = XEXP (x, 2);
10696 /* vfms or vfnma. */
10697 if (GET_CODE (op0) == NEG)
10698 op0 = XEXP (op0, 0);
10700 /* vfnms or vfnma. */
10701 if (GET_CODE (op2) == NEG)
10702 op2 = XEXP (op2, 0);
10704 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10705 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10706 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10708 if (speed_p)
10709 *cost += extra_cost->fp[mode == DFmode].fma;
10711 return true;
10714 *cost = LIBCALL_COST (3);
10715 return false;
10717 case FIX:
10718 case UNSIGNED_FIX:
10719 if (TARGET_HARD_FLOAT)
10721 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10722 a vcvt fixed-point conversion. */
10723 if (code == FIX && mode == SImode
10724 && GET_CODE (XEXP (x, 0)) == FIX
10725 && GET_MODE (XEXP (x, 0)) == SFmode
10726 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10727 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10728 > 0)
10730 if (speed_p)
10731 *cost += extra_cost->fp[0].toint;
10733 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10734 code, 0, speed_p);
10735 return true;
10738 if (GET_MODE_CLASS (mode) == MODE_INT)
10740 mode = GET_MODE (XEXP (x, 0));
10741 if (speed_p)
10742 *cost += extra_cost->fp[mode == DFmode].toint;
10743 /* Strip off the 'cost' of rounding towards zero. */
10744 if (GET_CODE (XEXP (x, 0)) == FIX)
10745 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10746 0, speed_p);
10747 else
10748 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10749 /* ??? Increase the cost to deal with transferring from
10750 FP -> CORE registers? */
10751 return true;
10753 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10754 && TARGET_FPU_ARMV8)
10756 if (speed_p)
10757 *cost += extra_cost->fp[mode == DFmode].roundint;
10758 return false;
10760 /* Vector costs? */
10762 *cost = LIBCALL_COST (1);
10763 return false;
10765 case FLOAT:
10766 case UNSIGNED_FLOAT:
10767 if (TARGET_HARD_FLOAT)
10769 /* ??? Increase the cost to deal with transferring from CORE
10770 -> FP registers? */
10771 if (speed_p)
10772 *cost += extra_cost->fp[mode == DFmode].fromint;
10773 return false;
10775 *cost = LIBCALL_COST (1);
10776 return false;
10778 case CALL:
10779 return true;
10781 case ASM_OPERANDS:
10783 /* Just a guess. Guess number of instructions in the asm
10784 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10785 though (see PR60663). */
10786 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10787 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10789 *cost = COSTS_N_INSNS (asm_length + num_operands);
10790 return true;
10792 default:
10793 if (mode != VOIDmode)
10794 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10795 else
10796 *cost = COSTS_N_INSNS (4); /* Who knows? */
10797 return false;
10801 #undef HANDLE_NARROW_SHIFT_ARITH
10803 /* RTX costs entry point. */
10805 static bool
10806 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10807 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10809 bool result;
10810 int code = GET_CODE (x);
10811 gcc_assert (current_tune->insn_extra_cost);
10813 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10814 (enum rtx_code) outer_code,
10815 current_tune->insn_extra_cost,
10816 total, speed);
10818 if (dump_file && (dump_flags & TDF_DETAILS))
10820 print_rtl_single (dump_file, x);
10821 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10822 *total, result ? "final" : "partial");
10824 return result;
10827 /* All address computations that can be done are free, but rtx cost returns
10828 the same for practically all of them. So we weight the different types
10829 of address here in the order (most pref first):
10830 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10831 static inline int
10832 arm_arm_address_cost (rtx x)
10834 enum rtx_code c = GET_CODE (x);
10836 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10837 return 0;
10838 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10839 return 10;
10841 if (c == PLUS)
10843 if (CONST_INT_P (XEXP (x, 1)))
10844 return 2;
10846 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10847 return 3;
10849 return 4;
10852 return 6;
10855 static inline int
10856 arm_thumb_address_cost (rtx x)
10858 enum rtx_code c = GET_CODE (x);
10860 if (c == REG)
10861 return 1;
10862 if (c == PLUS
10863 && REG_P (XEXP (x, 0))
10864 && CONST_INT_P (XEXP (x, 1)))
10865 return 1;
10867 return 2;
10870 static int
10871 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10872 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10874 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10877 /* Adjust cost hook for XScale. */
10878 static bool
10879 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10880 int * cost)
10882 /* Some true dependencies can have a higher cost depending
10883 on precisely how certain input operands are used. */
10884 if (dep_type == 0
10885 && recog_memoized (insn) >= 0
10886 && recog_memoized (dep) >= 0)
10888 int shift_opnum = get_attr_shift (insn);
10889 enum attr_type attr_type = get_attr_type (dep);
10891 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10892 operand for INSN. If we have a shifted input operand and the
10893 instruction we depend on is another ALU instruction, then we may
10894 have to account for an additional stall. */
10895 if (shift_opnum != 0
10896 && (attr_type == TYPE_ALU_SHIFT_IMM
10897 || attr_type == TYPE_ALUS_SHIFT_IMM
10898 || attr_type == TYPE_LOGIC_SHIFT_IMM
10899 || attr_type == TYPE_LOGICS_SHIFT_IMM
10900 || attr_type == TYPE_ALU_SHIFT_REG
10901 || attr_type == TYPE_ALUS_SHIFT_REG
10902 || attr_type == TYPE_LOGIC_SHIFT_REG
10903 || attr_type == TYPE_LOGICS_SHIFT_REG
10904 || attr_type == TYPE_MOV_SHIFT
10905 || attr_type == TYPE_MVN_SHIFT
10906 || attr_type == TYPE_MOV_SHIFT_REG
10907 || attr_type == TYPE_MVN_SHIFT_REG))
10909 rtx shifted_operand;
10910 int opno;
10912 /* Get the shifted operand. */
10913 extract_insn (insn);
10914 shifted_operand = recog_data.operand[shift_opnum];
10916 /* Iterate over all the operands in DEP. If we write an operand
10917 that overlaps with SHIFTED_OPERAND, then we have to increase the
10918 cost of this dependency. */
10919 extract_insn (dep);
10920 preprocess_constraints (dep);
10921 for (opno = 0; opno < recog_data.n_operands; opno++)
10923 /* We can ignore strict inputs. */
10924 if (recog_data.operand_type[opno] == OP_IN)
10925 continue;
10927 if (reg_overlap_mentioned_p (recog_data.operand[opno],
10928 shifted_operand))
10930 *cost = 2;
10931 return false;
10936 return true;
10939 /* Adjust cost hook for Cortex A9. */
10940 static bool
10941 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10942 int * cost)
10944 switch (dep_type)
10946 case REG_DEP_ANTI:
10947 *cost = 0;
10948 return false;
10950 case REG_DEP_TRUE:
10951 case REG_DEP_OUTPUT:
10952 if (recog_memoized (insn) >= 0
10953 && recog_memoized (dep) >= 0)
10955 if (GET_CODE (PATTERN (insn)) == SET)
10957 if (GET_MODE_CLASS
10958 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
10959 || GET_MODE_CLASS
10960 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
10962 enum attr_type attr_type_insn = get_attr_type (insn);
10963 enum attr_type attr_type_dep = get_attr_type (dep);
10965 /* By default all dependencies of the form
10966 s0 = s0 <op> s1
10967 s0 = s0 <op> s2
10968 have an extra latency of 1 cycle because
10969 of the input and output dependency in this
10970 case. However this gets modeled as a true
10971 dependency and hence all these checks. */
10972 if (REG_P (SET_DEST (PATTERN (insn)))
10973 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
10975 /* FMACS is a special case where the dependent
10976 instruction can be issued 3 cycles before
10977 the normal latency in case of an output
10978 dependency. */
10979 if ((attr_type_insn == TYPE_FMACS
10980 || attr_type_insn == TYPE_FMACD)
10981 && (attr_type_dep == TYPE_FMACS
10982 || attr_type_dep == TYPE_FMACD))
10984 if (dep_type == REG_DEP_OUTPUT)
10985 *cost = insn_default_latency (dep) - 3;
10986 else
10987 *cost = insn_default_latency (dep);
10988 return false;
10990 else
10992 if (dep_type == REG_DEP_OUTPUT)
10993 *cost = insn_default_latency (dep) + 1;
10994 else
10995 *cost = insn_default_latency (dep);
10997 return false;
11002 break;
11004 default:
11005 gcc_unreachable ();
11008 return true;
11011 /* Adjust cost hook for FA726TE. */
11012 static bool
11013 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11014 int * cost)
11016 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11017 has a penalty of 3. */
11018 if (dep_type == REG_DEP_TRUE
11019 && recog_memoized (insn) >= 0
11020 && recog_memoized (dep) >= 0
11021 && get_attr_conds (dep) == CONDS_SET)
11023 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11024 if (get_attr_conds (insn) == CONDS_USE
11025 && get_attr_type (insn) != TYPE_BRANCH)
11027 *cost = 3;
11028 return false;
11031 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11032 || get_attr_conds (insn) == CONDS_USE)
11034 *cost = 0;
11035 return false;
11039 return true;
11042 /* Implement TARGET_REGISTER_MOVE_COST.
11044 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11045 it is typically more expensive than a single memory access. We set
11046 the cost to less than two memory accesses so that floating
11047 point to integer conversion does not go through memory. */
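/* Concretely, with the numbers used in these hooks: a VFP<->core
   register move is costed at 15, below the 2 * 10 = 20 cost of a
   round trip through memory on a 32-bit target (see
   arm_memory_move_cost below).  */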
11050 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11051 reg_class_t from, reg_class_t to)
11053 if (TARGET_32BIT)
11055 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11056 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11057 return 15;
11058 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11059 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11060 return 4;
11061 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11062 return 20;
11063 else
11064 return 2;
11066 else
11068 if (from == HI_REGS || to == HI_REGS)
11069 return 4;
11070 else
11071 return 2;
11075 /* Implement TARGET_MEMORY_MOVE_COST. */
11078 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11079 bool in ATTRIBUTE_UNUSED)
11081 if (TARGET_32BIT)
11082 return 10;
11083 else
11085 if (GET_MODE_SIZE (mode) < 4)
11086 return 8;
11087 else
11088 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11092 /* Vectorizer cost model implementation. */
11094 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11095 static int
11096 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11097 tree vectype,
11098 int misalign ATTRIBUTE_UNUSED)
11100 unsigned elements;
11102 switch (type_of_cost)
11104 case scalar_stmt:
11105 return current_tune->vec_costs->scalar_stmt_cost;
11107 case scalar_load:
11108 return current_tune->vec_costs->scalar_load_cost;
11110 case scalar_store:
11111 return current_tune->vec_costs->scalar_store_cost;
11113 case vector_stmt:
11114 return current_tune->vec_costs->vec_stmt_cost;
11116 case vector_load:
11117 return current_tune->vec_costs->vec_align_load_cost;
11119 case vector_store:
11120 return current_tune->vec_costs->vec_store_cost;
11122 case vec_to_scalar:
11123 return current_tune->vec_costs->vec_to_scalar_cost;
11125 case scalar_to_vec:
11126 return current_tune->vec_costs->scalar_to_vec_cost;
11128 case unaligned_load:
11129 return current_tune->vec_costs->vec_unalign_load_cost;
11131 case unaligned_store:
11132 return current_tune->vec_costs->vec_unalign_store_cost;
11134 case cond_branch_taken:
11135 return current_tune->vec_costs->cond_taken_branch_cost;
11137 case cond_branch_not_taken:
11138 return current_tune->vec_costs->cond_not_taken_branch_cost;
11140 case vec_perm:
11141 case vec_promote_demote:
11142 return current_tune->vec_costs->vec_stmt_cost;
11144 case vec_construct:
11145 elements = TYPE_VECTOR_SUBPARTS (vectype);
11146 return elements / 2 + 1;
11148 default:
11149 gcc_unreachable ();
11153 /* Implement targetm.vectorize.add_stmt_cost. */
11155 static unsigned
11156 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11157 struct _stmt_vec_info *stmt_info, int misalign,
11158 enum vect_cost_model_location where)
11160 unsigned *cost = (unsigned *) data;
11161 unsigned retval = 0;
11163 if (flag_vect_cost_model)
11165 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11166 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11168 /* Statements in an inner loop relative to the loop being
11169 vectorized are weighted more heavily. The value here is
11170 arbitrary and could potentially be improved with analysis. */
11171 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11172 count *= 50; /* FIXME. */
11174 retval = (unsigned) (count * stmt_cost);
11175 cost[where] += retval;
11178 return retval;
11181 /* Return true if and only if this insn can dual-issue only as older. */
11182 static bool
11183 cortexa7_older_only (rtx_insn *insn)
11185 if (recog_memoized (insn) < 0)
11186 return false;
11188 switch (get_attr_type (insn))
11190 case TYPE_ALU_DSP_REG:
11191 case TYPE_ALU_SREG:
11192 case TYPE_ALUS_SREG:
11193 case TYPE_LOGIC_REG:
11194 case TYPE_LOGICS_REG:
11195 case TYPE_ADC_REG:
11196 case TYPE_ADCS_REG:
11197 case TYPE_ADR:
11198 case TYPE_BFM:
11199 case TYPE_REV:
11200 case TYPE_MVN_REG:
11201 case TYPE_SHIFT_IMM:
11202 case TYPE_SHIFT_REG:
11203 case TYPE_LOAD_BYTE:
11204 case TYPE_LOAD1:
11205 case TYPE_STORE1:
11206 case TYPE_FFARITHS:
11207 case TYPE_FADDS:
11208 case TYPE_FFARITHD:
11209 case TYPE_FADDD:
11210 case TYPE_FMOV:
11211 case TYPE_F_CVT:
11212 case TYPE_FCMPS:
11213 case TYPE_FCMPD:
11214 case TYPE_FCONSTS:
11215 case TYPE_FCONSTD:
11216 case TYPE_FMULS:
11217 case TYPE_FMACS:
11218 case TYPE_FMULD:
11219 case TYPE_FMACD:
11220 case TYPE_FDIVS:
11221 case TYPE_FDIVD:
11222 case TYPE_F_MRC:
11223 case TYPE_F_MRRC:
11224 case TYPE_F_FLAG:
11225 case TYPE_F_LOADS:
11226 case TYPE_F_STORES:
11227 return true;
11228 default:
11229 return false;
11233 /* Return true if and only if this insn can dual-issue as younger. */
11234 static bool
11235 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11237 if (recog_memoized (insn) < 0)
11239 if (verbose > 5)
11240 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11241 return false;
11244 switch (get_attr_type (insn))
11246 case TYPE_ALU_IMM:
11247 case TYPE_ALUS_IMM:
11248 case TYPE_LOGIC_IMM:
11249 case TYPE_LOGICS_IMM:
11250 case TYPE_EXTEND:
11251 case TYPE_MVN_IMM:
11252 case TYPE_MOV_IMM:
11253 case TYPE_MOV_REG:
11254 case TYPE_MOV_SHIFT:
11255 case TYPE_MOV_SHIFT_REG:
11256 case TYPE_BRANCH:
11257 case TYPE_CALL:
11258 return true;
11259 default:
11260 return false;
11265 /* Look for an instruction that can dual issue only as an older
11266 instruction, and move it in front of any instructions that can
11267 dual-issue as younger, while preserving the relative order of all
11268 other instructions in the ready list. This is a heuristic to help
11269 dual-issue in later cycles, by postponing issue of more flexible
11270 instructions. This heuristic may affect dual issue opportunities
11271 in the current cycle. */
11272 static void
11273 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11274 int *n_readyp, int clock)
11276 int i;
11277 int first_older_only = -1, first_younger = -1;
11279 if (verbose > 5)
11280 fprintf (file,
11281 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11282 clock,
11283 *n_readyp);
11285 /* Traverse the ready list from the head (the instruction to issue
11286 first), looking for the first instruction that can issue as
11287 younger and the first instruction that can dual-issue only as
11288 older. */
11289 for (i = *n_readyp - 1; i >= 0; i--)
11291 rtx_insn *insn = ready[i];
11292 if (cortexa7_older_only (insn))
11294 first_older_only = i;
11295 if (verbose > 5)
11296 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11297 break;
11299 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11300 first_younger = i;
11303 /* Nothing to reorder because either no younger insn found or insn
11304 that can dual-issue only as older appears before any insn that
11305 can dual-issue as younger. */
11306 if (first_younger == -1)
11308 if (verbose > 5)
11309 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11310 return;
11313 /* Nothing to reorder because no older-only insn in the ready list. */
11314 if (first_older_only == -1)
11316 if (verbose > 5)
11317 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11318 return;
11321 /* Move first_older_only insn before first_younger. */
11322 if (verbose > 5)
11323 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11324 INSN_UID(ready [first_older_only]),
11325 INSN_UID(ready [first_younger]));
11326 rtx_insn *first_older_only_insn = ready [first_older_only];
11327 for (i = first_older_only; i < first_younger; i++)
11329 ready[i] = ready[i+1];
11332 ready[i] = first_older_only_insn;
11333 return;
11336 /* Implement TARGET_SCHED_REORDER. */
11337 static int
11338 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11339 int clock)
11341 switch (arm_tune)
11343 case TARGET_CPU_cortexa7:
11344 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11345 break;
11346 default:
11347 /* Do nothing for other cores. */
11348 break;
11351 return arm_issue_rate ();
11354 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11355 It corrects the value of COST based on the relationship between
11356 INSN and DEP through the dependence LINK. It returns the new
11357 value. There is a per-core adjust_cost hook to adjust scheduler costs
11358 and the per-core hook can choose to completely override the generic
11359 adjust_cost function. Only put bits of code into arm_adjust_cost that
11360 are common across all cores. */
11361 static int
11362 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11363 unsigned int)
11365 rtx i_pat, d_pat;
11367 /* When generating Thumb-1 code, we want to place flag-setting operations
11368 close to a conditional branch which depends on them, so that we can
11369 omit the comparison. */
11370 if (TARGET_THUMB1
11371 && dep_type == 0
11372 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11373 && recog_memoized (dep) >= 0
11374 && get_attr_conds (dep) == CONDS_SET)
11375 return 0;
11377 if (current_tune->sched_adjust_cost != NULL)
11379 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11380 return cost;
11383 /* XXX Is this strictly true? */
11384 if (dep_type == REG_DEP_ANTI
11385 || dep_type == REG_DEP_OUTPUT)
11386 return 0;
11388 /* Call insns don't incur a stall, even if they follow a load. */
11389 if (dep_type == 0
11390 && CALL_P (insn))
11391 return 1;
11393 if ((i_pat = single_set (insn)) != NULL
11394 && MEM_P (SET_SRC (i_pat))
11395 && (d_pat = single_set (dep)) != NULL
11396 && MEM_P (SET_DEST (d_pat)))
11398 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11399 /* This is a load after a store; there is no conflict if the load reads
11400 from a cached area. Assume that loads from the stack, and from the
11401 constant pool are cached, and that others will miss. This is a
11402 hack. */
11404 if ((GET_CODE (src_mem) == SYMBOL_REF
11405 && CONSTANT_POOL_ADDRESS_P (src_mem))
11406 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11407 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11408 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11409 return 1;
11412 return cost;
11416 arm_max_conditional_execute (void)
11418 return max_insns_skipped;
11421 static int
11422 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11424 if (TARGET_32BIT)
11425 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11426 else
11427 return (optimize > 0) ? 2 : 0;
11430 static int
11431 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11433 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11436 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11437 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11438 sequences of non-executed instructions in IT blocks probably take the same
11439 amount of time as executed instructions (and the IT instruction itself takes
11440 space in icache). This function was experimentally determined to give good
11441 results on a popular embedded benchmark. */
11443 static int
11444 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11446 return (TARGET_32BIT && speed_p) ? 1
11447 : arm_default_branch_cost (speed_p, predictable_p);
11450 static int
11451 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11453 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11456 static bool fp_consts_inited = false;
11458 static REAL_VALUE_TYPE value_fp0;
11460 static void
11461 init_fp_table (void)
11463 REAL_VALUE_TYPE r;
11465 r = REAL_VALUE_ATOF ("0", DFmode);
11466 value_fp0 = r;
11467 fp_consts_inited = true;
11470 /* Return TRUE if rtx X is a valid immediate FP constant. */
11472 arm_const_double_rtx (rtx x)
11474 const REAL_VALUE_TYPE *r;
11476 if (!fp_consts_inited)
11477 init_fp_table ();
11479 r = CONST_DOUBLE_REAL_VALUE (x);
11480 if (REAL_VALUE_MINUS_ZERO (*r))
11481 return 0;
11483 if (real_equal (r, &value_fp0))
11484 return 1;
11486 return 0;
11489 /* VFPv3 has a fairly wide range of representable immediates, formed from
11490 "quarter-precision" floating-point values. These can be evaluated using this
11491 formula (with ^ for exponentiation):
11493 -1^s * n * 2^-r
11495 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11496 16 <= n <= 31 and 0 <= r <= 7.
11498 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11500 - A (most-significant) is the sign bit.
11501 - BCD are the exponent (encoded as r XOR 3).
11502 - EFGH are the mantissa (encoded as n - 16).
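/* Worked example (illustration only, not part of the original sources):
   1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4. The encoded byte ABCDEFGH is
   then A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000, giving the
   imm8 value 0b01110000 (0x70) used by fconst[sd] to load 1.0. */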
11505 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11506 fconst[sd] instruction, or -1 if X isn't suitable. */
11507 static int
11508 vfp3_const_double_index (rtx x)
11510 REAL_VALUE_TYPE r, m;
11511 int sign, exponent;
11512 unsigned HOST_WIDE_INT mantissa, mant_hi;
11513 unsigned HOST_WIDE_INT mask;
11514 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11515 bool fail;
11517 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11518 return -1;
11520 r = *CONST_DOUBLE_REAL_VALUE (x);
11522 /* We can't represent these things, so detect them first. */
11523 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11524 return -1;
11526 /* Extract sign, exponent and mantissa. */
11527 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11528 r = real_value_abs (&r);
11529 exponent = REAL_EXP (&r);
11530 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11531 highest (sign) bit, with a fixed binary point at bit point_pos.
11532 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11533 bits for the mantissa, this may fail (low bits would be lost). */
11534 real_ldexp (&m, &r, point_pos - exponent);
11535 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11536 mantissa = w.elt (0);
11537 mant_hi = w.elt (1);
11539 /* If there are bits set in the low part of the mantissa, we can't
11540 represent this value. */
11541 if (mantissa != 0)
11542 return -1;
11544 /* Now make it so that mantissa contains the most-significant bits, and move
11545 the point_pos to indicate that the least-significant bits have been
11546 discarded. */
11547 point_pos -= HOST_BITS_PER_WIDE_INT;
11548 mantissa = mant_hi;
11550 /* We can permit four significant bits of mantissa only, plus a high bit
11551 which is always 1. */
11552 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11553 if ((mantissa & mask) != 0)
11554 return -1;
11556 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11557 mantissa >>= point_pos - 5;
11559 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11560 floating-point immediate zero with Neon using an integer-zero load, but
11561 that case is handled elsewhere.) */
11562 if (mantissa == 0)
11563 return -1;
11565 gcc_assert (mantissa >= 16 && mantissa <= 31);
11567 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11568 normalized significands are in the range [1, 2)). (Our mantissa is shifted
11569 left 4 places at this point relative to normalized IEEE754 values). GCC
11570 internally uses [0.5, 1) (see real.c), so the exponent returned from
11571 REAL_EXP must be altered. */
11572 exponent = 5 - exponent;
11574 if (exponent < 0 || exponent > 7)
11575 return -1;
11577 /* Sign, mantissa and exponent are now in the correct form to plug into the
11578 formula described in the comment above. */
11579 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11582 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11584 vfp3_const_double_rtx (rtx x)
11586 if (!TARGET_VFP3)
11587 return 0;
11589 return vfp3_const_double_index (x) != -1;
11592 /* Recognize immediates which can be used in various Neon instructions. Legal
11593 immediates are described by the following table (for VMVN variants, the
11594 bitwise inverse of the constant shown is recognized. In either case, VMOV
11595 is output and the correct instruction to use for a given constant is chosen
11596 by the assembler). The constant shown is replicated across all elements of
11597 the destination vector.
11599 insn elems variant constant (binary)
11600 ---- ----- ------- -----------------
11601 vmov i32 0 00000000 00000000 00000000 abcdefgh
11602 vmov i32 1 00000000 00000000 abcdefgh 00000000
11603 vmov i32 2 00000000 abcdefgh 00000000 00000000
11604 vmov i32 3 abcdefgh 00000000 00000000 00000000
11605 vmov i16 4 00000000 abcdefgh
11606 vmov i16 5 abcdefgh 00000000
11607 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11608 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11609 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11610 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11611 vmvn i16 10 00000000 abcdefgh
11612 vmvn i16 11 abcdefgh 00000000
11613 vmov i32 12 00000000 00000000 abcdefgh 11111111
11614 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11615 vmov i32 14 00000000 abcdefgh 11111111 11111111
11616 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11617 vmov i8 16 abcdefgh
11618 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11619 eeeeeeee ffffffff gggggggg hhhhhhhh
11620 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11621 vmov f32 19 00000000 00000000 00000000 00000000
11623 For case 18, B = !b. Representable values are exactly those accepted by
11624 vfp3_const_double_index, but are output as floating-point numbers rather
11625 than indices.
11627 For case 19, we will change it to vmov.i32 when assembling.
11629 Variants 0-5 (inclusive) may also be used as immediates for the second
11630 operand of VORR/VBIC instructions.
11632 The INVERSE argument causes the bitwise inverse of the given operand to be
11633 recognized instead (used for recognizing legal immediates for the VAND/VORN
11634 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11635 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11636 output, rather than the real insns vbic/vorr).
11638 INVERSE makes no difference to the recognition of float vectors.
11640 The return value is the variant of immediate as shown in the above table, or
11641 -1 if the given value doesn't match any of the listed patterns.
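/* Worked example (illustration only): a V4SImode CONST_VECTOR whose four
   elements are all 0x0000ab00 splats to the per-element bytes
   { 0x00, 0xab, 0x00, 0x00 } and so matches variant 1 above, with
   *ELEMENTWIDTH set to 32 and *MODCONST set to 0x0000ab00. */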
11643 static int
11644 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11645 rtx *modconst, int *elementwidth)
11647 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11648 matches = 1; \
11649 for (i = 0; i < idx; i += (STRIDE)) \
11650 if (!(TEST)) \
11651 matches = 0; \
11652 if (matches) \
11654 immtype = (CLASS); \
11655 elsize = (ELSIZE); \
11656 break; \
11659 unsigned int i, elsize = 0, idx = 0, n_elts;
11660 unsigned int innersize;
11661 unsigned char bytes[16];
11662 int immtype = -1, matches;
11663 unsigned int invmask = inverse ? 0xff : 0;
11664 bool vector = GET_CODE (op) == CONST_VECTOR;
11666 if (vector)
11667 n_elts = CONST_VECTOR_NUNITS (op);
11668 else
11670 n_elts = 1;
11671 if (mode == VOIDmode)
11672 mode = DImode;
11675 innersize = GET_MODE_UNIT_SIZE (mode);
11677 /* Vectors of float constants. */
11678 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11680 rtx el0 = CONST_VECTOR_ELT (op, 0);
11682 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11683 return -1;
11685 /* FP16 vectors cannot be represented. */
11686 if (GET_MODE_INNER (mode) == HFmode)
11687 return -1;
11689 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11690 are distinct in this context. */
11691 if (!const_vec_duplicate_p (op))
11692 return -1;
11694 if (modconst)
11695 *modconst = CONST_VECTOR_ELT (op, 0);
11697 if (elementwidth)
11698 *elementwidth = 0;
11700 if (el0 == CONST0_RTX (GET_MODE (el0)))
11701 return 19;
11702 else
11703 return 18;
11706 /* The tricks done in the code below apply for little-endian vector layout.
11707 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11708 FIXME: Implement logic for big-endian vectors. */
11709 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11710 return -1;
11712 /* Splat vector constant out into a byte vector. */
11713 for (i = 0; i < n_elts; i++)
11715 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11716 unsigned HOST_WIDE_INT elpart;
11718 gcc_assert (CONST_INT_P (el));
11719 elpart = INTVAL (el);
11721 for (unsigned int byte = 0; byte < innersize; byte++)
11723 bytes[idx++] = (elpart & 0xff) ^ invmask;
11724 elpart >>= BITS_PER_UNIT;
11728 /* Sanity check. */
11729 gcc_assert (idx == GET_MODE_SIZE (mode));
11733 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11734 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11736 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11737 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11739 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11740 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11742 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11743 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11745 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11747 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11749 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11750 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11752 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11753 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11755 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11756 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11758 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11759 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11761 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11763 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11765 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11766 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11768 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11769 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11771 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11772 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11774 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11775 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11777 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11779 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11780 && bytes[i] == bytes[(i + 8) % idx]);
11782 while (0);
11784 if (immtype == -1)
11785 return -1;
11787 if (elementwidth)
11788 *elementwidth = elsize;
11790 if (modconst)
11792 unsigned HOST_WIDE_INT imm = 0;
11794 /* Un-invert bytes of recognized vector, if necessary. */
11795 if (invmask != 0)
11796 for (i = 0; i < idx; i++)
11797 bytes[i] ^= invmask;
11799 if (immtype == 17)
11801 /* FIXME: Broken on 32-bit H_W_I hosts. */
11802 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11804 for (i = 0; i < 8; i++)
11805 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11806 << (i * BITS_PER_UNIT);
11808 *modconst = GEN_INT (imm);
11810 else
11812 unsigned HOST_WIDE_INT imm = 0;
11814 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11815 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11817 *modconst = GEN_INT (imm);
11821 return immtype;
11822 #undef CHECK
11825 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11826 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11827 float elements), and a modified constant (whatever should be output for a
11828 VMOV) in *MODCONST. */
11831 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11832 rtx *modconst, int *elementwidth)
11834 rtx tmpconst;
11835 int tmpwidth;
11836 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11838 if (retval == -1)
11839 return 0;
11841 if (modconst)
11842 *modconst = tmpconst;
11844 if (elementwidth)
11845 *elementwidth = tmpwidth;
11847 return 1;
11850 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11851 the immediate is valid, write a constant suitable for using as an operand
11852 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11853 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11856 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11857 rtx *modconst, int *elementwidth)
11859 rtx tmpconst;
11860 int tmpwidth;
11861 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11863 if (retval < 0 || retval > 5)
11864 return 0;
11866 if (modconst)
11867 *modconst = tmpconst;
11869 if (elementwidth)
11870 *elementwidth = tmpwidth;
11872 return 1;
11875 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11876 the immediate is valid, write a constant suitable for using as an operand
11877 to VSHR/VSHL to *MODCONST and the corresponding element width to
11878 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a
11879 right shift; the two kinds of shift have different limitations.
11882 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11883 rtx *modconst, int *elementwidth,
11884 bool isleftshift)
11886 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11887 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11888 unsigned HOST_WIDE_INT last_elt = 0;
11889 unsigned HOST_WIDE_INT maxshift;
11891 /* All vector elements must be equal; extract the common value. */
11892 for (i = 0; i < n_elts; i++)
11894 rtx el = CONST_VECTOR_ELT (op, i);
11895 unsigned HOST_WIDE_INT elpart;
11897 if (CONST_INT_P (el))
11898 elpart = INTVAL (el);
11899 else if (CONST_DOUBLE_P (el))
11900 return 0;
11901 else
11902 gcc_unreachable ();
11904 if (i != 0 && elpart != last_elt)
11905 return 0;
11907 last_elt = elpart;
11910 /* Shift less than element size. */
11911 maxshift = innersize * 8;
11913 if (isleftshift)
11915 /* Left shift immediate value can be from 0 to <size>-1. */
11916 if (last_elt >= maxshift)
11917 return 0;
11919 else
11921 /* Right shift immediate value can be from 1 to <size>. */
11922 if (last_elt == 0 || last_elt > maxshift)
11923 return 0;
11926 if (elementwidth)
11927 *elementwidth = innersize * 8;
11929 if (modconst)
11930 *modconst = CONST_VECTOR_ELT (op, 0);
11932 return 1;
11935 /* Return a string suitable for output of Neon immediate logic operation
11936 MNEM. */
11938 char *
11939 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
11940 int inverse, int quad)
11942 int width, is_valid;
11943 static char templ[40];
11945 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
11947 gcc_assert (is_valid != 0);
11949 if (quad)
11950 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
11951 else
11952 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
11954 return templ;
11957 /* Return a string suitable for output of Neon immediate shift operation
11958 (VSHR or VSHL) MNEM. */
11960 char *
11961 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
11962 machine_mode mode, int quad,
11963 bool isleftshift)
11965 int width, is_valid;
11966 static char templ[40];
11968 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
11969 gcc_assert (is_valid != 0);
11971 if (quad)
11972 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
11973 else
11974 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
11976 return templ;
11979 /* Output a sequence of pairwise operations to implement a reduction.
11980 NOTE: We do "too much work" here, because pairwise operations work on two
11981 registers-worth of operands in one go. Unfortunately I don't think we can
11982 exploit those extra calculations to do the full operation in fewer steps.
11983 Although all vector elements of the result but the first are ignored, we
11984 actually calculate the same result in each of the elements. An alternative
11985 such as initially loading a vector with zero to use as each of the second
11986 operands would use up an additional register and take an extra instruction,
11987 for no particular gain. */
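/* Illustrative sketch (not from the sources): reducing a 4-element vector
   {a, b, c, d} with an addition REDUC takes two pairwise steps, first
   producing {a+b, c+d, a+b, c+d} and then {a+b+c+d, ...}, so element 0 of
   OP0 ends up holding the full reduction (as do the other elements). */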
11989 void
11990 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
11991 rtx (*reduc) (rtx, rtx, rtx))
11993 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
11994 rtx tmpsum = op1;
11996 for (i = parts / 2; i >= 1; i /= 2)
11998 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
11999 emit_insn (reduc (dest, tmpsum, tmpsum));
12000 tmpsum = dest;
12004 /* If VALS is a vector constant that can be loaded into a register
12005 using VDUP, generate instructions to do so and return an RTX to
12006 assign to the register. Otherwise return NULL_RTX. */
12008 static rtx
12009 neon_vdup_constant (rtx vals)
12011 machine_mode mode = GET_MODE (vals);
12012 machine_mode inner_mode = GET_MODE_INNER (mode);
12013 rtx x;
12015 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12016 return NULL_RTX;
12018 if (!const_vec_duplicate_p (vals, &x))
12019 /* The elements are not all the same. We could handle repeating
12020 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12021 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12022 vdup.i16). */
12023 return NULL_RTX;
12025 /* We can load this constant by using VDUP and a constant in a
12026 single ARM register. This will be cheaper than a vector
12027 load. */
12029 x = copy_to_mode_reg (inner_mode, x);
12030 return gen_rtx_VEC_DUPLICATE (mode, x);
12033 /* Generate code to load VALS, which is a PARALLEL containing only
12034 constants (for vec_init) or CONST_VECTOR, efficiently into a
12035 register. Returns an RTX to copy into the register, or NULL_RTX
12036 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12039 neon_make_constant (rtx vals)
12041 machine_mode mode = GET_MODE (vals);
12042 rtx target;
12043 rtx const_vec = NULL_RTX;
12044 int n_elts = GET_MODE_NUNITS (mode);
12045 int n_const = 0;
12046 int i;
12048 if (GET_CODE (vals) == CONST_VECTOR)
12049 const_vec = vals;
12050 else if (GET_CODE (vals) == PARALLEL)
12052 /* A CONST_VECTOR must contain only CONST_INTs and
12053 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12054 Only store valid constants in a CONST_VECTOR. */
12055 for (i = 0; i < n_elts; ++i)
12057 rtx x = XVECEXP (vals, 0, i);
12058 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12059 n_const++;
12061 if (n_const == n_elts)
12062 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12064 else
12065 gcc_unreachable ();
12067 if (const_vec != NULL
12068 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12069 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12070 return const_vec;
12071 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12072 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12073 pipeline cycle; creating the constant takes one or two ARM
12074 pipeline cycles. */
12075 return target;
12076 else if (const_vec != NULL_RTX)
12077 /* Load from constant pool. On Cortex-A8 this takes two cycles
12078 (for either double or quad vectors). We cannot take advantage
12079 of single-cycle VLD1 because we need a PC-relative addressing
12080 mode. */
12081 return const_vec;
12082 else
12083 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12084 We cannot construct an initializer. */
12085 return NULL_RTX;
12088 /* Initialize vector TARGET to VALS. */
12090 void
12091 neon_expand_vector_init (rtx target, rtx vals)
12093 machine_mode mode = GET_MODE (target);
12094 machine_mode inner_mode = GET_MODE_INNER (mode);
12095 int n_elts = GET_MODE_NUNITS (mode);
12096 int n_var = 0, one_var = -1;
12097 bool all_same = true;
12098 rtx x, mem;
12099 int i;
12101 for (i = 0; i < n_elts; ++i)
12103 x = XVECEXP (vals, 0, i);
12104 if (!CONSTANT_P (x))
12105 ++n_var, one_var = i;
12107 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12108 all_same = false;
12111 if (n_var == 0)
12113 rtx constant = neon_make_constant (vals);
12114 if (constant != NULL_RTX)
12116 emit_move_insn (target, constant);
12117 return;
12121 /* Splat a single non-constant element if we can. */
12122 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12124 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12125 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12126 return;
12129 /* One field is non-constant. Load constant then overwrite varying
12130 field. This is more efficient than using the stack. */
12131 if (n_var == 1)
12133 rtx copy = copy_rtx (vals);
12134 rtx index = GEN_INT (one_var);
12136 /* Load constant part of vector, substitute neighboring value for
12137 varying element. */
12138 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12139 neon_expand_vector_init (target, copy);
12141 /* Insert variable. */
12142 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12143 switch (mode)
12145 case V8QImode:
12146 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12147 break;
12148 case V16QImode:
12149 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12150 break;
12151 case V4HImode:
12152 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12153 break;
12154 case V8HImode:
12155 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12156 break;
12157 case V2SImode:
12158 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12159 break;
12160 case V4SImode:
12161 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12162 break;
12163 case V2SFmode:
12164 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12165 break;
12166 case V4SFmode:
12167 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12168 break;
12169 case V2DImode:
12170 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12171 break;
12172 default:
12173 gcc_unreachable ();
12175 return;
12178 /* Construct the vector in memory one field at a time
12179 and load the whole vector. */
12180 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12181 for (i = 0; i < n_elts; i++)
12182 emit_move_insn (adjust_address_nv (mem, inner_mode,
12183 i * GET_MODE_SIZE (inner_mode)),
12184 XVECEXP (vals, 0, i));
12185 emit_move_insn (target, mem);
12188 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12189 an error if it doesn't. EXP indicates the source location, which includes the
12190 inlining history for intrinsics. */
12192 static void
12193 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12194 const_tree exp, const char *desc)
12196 HOST_WIDE_INT lane;
12198 gcc_assert (CONST_INT_P (operand));
12200 lane = INTVAL (operand);
12202 if (lane < low || lane >= high)
12204 if (exp)
12205 error ("%K%s %wd out of range %wd - %wd",
12206 exp, desc, lane, low, high - 1);
12207 else
12208 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12212 /* Bounds-check lanes. */
12214 void
12215 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12216 const_tree exp)
12218 bounds_check (operand, low, high, exp, "lane");
12221 /* Bounds-check constants. */
12223 void
12224 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12226 bounds_check (operand, low, high, NULL_TREE, "constant");
12229 HOST_WIDE_INT
12230 neon_element_bits (machine_mode mode)
12232 return GET_MODE_UNIT_BITSIZE (mode);
12236 /* Predicates for `match_operand' and `match_operator'. */
12238 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12239 WB is true if full writeback address modes are allowed and is false
12240 if limited writeback address modes (POST_INC and PRE_DEC) are
12241 allowed. */
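/* Illustrative example (not from the sources): (mem (plus (reg r4)
   (const_int 256))) is accepted below, since the offset is a multiple of 4
   lying strictly between -1024 and 1024, which is consistent with the
   vldr/vstr offset range. */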
12244 arm_coproc_mem_operand (rtx op, bool wb)
12246 rtx ind;
12248 /* Reject eliminable registers. */
12249 if (! (reload_in_progress || reload_completed || lra_in_progress)
12250 && ( reg_mentioned_p (frame_pointer_rtx, op)
12251 || reg_mentioned_p (arg_pointer_rtx, op)
12252 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12253 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12254 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12255 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12256 return FALSE;
12258 /* Constants are converted into offsets from labels. */
12259 if (!MEM_P (op))
12260 return FALSE;
12262 ind = XEXP (op, 0);
12264 if (reload_completed
12265 && (GET_CODE (ind) == LABEL_REF
12266 || (GET_CODE (ind) == CONST
12267 && GET_CODE (XEXP (ind, 0)) == PLUS
12268 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12269 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12270 return TRUE;
12272 /* Match: (mem (reg)). */
12273 if (REG_P (ind))
12274 return arm_address_register_rtx_p (ind, 0);
12276 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12277 acceptable in any case (subject to verification by
12278 arm_address_register_rtx_p). We need WB to be true to accept
12279 PRE_INC and POST_DEC. */
12280 if (GET_CODE (ind) == POST_INC
12281 || GET_CODE (ind) == PRE_DEC
12282 || (wb
12283 && (GET_CODE (ind) == PRE_INC
12284 || GET_CODE (ind) == POST_DEC)))
12285 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12287 if (wb
12288 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12289 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12290 && GET_CODE (XEXP (ind, 1)) == PLUS
12291 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12292 ind = XEXP (ind, 1);
12294 /* Match:
12295 (plus (reg)
12296 (const)). */
12297 if (GET_CODE (ind) == PLUS
12298 && REG_P (XEXP (ind, 0))
12299 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12300 && CONST_INT_P (XEXP (ind, 1))
12301 && INTVAL (XEXP (ind, 1)) > -1024
12302 && INTVAL (XEXP (ind, 1)) < 1024
12303 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12304 return TRUE;
12306 return FALSE;
12309 /* Return TRUE if OP is a memory operand which we can load or store a vector
12310 to/from. TYPE is one of the following values:
12311 0 - Vector load/store (vldr)
12312 1 - Core registers (ldm)
12313 2 - Element/structure loads (vld1)
12316 neon_vector_mem_operand (rtx op, int type, bool strict)
12318 rtx ind;
12320 /* Reject eliminable registers. */
12321 if (strict && ! (reload_in_progress || reload_completed)
12322 && (reg_mentioned_p (frame_pointer_rtx, op)
12323 || reg_mentioned_p (arg_pointer_rtx, op)
12324 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12325 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12326 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12327 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12328 return FALSE;
12330 /* Constants are converted into offsets from labels. */
12331 if (!MEM_P (op))
12332 return FALSE;
12334 ind = XEXP (op, 0);
12336 if (reload_completed
12337 && (GET_CODE (ind) == LABEL_REF
12338 || (GET_CODE (ind) == CONST
12339 && GET_CODE (XEXP (ind, 0)) == PLUS
12340 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12341 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12342 return TRUE;
12344 /* Match: (mem (reg)). */
12345 if (REG_P (ind))
12346 return arm_address_register_rtx_p (ind, 0);
12348 /* Allow post-increment with Neon registers. */
12349 if ((type != 1 && GET_CODE (ind) == POST_INC)
12350 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12351 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12353 /* Allow post-increment by register for VLDn. */
12354 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12355 && GET_CODE (XEXP (ind, 1)) == PLUS
12356 && REG_P (XEXP (XEXP (ind, 1), 1)))
12357 return true;
12359 /* Match:
12360 (plus (reg)
12361 (const)). */
12362 if (type == 0
12363 && GET_CODE (ind) == PLUS
12364 && REG_P (XEXP (ind, 0))
12365 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12366 && CONST_INT_P (XEXP (ind, 1))
12367 && INTVAL (XEXP (ind, 1)) > -1024
12368 /* For quad modes, we restrict the constant offset to be slightly less
12369 than what the instruction format permits. We have no such constraint
12370 on double mode offsets. (This must match arm_legitimate_index_p.) */
12371 && (INTVAL (XEXP (ind, 1))
12372 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12373 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12374 return TRUE;
12376 return FALSE;
12379 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12380 type. */
12382 neon_struct_mem_operand (rtx op)
12384 rtx ind;
12386 /* Reject eliminable registers. */
12387 if (! (reload_in_progress || reload_completed)
12388 && ( reg_mentioned_p (frame_pointer_rtx, op)
12389 || reg_mentioned_p (arg_pointer_rtx, op)
12390 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12391 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12392 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12393 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12394 return FALSE;
12396 /* Constants are converted into offsets from labels. */
12397 if (!MEM_P (op))
12398 return FALSE;
12400 ind = XEXP (op, 0);
12402 if (reload_completed
12403 && (GET_CODE (ind) == LABEL_REF
12404 || (GET_CODE (ind) == CONST
12405 && GET_CODE (XEXP (ind, 0)) == PLUS
12406 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12407 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12408 return TRUE;
12410 /* Match: (mem (reg)). */
12411 if (REG_P (ind))
12412 return arm_address_register_rtx_p (ind, 0);
12414 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12415 if (GET_CODE (ind) == POST_INC
12416 || GET_CODE (ind) == PRE_DEC)
12417 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12419 return FALSE;
12422 /* Return true if X is a register that will be eliminated later on. */
12424 arm_eliminable_register (rtx x)
12426 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12427 || REGNO (x) == ARG_POINTER_REGNUM
12428 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12429 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12432 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12433 coprocessor registers. Otherwise return NO_REGS. */
12435 enum reg_class
12436 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12438 if (mode == HFmode)
12440 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12441 return GENERAL_REGS;
12442 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12443 return NO_REGS;
12444 return GENERAL_REGS;
12447 /* The neon move patterns handle all legitimate vector and struct
12448 addresses. */
12449 if (TARGET_NEON
12450 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12451 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12452 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12453 || VALID_NEON_STRUCT_MODE (mode)))
12454 return NO_REGS;
12456 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12457 return NO_REGS;
12459 return GENERAL_REGS;
12462 /* Values which must be returned in the most-significant end of the return
12463 register. */
12465 static bool
12466 arm_return_in_msb (const_tree valtype)
12468 return (TARGET_AAPCS_BASED
12469 && BYTES_BIG_ENDIAN
12470 && (AGGREGATE_TYPE_P (valtype)
12471 || TREE_CODE (valtype) == COMPLEX_TYPE
12472 || FIXED_POINT_TYPE_P (valtype)));
12475 /* Return TRUE if X references a SYMBOL_REF. */
12477 symbol_mentioned_p (rtx x)
12479 const char * fmt;
12480 int i;
12482 if (GET_CODE (x) == SYMBOL_REF)
12483 return 1;
12485 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12486 are constant offsets, not symbols. */
12487 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12488 return 0;
12490 fmt = GET_RTX_FORMAT (GET_CODE (x));
12492 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12494 if (fmt[i] == 'E')
12496 int j;
12498 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12499 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12500 return 1;
12502 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12503 return 1;
12506 return 0;
12509 /* Return TRUE if X references a LABEL_REF. */
12511 label_mentioned_p (rtx x)
12513 const char * fmt;
12514 int i;
12516 if (GET_CODE (x) == LABEL_REF)
12517 return 1;
12519 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12520 instruction, but they are constant offsets, not symbols. */
12521 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12522 return 0;
12524 fmt = GET_RTX_FORMAT (GET_CODE (x));
12525 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12527 if (fmt[i] == 'E')
12529 int j;
12531 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12532 if (label_mentioned_p (XVECEXP (x, i, j)))
12533 return 1;
12535 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12536 return 1;
12539 return 0;
12543 tls_mentioned_p (rtx x)
12545 switch (GET_CODE (x))
12547 case CONST:
12548 return tls_mentioned_p (XEXP (x, 0));
12550 case UNSPEC:
12551 if (XINT (x, 1) == UNSPEC_TLS)
12552 return 1;
12554 /* Fall through. */
12555 default:
12556 return 0;
12560 /* Must not copy any rtx that uses a pc-relative address.
12561 Also, disallow copying of load-exclusive instructions that
12562 may appear after splitting of compare-and-swap-style operations
12563 so as to prevent those loops from being transformed away from their
12564 canonical forms (see PR 69904). */
12566 static bool
12567 arm_cannot_copy_insn_p (rtx_insn *insn)
12569 /* The tls call insn cannot be copied, as it is paired with a data
12570 word. */
12571 if (recog_memoized (insn) == CODE_FOR_tlscall)
12572 return true;
12574 subrtx_iterator::array_type array;
12575 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12577 const_rtx x = *iter;
12578 if (GET_CODE (x) == UNSPEC
12579 && (XINT (x, 1) == UNSPEC_PIC_BASE
12580 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12581 return true;
12584 rtx set = single_set (insn);
12585 if (set)
12587 rtx src = SET_SRC (set);
12588 if (GET_CODE (src) == ZERO_EXTEND)
12589 src = XEXP (src, 0);
12591 /* Catch the load-exclusive and load-acquire operations. */
12592 if (GET_CODE (src) == UNSPEC_VOLATILE
12593 && (XINT (src, 1) == VUNSPEC_LL
12594 || XINT (src, 1) == VUNSPEC_LAX))
12595 return true;
12597 return false;
12600 enum rtx_code
12601 minmax_code (rtx x)
12603 enum rtx_code code = GET_CODE (x);
12605 switch (code)
12607 case SMAX:
12608 return GE;
12609 case SMIN:
12610 return LE;
12611 case UMIN:
12612 return LEU;
12613 case UMAX:
12614 return GEU;
12615 default:
12616 gcc_unreachable ();
12620 /* Match pair of min/max operators that can be implemented via usat/ssat. */
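/* Worked examples (illustration only): HI_BOUND = 255 and LO_BOUND = 0 give
   log = exact_log2 (256) = 8, so *MASK = 8 and *SIGNED_SAT = false (usat #8);
   HI_BOUND = 127 and LO_BOUND = -128 give log = 7 with -128 == -127 - 1, so
   *MASK = 8 and *SIGNED_SAT = true (ssat #8). */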
12622 bool
12623 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12624 int *mask, bool *signed_sat)
12626 /* The high bound must be a power of two minus one. */
12627 int log = exact_log2 (INTVAL (hi_bound) + 1);
12628 if (log == -1)
12629 return false;
12631 /* The low bound is either zero (for usat) or one less than the
12632 negation of the high bound (for ssat). */
12633 if (INTVAL (lo_bound) == 0)
12635 if (mask)
12636 *mask = log;
12637 if (signed_sat)
12638 *signed_sat = false;
12640 return true;
12643 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12645 if (mask)
12646 *mask = log + 1;
12647 if (signed_sat)
12648 *signed_sat = true;
12650 return true;
12653 return false;
12656 /* Return 1 if memory locations are adjacent. */
12658 adjacent_mem_locations (rtx a, rtx b)
12660 /* We don't guarantee to preserve the order of these memory refs. */
12661 if (volatile_refs_p (a) || volatile_refs_p (b))
12662 return 0;
12664 if ((REG_P (XEXP (a, 0))
12665 || (GET_CODE (XEXP (a, 0)) == PLUS
12666 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12667 && (REG_P (XEXP (b, 0))
12668 || (GET_CODE (XEXP (b, 0)) == PLUS
12669 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12671 HOST_WIDE_INT val0 = 0, val1 = 0;
12672 rtx reg0, reg1;
12673 int val_diff;
12675 if (GET_CODE (XEXP (a, 0)) == PLUS)
12677 reg0 = XEXP (XEXP (a, 0), 0);
12678 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12680 else
12681 reg0 = XEXP (a, 0);
12683 if (GET_CODE (XEXP (b, 0)) == PLUS)
12685 reg1 = XEXP (XEXP (b, 0), 0);
12686 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12688 else
12689 reg1 = XEXP (b, 0);
12691 /* Don't accept any offset that will require multiple
12692 instructions to handle, since this would cause the
12693 arith_adjacentmem pattern to output an overlong sequence. */
12694 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12695 return 0;
12697 /* Don't allow an eliminable register: register elimination can make
12698 the offset too large. */
12699 if (arm_eliminable_register (reg0))
12700 return 0;
12702 val_diff = val1 - val0;
12704 if (arm_ld_sched)
12706 /* If the target has load delay slots, then there's no benefit
12707 to using an ldm instruction unless the offset is zero and
12708 we are optimizing for size. */
12709 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12710 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12711 && (val_diff == 4 || val_diff == -4));
12714 return ((REGNO (reg0) == REGNO (reg1))
12715 && (val_diff == 4 || val_diff == -4));
12718 return 0;
12721 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12722 for load operations, false for store operations. CONSECUTIVE is true
12723 if the register numbers in the operation must be consecutive in the register
12724 bank. RETURN_PC is true if the value is to be loaded into PC.
12725 The pattern we are trying to match for load is:
12726 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12727 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12730 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12732 where
12733 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12734 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12735 3. If consecutive is TRUE, then for kth register being loaded,
12736 REGNO (R_dk) = REGNO (R_d0) + k.
12737 The pattern for store is similar. */
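/* Illustrative example (not from the sources): an ldmia of r5, r6 from the
   base register r4 is matched in the form
     (parallel [(set (reg:SI r5) (mem:SI (reg:SI r4)))
                (set (reg:SI r6) (mem:SI (plus:SI (reg:SI r4)
                                                  (const_int 4))))])
   with offset 0, ascending register numbers and <reg_increment> = 4. */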
12738 bool
12739 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12740 bool consecutive, bool return_pc)
12742 HOST_WIDE_INT count = XVECLEN (op, 0);
12743 rtx reg, mem, addr;
12744 unsigned regno;
12745 unsigned first_regno;
12746 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12747 rtx elt;
12748 bool addr_reg_in_reglist = false;
12749 bool update = false;
12750 int reg_increment;
12751 int offset_adj;
12752 int regs_per_val;
12754 /* If not in SImode, then registers must be consecutive
12755 (e.g., VLDM instructions for DFmode). */
12756 gcc_assert ((mode == SImode) || consecutive);
12757 /* Setting return_pc for stores is illegal. */
12758 gcc_assert (!return_pc || load);
12760 /* Set up the increments and the regs per val based on the mode. */
12761 reg_increment = GET_MODE_SIZE (mode);
12762 regs_per_val = reg_increment / 4;
12763 offset_adj = return_pc ? 1 : 0;
12765 if (count <= 1
12766 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12767 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12768 return false;
12770 /* Check if this is a write-back. */
12771 elt = XVECEXP (op, 0, offset_adj);
12772 if (GET_CODE (SET_SRC (elt)) == PLUS)
12774 i++;
12775 base = 1;
12776 update = true;
12778 /* The offset adjustment must be the number of registers being
12779 popped times the size of a single register. */
12780 if (!REG_P (SET_DEST (elt))
12781 || !REG_P (XEXP (SET_SRC (elt), 0))
12782 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12783 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12784 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12785 ((count - 1 - offset_adj) * reg_increment))
12786 return false;
12789 i = i + offset_adj;
12790 base = base + offset_adj;
12791 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12792 success depends on the type: VLDM can do just one reg,
12793 LDM must do at least two. */
12794 if ((count <= i) && (mode == SImode))
12795 return false;
12797 elt = XVECEXP (op, 0, i - 1);
12798 if (GET_CODE (elt) != SET)
12799 return false;
12801 if (load)
12803 reg = SET_DEST (elt);
12804 mem = SET_SRC (elt);
12806 else
12808 reg = SET_SRC (elt);
12809 mem = SET_DEST (elt);
12812 if (!REG_P (reg) || !MEM_P (mem))
12813 return false;
12815 regno = REGNO (reg);
12816 first_regno = regno;
12817 addr = XEXP (mem, 0);
12818 if (GET_CODE (addr) == PLUS)
12820 if (!CONST_INT_P (XEXP (addr, 1)))
12821 return false;
12823 offset = INTVAL (XEXP (addr, 1));
12824 addr = XEXP (addr, 0);
12827 if (!REG_P (addr))
12828 return false;
12830 /* Don't allow SP to be loaded unless it is also the base register. It
12831 guarantees that SP is reset correctly when an LDM instruction
12832 is interrupted. Otherwise, we might end up with a corrupt stack. */
12833 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12834 return false;
12836 for (; i < count; i++)
12838 elt = XVECEXP (op, 0, i);
12839 if (GET_CODE (elt) != SET)
12840 return false;
12842 if (load)
12844 reg = SET_DEST (elt);
12845 mem = SET_SRC (elt);
12847 else
12849 reg = SET_SRC (elt);
12850 mem = SET_DEST (elt);
12853 if (!REG_P (reg)
12854 || GET_MODE (reg) != mode
12855 || REGNO (reg) <= regno
12856 || (consecutive
12857 && (REGNO (reg) !=
12858 (unsigned int) (first_regno + regs_per_val * (i - base))))
12859 /* Don't allow SP to be loaded unless it is also the base register. It
12860 guarantees that SP is reset correctly when an LDM instruction
12861 is interrupted. Otherwise, we might end up with a corrupt stack. */
12862 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12863 || !MEM_P (mem)
12864 || GET_MODE (mem) != mode
12865 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12866 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12867 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12868 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12869 offset + (i - base) * reg_increment))
12870 && (!REG_P (XEXP (mem, 0))
12871 || offset + (i - base) * reg_increment != 0)))
12872 return false;
12874 regno = REGNO (reg);
12875 if (regno == REGNO (addr))
12876 addr_reg_in_reglist = true;
12879 if (load)
12881 if (update && addr_reg_in_reglist)
12882 return false;
12884 /* For Thumb-1, the address register is always modified, either by write-back
12885 or by an explicit load. If the pattern does not describe an update,
12886 then the address register must be in the list of loaded registers. */
12887 if (TARGET_THUMB1)
12888 return update || addr_reg_in_reglist;
12891 return true;
12894 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12895 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12896 instruction. ADD_OFFSET is nonzero if the base address register needs
12897 to be modified with an add instruction before we can use it. */
12899 static bool
12900 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
12901 int nops, HOST_WIDE_INT add_offset)
12903 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12904 if the offset isn't small enough. The reason 2 ldrs are faster
12905 is because these ARMs are able to do more than one cache access
12906 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12907 whilst the ARM8 has a double bandwidth cache. This means that
12908 these cores can do both an instruction fetch and a data fetch in
12909 a single cycle, so the trick of calculating the address into a
12910 scratch register (one of the result regs) and then doing a load
12911 multiple actually becomes slower (and no smaller in code size).
12912 That is the transformation
12914 ldr rd1, [rbase + offset]
12915 ldr rd2, [rbase + offset + 4]
12919 add rd1, rbase, offset
12920 ldmia rd1, {rd1, rd2}
12922 produces worse code -- '3 cycles + any stalls on rd2' instead of
12923 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12924 access per cycle, the first sequence could never complete in less
12925 than 6 cycles, whereas the ldm sequence would only take 5 and
12926 would make better use of sequential accesses if not hitting the
12927 cache.
12929 We cheat here and test 'arm_ld_sched' which we currently know to
12930 only be true for the ARM8, ARM9 and StrongARM. If this ever
12931 changes, then the test below needs to be reworked. */
12932 if (nops == 2 && arm_ld_sched && add_offset != 0)
12933 return false;
12935 /* XScale has load-store double instructions, but they have stricter
12936 alignment requirements than load-store multiple, so we cannot
12937 use them.
12939 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12940 the pipeline until completion.
12942 NREGS CYCLES
12948 An ldr instruction takes 1-3 cycles, but does not block the
12949 pipeline.
12951 NREGS CYCLES
12952 1 1-3
12953 2 2-6
12954 3 3-9
12955 4 4-12
12957 Best case ldr will always win. However, the more ldr instructions
12958 we issue, the less likely we are to be able to schedule them well.
12959 Using ldr instructions also increases code size.
12961 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12962 for counts of 3 or 4 regs. */
12963 if (nops <= 2 && arm_tune_xscale && !optimize_size)
12964 return false;
12965 return true;
12968 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12969 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12970 an array ORDER which describes the sequence to use when accessing the
12971 offsets that produces an ascending order. In this sequence, each
12972 offset must be larger by exactly 4 than the previous one. ORDER[0]
12973 must have been filled in with the lowest offset by the caller.
12974 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12975 we use to verify that ORDER produces an ascending order of registers.
12976 Return true if it was possible to construct such an order, false if
12977 not. */
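/* Worked example (illustration only): for UNSORTED_OFFSETS = {8, 0, 4, 12}
   the caller sets ORDER[0] = 1 (the index of the lowest offset); the loop
   below then fills ORDER = {1, 2, 0, 3}, visiting the offsets as
   0, 4, 8, 12. */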
12979 static bool
12980 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
12981 int *unsorted_regs)
12983 int i;
12984 for (i = 1; i < nops; i++)
12986 int j;
12988 order[i] = order[i - 1];
12989 for (j = 0; j < nops; j++)
12990 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
12992 /* We must find exactly one offset that is higher than the
12993 previous one by 4. */
12994 if (order[i] != order[i - 1])
12995 return false;
12996 order[i] = j;
12998 if (order[i] == order[i - 1])
12999 return false;
13000 /* The register numbers must be ascending. */
13001 if (unsorted_regs != NULL
13002 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13003 return false;
13005 return true;
13008 /* Used to determine in a peephole whether a sequence of load
13009 instructions can be changed into a load-multiple instruction.
13010 NOPS is the number of separate load instructions we are examining. The
13011 first NOPS entries in OPERANDS are the destination registers, the
13012 next NOPS entries are memory operands. If this function is
13013 successful, *BASE is set to the common base register of the memory
13014 accesses; *LOAD_OFFSET is set to the first memory location's offset
13015 from that base register.
13016 REGS is an array filled in with the destination register numbers.
13017 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13018 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13019 the sequence of registers in REGS matches the loads from ascending memory
13020 locations, and the function verifies that the register numbers are
13021 themselves ascending. If CHECK_REGS is false, the register numbers
13022 are stored in the order they are found in the operands. */
13023 static int
13024 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13025 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13027 int unsorted_regs[MAX_LDM_STM_OPS];
13028 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13029 int order[MAX_LDM_STM_OPS];
13030 rtx base_reg_rtx = NULL;
13031 int base_reg = -1;
13032 int i, ldm_case;
13034 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13035 easily extended if required. */
13036 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13038 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13040 /* Loop over the operands and check that the memory references are
13041 suitable (i.e. immediate offsets from the same base register). At
13042 the same time, extract the target register, and the memory
13043 offsets. */
13044 for (i = 0; i < nops; i++)
13046 rtx reg;
13047 rtx offset;
13049 /* Convert a subreg of a mem into the mem itself. */
13050 if (GET_CODE (operands[nops + i]) == SUBREG)
13051 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13053 gcc_assert (MEM_P (operands[nops + i]));
13055 /* Don't reorder volatile memory references; it doesn't seem worth
13056 looking for the case where the order is ok anyway. */
13057 if (MEM_VOLATILE_P (operands[nops + i]))
13058 return 0;
13060 offset = const0_rtx;
13062 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13063 || (GET_CODE (reg) == SUBREG
13064 && REG_P (reg = SUBREG_REG (reg))))
13065 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13066 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13067 || (GET_CODE (reg) == SUBREG
13068 && REG_P (reg = SUBREG_REG (reg))))
13069 && (CONST_INT_P (offset
13070 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13072 if (i == 0)
13074 base_reg = REGNO (reg);
13075 base_reg_rtx = reg;
13076 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13077 return 0;
13079 else if (base_reg != (int) REGNO (reg))
13080 /* Not addressed from the same base register. */
13081 return 0;
13083 unsorted_regs[i] = (REG_P (operands[i])
13084 ? REGNO (operands[i])
13085 : REGNO (SUBREG_REG (operands[i])));
13087 /* If it isn't an integer register, or if it overwrites the
13088 base register but isn't the last insn in the list, then
13089 we can't do this. */
13090 if (unsorted_regs[i] < 0
13091 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13092 || unsorted_regs[i] > 14
13093 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13094 return 0;
13096 /* Don't allow SP to be loaded unless it is also the base
13097 register. It guarantees that SP is reset correctly when
13098 an LDM instruction is interrupted. Otherwise, we might
13099 end up with a corrupt stack. */
13100 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13101 return 0;
13103 unsorted_offsets[i] = INTVAL (offset);
13104 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13105 order[0] = i;
13107 else
13108 /* Not a suitable memory address. */
13109 return 0;
13112 /* All the useful information has now been extracted from the
13113 operands into unsorted_regs and unsorted_offsets; additionally,
13114 order[0] has been set to the lowest offset in the list. Sort
13115 the offsets into order, verifying that they are adjacent, and
13116 check that the register numbers are ascending. */
13117 if (!compute_offset_order (nops, unsorted_offsets, order,
13118 check_regs ? unsorted_regs : NULL))
13119 return 0;
13121 if (saved_order)
13122 memcpy (saved_order, order, sizeof order);
13124 if (base)
13126 *base = base_reg;
13128 for (i = 0; i < nops; i++)
13129 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13131 *load_offset = unsorted_offsets[order[0]];
13134 if (TARGET_THUMB1
13135 && !peep2_reg_dead_p (nops, base_reg_rtx))
13136 return 0;
13138 if (unsorted_offsets[order[0]] == 0)
13139 ldm_case = 1; /* ldmia */
13140 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13141 ldm_case = 2; /* ldmib */
13142 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13143 ldm_case = 3; /* ldmda */
13144 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13145 ldm_case = 4; /* ldmdb */
13146 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13147 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13148 ldm_case = 5;
13149 else
13150 return 0;
13152 if (!multiple_operation_profitable_p (false, nops,
13153 ldm_case == 5
13154 ? unsorted_offsets[order[0]] : 0))
13155 return 0;
13157 return ldm_case;
13160 /* Used to determine in a peephole whether a sequence of store instructions can
13161 be changed into a store-multiple instruction.
13162 NOPS is the number of separate store instructions we are examining.
13163 NOPS_TOTAL is the total number of instructions recognized by the peephole
13164 pattern.
13165 The first NOPS entries in OPERANDS are the source registers, the next
13166 NOPS entries are memory operands. If this function is successful, *BASE is
13167 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13168 to the first memory location's offset from that base register. REGS is an
13169 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13170 likewise filled with the corresponding rtx's.
13171 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13172 numbers to an ascending order of stores.
13173 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13174 from ascending memory locations, and the function verifies that the register
13175 numbers are themselves ascending. If CHECK_REGS is false, the register
13176 numbers are stored in the order they are found in the operands. */
13177 static int
13178 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13179 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13180 HOST_WIDE_INT *load_offset, bool check_regs)
13182 int unsorted_regs[MAX_LDM_STM_OPS];
13183 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13184 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13185 int order[MAX_LDM_STM_OPS];
13186 int base_reg = -1;
13187 rtx base_reg_rtx = NULL;
13188 int i, stm_case;
13190 /* Write-back of the base register is currently only supported for Thumb 1. */
13191 int base_writeback = TARGET_THUMB1;
13193 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13194 easily extended if required. */
13195 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13197 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13199 /* Loop over the operands and check that the memory references are
13200 suitable (i.e. immediate offsets from the same base register). At
13201 the same time, extract the target register, and the memory
13202 offsets. */
13203 for (i = 0; i < nops; i++)
13205 rtx reg;
13206 rtx offset;
13208 /* Convert a subreg of a mem into the mem itself. */
13209 if (GET_CODE (operands[nops + i]) == SUBREG)
13210 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13212 gcc_assert (MEM_P (operands[nops + i]));
13214 /* Don't reorder volatile memory references; it doesn't seem worth
13215 looking for the case where the order is ok anyway. */
13216 if (MEM_VOLATILE_P (operands[nops + i]))
13217 return 0;
13219 offset = const0_rtx;
13221 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13222 || (GET_CODE (reg) == SUBREG
13223 && REG_P (reg = SUBREG_REG (reg))))
13224 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13225 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13226 || (GET_CODE (reg) == SUBREG
13227 && REG_P (reg = SUBREG_REG (reg))))
13228 && (CONST_INT_P (offset
13229 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13231 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13232 ? operands[i] : SUBREG_REG (operands[i]));
13233 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13235 if (i == 0)
13237 base_reg = REGNO (reg);
13238 base_reg_rtx = reg;
13239 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13240 return 0;
13242 else if (base_reg != (int) REGNO (reg))
13243 /* Not addressed from the same base register. */
13244 return 0;
13246 /* If it isn't an integer register, then we can't do this. */
13247 if (unsorted_regs[i] < 0
13248 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13249 /* The effects are unpredictable if the base register is
13250 both updated and stored. */
13251 || (base_writeback && unsorted_regs[i] == base_reg)
13252 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13253 || unsorted_regs[i] > 14)
13254 return 0;
13256 unsorted_offsets[i] = INTVAL (offset);
13257 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13258 order[0] = i;
13260 else
13261 /* Not a suitable memory address. */
13262 return 0;
13265 /* All the useful information has now been extracted from the
13266 operands into unsorted_regs and unsorted_offsets; additionally,
13267 order[0] has been set to the lowest offset in the list. Sort
13268 the offsets into order, verifying that they are adjacent, and
13269 check that the register numbers are ascending. */
13270 if (!compute_offset_order (nops, unsorted_offsets, order,
13271 check_regs ? unsorted_regs : NULL))
13272 return 0;
13274 if (saved_order)
13275 memcpy (saved_order, order, sizeof order);
13277 if (base)
13279 *base = base_reg;
13281 for (i = 0; i < nops; i++)
13283 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13284 if (reg_rtxs)
13285 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13288 *load_offset = unsorted_offsets[order[0]];
13291 if (TARGET_THUMB1
13292 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13293 return 0;
13295 if (unsorted_offsets[order[0]] == 0)
13296 stm_case = 1; /* stmia */
13297 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13298 stm_case = 2; /* stmib */
13299 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13300 stm_case = 3; /* stmda */
13301 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13302 stm_case = 4; /* stmdb */
13303 else
13304 return 0;
13306 if (!multiple_operation_profitable_p (false, nops, 0))
13307 return 0;
13309 return stm_case;
13312 /* Routines for use in generating RTL. */
13314 /* Generate a load-multiple instruction. COUNT is the number of loads in
13315 the instruction; REGS and MEMS are arrays containing the operands.
13316 BASEREG is the base register to be used in addressing the memory operands.
13317 WBACK_OFFSET is nonzero if the instruction should update the base
13318 register. */
13320 static rtx
13321 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13322 HOST_WIDE_INT wback_offset)
13324 int i = 0, j;
13325 rtx result;
13327 if (!multiple_operation_profitable_p (false, count, 0))
13329 rtx seq;
13331 start_sequence ();
13333 for (i = 0; i < count; i++)
13334 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13336 if (wback_offset != 0)
13337 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13339 seq = get_insns ();
13340 end_sequence ();
13342 return seq;
13345 result = gen_rtx_PARALLEL (VOIDmode,
13346 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13347 if (wback_offset != 0)
13349 XVECEXP (result, 0, 0)
13350 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13351 i = 1;
13352 count++;
13355 for (j = 0; i < count; i++, j++)
13356 XVECEXP (result, 0, i)
13357 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13359 return result;
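/* For instance, with COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8 the
   function builds
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) mems[0])
                (set (reg:SI 5) mems[1])])
   ready to be matched as a load multiple with writeback.  */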
13362 /* Generate a store-multiple instruction. COUNT is the number of stores in
13363 the instruction; REGS and MEMS are arrays containing the operands.
13364 BASEREG is the base register to be used in addressing the memory operands.
13365 WBACK_OFFSET is nonzero if the instruction should update the base
13366 register. */
13368 static rtx
13369 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13370 HOST_WIDE_INT wback_offset)
13372 int i = 0, j;
13373 rtx result;
13375 if (GET_CODE (basereg) == PLUS)
13376 basereg = XEXP (basereg, 0);
13378 if (!multiple_operation_profitable_p (false, count, 0))
13380 rtx seq;
13382 start_sequence ();
13384 for (i = 0; i < count; i++)
13385 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13387 if (wback_offset != 0)
13388 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13390 seq = get_insns ();
13391 end_sequence ();
13393 return seq;
13396 result = gen_rtx_PARALLEL (VOIDmode,
13397 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13398 if (wback_offset != 0)
13400 XVECEXP (result, 0, 0)
13401 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13402 i = 1;
13403 count++;
13406 for (j = 0; i < count; i++, j++)
13407 XVECEXP (result, 0, i)
13408 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13410 return result;
13413 /* Generate either a load-multiple or a store-multiple instruction. This
13414 function can be used in situations where we can start with a single MEM
13415 rtx and adjust its address upwards.
13416 COUNT is the number of operations in the instruction, not counting a
13417 possible update of the base register. REGS is an array containing the
13418 register operands.
13419 BASEREG is the base register to be used in addressing the memory operands,
13420 which are constructed from BASEMEM.
13421 WRITE_BACK specifies whether the generated instruction should include an
13422 update of the base register.
13423 OFFSETP is used to pass an offset to and from this function; this offset
13424 is not used when constructing the address (instead BASEMEM should have an
13425 appropriate offset in its address), it is used only for setting
13426 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13428 static rtx
13429 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13430 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13432 rtx mems[MAX_LDM_STM_OPS];
13433 HOST_WIDE_INT offset = *offsetp;
13434 int i;
13436 gcc_assert (count <= MAX_LDM_STM_OPS);
13438 if (GET_CODE (basereg) == PLUS)
13439 basereg = XEXP (basereg, 0);
13441 for (i = 0; i < count; i++)
13443 rtx addr = plus_constant (Pmode, basereg, i * 4);
13444 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13445 offset += 4;
13448 if (write_back)
13449 *offsetp = offset;
13451 if (is_load)
13452 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13453 write_back ? 4 * count : 0);
13454 else
13455 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13456 write_back ? 4 * count : 0);
13460 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13461 rtx basemem, HOST_WIDE_INT *offsetp)
13463 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13464 offsetp);
13468 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13469 rtx basemem, HOST_WIDE_INT *offsetp)
13471 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13472 offsetp);
13475 /* Called from a peephole2 expander to turn a sequence of loads into an
13476 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13477 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13478 is true if we can reorder the registers because they are used commutatively
13479 subsequently.
13480 Returns true iff we could generate a new instruction. */
13482 bool
13483 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13485 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13486 rtx mems[MAX_LDM_STM_OPS];
13487 int i, j, base_reg;
13488 rtx base_reg_rtx;
13489 HOST_WIDE_INT offset;
13490 int write_back = FALSE;
13491 int ldm_case;
13492 rtx addr;
13494 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13495 &base_reg, &offset, !sort_regs);
13497 if (ldm_case == 0)
13498 return false;
13500 if (sort_regs)
13501 for (i = 0; i < nops - 1; i++)
13502 for (j = i + 1; j < nops; j++)
13503 if (regs[i] > regs[j])
13505 int t = regs[i];
13506 regs[i] = regs[j];
13507 regs[j] = t;
13509 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13511 if (TARGET_THUMB1)
13513 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13514 gcc_assert (ldm_case == 1 || ldm_case == 5);
13515 write_back = TRUE;
13518 if (ldm_case == 5)
13520 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13521 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13522 offset = 0;
13523 if (!TARGET_THUMB1)
13525 base_reg = regs[0];
13526 base_reg_rtx = newbase;
13530 for (i = 0; i < nops; i++)
13532 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13533 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13534 SImode, addr, 0);
13536 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13537 write_back ? offset + i * 4 : 0));
13538 return true;
13541 /* Called from a peephole2 expander to turn a sequence of stores into an
13542 STM instruction. OPERANDS are the operands found by the peephole matcher;
13543 NOPS indicates how many separate stores we are trying to combine.
13544 Returns true iff we could generate a new instruction. */
13546 bool
13547 gen_stm_seq (rtx *operands, int nops)
13549 int i;
13550 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13551 rtx mems[MAX_LDM_STM_OPS];
13552 int base_reg;
13553 rtx base_reg_rtx;
13554 HOST_WIDE_INT offset;
13555 int write_back = FALSE;
13556 int stm_case;
13557 rtx addr;
13558 bool base_reg_dies;
13560 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13561 mem_order, &base_reg, &offset, true);
13563 if (stm_case == 0)
13564 return false;
13566 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13568 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13569 if (TARGET_THUMB1)
13571 gcc_assert (base_reg_dies);
13572 write_back = TRUE;
13575 if (stm_case == 5)
13577 gcc_assert (base_reg_dies);
13578 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13579 offset = 0;
13582 addr = plus_constant (Pmode, base_reg_rtx, offset);
13584 for (i = 0; i < nops; i++)
13586 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13587 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13588 SImode, addr, 0);
13590 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13591 write_back ? offset + i * 4 : 0));
13592 return true;
13595 /* Called from a peephole2 expander to turn a sequence of stores that are
13596 preceded by constant loads into an STM instruction. OPERANDS are the
13597 operands found by the peephole matcher; NOPS indicates how many
13598 separate stores we are trying to combine; there are 2 * NOPS
13599 instructions in the peephole.
13600 Returns true iff we could generate a new instruction. */
13602 bool
13603 gen_const_stm_seq (rtx *operands, int nops)
13605 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13606 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13607 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13608 rtx mems[MAX_LDM_STM_OPS];
13609 int base_reg;
13610 rtx base_reg_rtx;
13611 HOST_WIDE_INT offset;
13612 int write_back = FALSE;
13613 int stm_case;
13614 rtx addr;
13615 bool base_reg_dies;
13616 int i, j;
13617 HARD_REG_SET allocated;
13619 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13620 mem_order, &base_reg, &offset, false);
13622 if (stm_case == 0)
13623 return false;
13625 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13627 /* If the same register is used more than once, try to find a free
13628 register. */
13629 CLEAR_HARD_REG_SET (allocated);
13630 for (i = 0; i < nops; i++)
13632 for (j = i + 1; j < nops; j++)
13633 if (regs[i] == regs[j])
13635 rtx t = peep2_find_free_register (0, nops * 2,
13636 TARGET_THUMB1 ? "l" : "r",
13637 SImode, &allocated);
13638 if (t == NULL_RTX)
13639 return false;
13640 reg_rtxs[i] = t;
13641 regs[i] = REGNO (t);
13645 /* Compute an ordering that maps the register numbers to an ascending
13646 sequence. */
13647 reg_order[0] = 0;
13648 for (i = 0; i < nops; i++)
13649 if (regs[i] < regs[reg_order[0]])
13650 reg_order[0] = i;
13652 for (i = 1; i < nops; i++)
13654 int this_order = reg_order[i - 1];
13655 for (j = 0; j < nops; j++)
13656 if (regs[j] > regs[reg_order[i - 1]]
13657 && (this_order == reg_order[i - 1]
13658 || regs[j] < regs[this_order]))
13659 this_order = j;
13660 reg_order[i] = this_order;
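/* For example, REGS == {3, 1, 2} gives REG_ORDER == {1, 2, 0}, i.e. the
   stores are visited in ascending register-number order.  */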
13663 /* Ensure that registers that must be live after the instruction end
13664 up with the correct value. */
13665 for (i = 0; i < nops; i++)
13667 int this_order = reg_order[i];
13668 if ((this_order != mem_order[i]
13669 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13670 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13671 return false;
13674 /* Load the constants. */
13675 for (i = 0; i < nops; i++)
13677 rtx op = operands[2 * nops + mem_order[i]];
13678 sorted_regs[i] = regs[reg_order[i]];
13679 emit_move_insn (reg_rtxs[reg_order[i]], op);
13682 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13684 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13685 if (TARGET_THUMB1)
13687 gcc_assert (base_reg_dies);
13688 write_back = TRUE;
13691 if (stm_case == 5)
13693 gcc_assert (base_reg_dies);
13694 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13695 offset = 0;
13698 addr = plus_constant (Pmode, base_reg_rtx, offset);
13700 for (i = 0; i < nops; i++)
13702 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13703 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13704 SImode, addr, 0);
13706 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13707 write_back ? offset + i * 4 : 0));
13708 return true;
13711 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13712 unaligned copies on processors which support unaligned semantics for those
13713 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13714 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13715 An interleave factor of 1 (the minimum) will perform no interleaving.
13716 Load/store multiple are used for aligned addresses where possible. */
13718 static void
13719 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13720 HOST_WIDE_INT length,
13721 unsigned int interleave_factor)
13723 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13724 int *regnos = XALLOCAVEC (int, interleave_factor);
13725 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13726 HOST_WIDE_INT i, j;
13727 HOST_WIDE_INT remaining = length, words;
13728 rtx halfword_tmp = NULL, byte_tmp = NULL;
13729 rtx dst, src;
13730 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13731 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13732 HOST_WIDE_INT srcoffset, dstoffset;
13733 HOST_WIDE_INT src_autoinc, dst_autoinc;
13734 rtx mem, addr;
13736 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13738 /* Use hard registers if we have aligned source or destination so we can use
13739 load/store multiple with contiguous registers. */
13740 if (dst_aligned || src_aligned)
13741 for (i = 0; i < interleave_factor; i++)
13742 regs[i] = gen_rtx_REG (SImode, i);
13743 else
13744 for (i = 0; i < interleave_factor; i++)
13745 regs[i] = gen_reg_rtx (SImode);
13747 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13748 src = copy_addr_to_reg (XEXP (srcbase, 0));
13750 srcoffset = dstoffset = 0;
13752 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13753 For copying the last bytes we want to subtract this offset again. */
13754 src_autoinc = dst_autoinc = 0;
13756 for (i = 0; i < interleave_factor; i++)
13757 regnos[i] = i;
13759 /* Copy BLOCK_SIZE_BYTES chunks. */
13761 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13763 /* Load words. */
13764 if (src_aligned && interleave_factor > 1)
13766 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13767 TRUE, srcbase, &srcoffset));
13768 src_autoinc += UNITS_PER_WORD * interleave_factor;
13770 else
13772 for (j = 0; j < interleave_factor; j++)
13774 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13775 - src_autoinc));
13776 mem = adjust_automodify_address (srcbase, SImode, addr,
13777 srcoffset + j * UNITS_PER_WORD);
13778 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13780 srcoffset += block_size_bytes;
13783 /* Store words. */
13784 if (dst_aligned && interleave_factor > 1)
13786 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13787 TRUE, dstbase, &dstoffset));
13788 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13790 else
13792 for (j = 0; j < interleave_factor; j++)
13794 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13795 - dst_autoinc));
13796 mem = adjust_automodify_address (dstbase, SImode, addr,
13797 dstoffset + j * UNITS_PER_WORD);
13798 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13800 dstoffset += block_size_bytes;
13803 remaining -= block_size_bytes;
13806 /* Copy any whole words left (note these aren't interleaved with any
13807 subsequent halfword/byte load/stores in the interests of simplicity). */
13809 words = remaining / UNITS_PER_WORD;
13811 gcc_assert (words < interleave_factor);
13813 if (src_aligned && words > 1)
13815 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13816 &srcoffset));
13817 src_autoinc += UNITS_PER_WORD * words;
13819 else
13821 for (j = 0; j < words; j++)
13823 addr = plus_constant (Pmode, src,
13824 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13825 mem = adjust_automodify_address (srcbase, SImode, addr,
13826 srcoffset + j * UNITS_PER_WORD);
13827 if (src_aligned)
13828 emit_move_insn (regs[j], mem);
13829 else
13830 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13832 srcoffset += words * UNITS_PER_WORD;
13835 if (dst_aligned && words > 1)
13837 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13838 &dstoffset));
13839 dst_autoinc += words * UNITS_PER_WORD;
13841 else
13843 for (j = 0; j < words; j++)
13845 addr = plus_constant (Pmode, dst,
13846 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13847 mem = adjust_automodify_address (dstbase, SImode, addr,
13848 dstoffset + j * UNITS_PER_WORD);
13849 if (dst_aligned)
13850 emit_move_insn (mem, regs[j]);
13851 else
13852 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13854 dstoffset += words * UNITS_PER_WORD;
13857 remaining -= words * UNITS_PER_WORD;
13859 gcc_assert (remaining < 4);
13861 /* Copy a halfword if necessary. */
13863 if (remaining >= 2)
13865 halfword_tmp = gen_reg_rtx (SImode);
13867 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13868 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13869 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13871 /* Either write out immediately, or delay until we've loaded the last
13872 byte, depending on interleave factor. */
13873 if (interleave_factor == 1)
13875 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13876 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13877 emit_insn (gen_unaligned_storehi (mem,
13878 gen_lowpart (HImode, halfword_tmp)));
13879 halfword_tmp = NULL;
13880 dstoffset += 2;
13883 remaining -= 2;
13884 srcoffset += 2;
13887 gcc_assert (remaining < 2);
13889 /* Copy last byte. */
13891 if ((remaining & 1) != 0)
13893 byte_tmp = gen_reg_rtx (SImode);
13895 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13896 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
13897 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
13899 if (interleave_factor == 1)
13901 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13902 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13903 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13904 byte_tmp = NULL;
13905 dstoffset++;
13908 remaining--;
13909 srcoffset++;
13912 /* Store last halfword if we haven't done so already. */
13914 if (halfword_tmp)
13916 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13917 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13918 emit_insn (gen_unaligned_storehi (mem,
13919 gen_lowpart (HImode, halfword_tmp)));
13920 dstoffset += 2;
13923 /* Likewise for last byte. */
13925 if (byte_tmp)
13927 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13928 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
13929 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
13930 dstoffset++;
13933 gcc_assert (remaining == 0 && srcoffset == dstoffset);
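/* For illustration, LENGTH == 11 with INTERLEAVE_FACTOR == 2 copies one
   8-byte chunk in the main loop, no whole words afterwards, and then one
   halfword and one byte; because the interleave factor is greater than 1,
   the trailing halfword and byte stores are deferred to the end.  */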
13936 /* From mips_adjust_block_mem:
13938 Helper function for doing a loop-based block operation on memory
13939 reference MEM. Each iteration of the loop will operate on LENGTH
13940 bytes of MEM.
13942 Create a new base register for use within the loop and point it to
13943 the start of MEM. Create a new memory reference that uses this
13944 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13946 static void
13947 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
13948 rtx *loop_mem)
13950 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
13952 /* Although the new mem does not refer to a known location,
13953 it does keep up to LENGTH bytes of alignment. */
13954 *loop_mem = change_address (mem, BLKmode, *loop_reg);
13955 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
13958 /* From mips_block_move_loop:
13960 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13961 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13962 the memory regions do not overlap. */
13964 static void
13965 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
13966 unsigned int interleave_factor,
13967 HOST_WIDE_INT bytes_per_iter)
13969 rtx src_reg, dest_reg, final_src, test;
13970 HOST_WIDE_INT leftover;
13972 leftover = length % bytes_per_iter;
13973 length -= leftover;
13975 /* Create registers and memory references for use within the loop. */
13976 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
13977 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
13979 /* Calculate the value that SRC_REG should have after the last iteration of
13980 the loop. */
13981 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
13982 0, 0, OPTAB_WIDEN);
13984 /* Emit the start of the loop. */
13985 rtx_code_label *label = gen_label_rtx ();
13986 emit_label (label);
13988 /* Emit the loop body. */
13989 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
13990 interleave_factor);
13992 /* Move on to the next block. */
13993 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
13994 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
13996 /* Emit the loop condition. */
13997 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
13998 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14000 /* Mop up any left-over bytes. */
14001 if (leftover)
14002 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14005 /* Emit a block move when either the source or destination is unaligned (not
14006 aligned to a four-byte boundary). This may need further tuning depending on
14007 core type, optimize_size setting, etc. */
14009 static int
14010 arm_movmemqi_unaligned (rtx *operands)
14012 HOST_WIDE_INT length = INTVAL (operands[2]);
14014 if (optimize_size)
14016 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14017 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14018 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14019 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14020 or dst_aligned though: allow more interleaving in those cases since the
14021 resulting code can be smaller. */
14022 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14023 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14025 if (length > 12)
14026 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14027 interleave_factor, bytes_per_iter);
14028 else
14029 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14030 interleave_factor);
14032 else
14034 /* Note that the loop created by arm_block_move_unaligned_loop may be
14035 subject to loop unrolling, which makes tuning this condition a little
14036 redundant. */
14037 if (length > 32)
14038 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14039 else
14040 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14043 return 1;
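/* For example, at -Os with neither operand word aligned, a 32-byte copy is
   expanded through arm_block_move_unaligned_loop with an interleave factor
   of 1 and 4 bytes per iteration, while copies of 12 bytes or fewer are
   expanded straight-line.  */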
14047 arm_gen_movmemqi (rtx *operands)
14049 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14050 HOST_WIDE_INT srcoffset, dstoffset;
14051 int i;
14052 rtx src, dst, srcbase, dstbase;
14053 rtx part_bytes_reg = NULL;
14054 rtx mem;
14056 if (!CONST_INT_P (operands[2])
14057 || !CONST_INT_P (operands[3])
14058 || INTVAL (operands[2]) > 64)
14059 return 0;
14061 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14062 return arm_movmemqi_unaligned (operands);
14064 if (INTVAL (operands[3]) & 3)
14065 return 0;
14067 dstbase = operands[0];
14068 srcbase = operands[1];
14070 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14071 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14073 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14074 out_words_to_go = INTVAL (operands[2]) / 4;
14075 last_bytes = INTVAL (operands[2]) & 3;
14076 dstoffset = srcoffset = 0;
14078 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14079 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14081 for (i = 0; in_words_to_go >= 2; i+=4)
14083 if (in_words_to_go > 4)
14084 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14085 TRUE, srcbase, &srcoffset));
14086 else
14087 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14088 src, FALSE, srcbase,
14089 &srcoffset));
14091 if (out_words_to_go)
14093 if (out_words_to_go > 4)
14094 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14095 TRUE, dstbase, &dstoffset));
14096 else if (out_words_to_go != 1)
14097 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14098 out_words_to_go, dst,
14099 (last_bytes == 0
14100 ? FALSE : TRUE),
14101 dstbase, &dstoffset));
14102 else
14104 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14105 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14106 if (last_bytes != 0)
14108 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14109 dstoffset += 4;
14114 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14115 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14118 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14119 if (out_words_to_go)
14121 rtx sreg;
14123 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14124 sreg = copy_to_reg (mem);
14126 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14127 emit_move_insn (mem, sreg);
14128 in_words_to_go--;
14130 gcc_assert (!in_words_to_go); /* Sanity check */
14133 if (in_words_to_go)
14135 gcc_assert (in_words_to_go > 0);
14137 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14138 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14141 gcc_assert (!last_bytes || part_bytes_reg);
14143 if (BYTES_BIG_ENDIAN && last_bytes)
14145 rtx tmp = gen_reg_rtx (SImode);
14147 /* The bytes we want are in the top end of the word. */
14148 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14149 GEN_INT (8 * (4 - last_bytes))));
14150 part_bytes_reg = tmp;
14152 while (last_bytes)
14154 mem = adjust_automodify_address (dstbase, QImode,
14155 plus_constant (Pmode, dst,
14156 last_bytes - 1),
14157 dstoffset + last_bytes - 1);
14158 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14160 if (--last_bytes)
14162 tmp = gen_reg_rtx (SImode);
14163 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14164 part_bytes_reg = tmp;
14169 else
14171 if (last_bytes > 1)
14173 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14174 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14175 last_bytes -= 2;
14176 if (last_bytes)
14178 rtx tmp = gen_reg_rtx (SImode);
14179 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14180 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14181 part_bytes_reg = tmp;
14182 dstoffset += 2;
14186 if (last_bytes)
14188 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14189 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14193 return 1;
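/* For example, a 10-byte copy with word-aligned operands loads three words,
   stores the first two (updating the destination pointer, since trailing
   bytes remain) and then writes the last two bytes from the third register
   with a halfword store.  */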
14196 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14197 by mode size. */
14198 inline static rtx
14199 next_consecutive_mem (rtx mem)
14201 machine_mode mode = GET_MODE (mem);
14202 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14203 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14205 return adjust_automodify_address (mem, mode, addr, offset);
14208 /* Copy using LDRD/STRD instructions whenever possible.
14209 Returns true upon success. */
14210 bool
14211 gen_movmem_ldrd_strd (rtx *operands)
14213 unsigned HOST_WIDE_INT len;
14214 HOST_WIDE_INT align;
14215 rtx src, dst, base;
14216 rtx reg0;
14217 bool src_aligned, dst_aligned;
14218 bool src_volatile, dst_volatile;
14220 gcc_assert (CONST_INT_P (operands[2]));
14221 gcc_assert (CONST_INT_P (operands[3]));
14223 len = UINTVAL (operands[2]);
14224 if (len > 64)
14225 return false;
14227 /* Maximum alignment we can assume for both src and dst buffers. */
14228 align = INTVAL (operands[3]);
14230 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14231 return false;
14233 /* Place src and dst addresses in registers
14234 and update the corresponding mem rtx. */
14235 dst = operands[0];
14236 dst_volatile = MEM_VOLATILE_P (dst);
14237 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14238 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14239 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14241 src = operands[1];
14242 src_volatile = MEM_VOLATILE_P (src);
14243 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14244 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14245 src = adjust_automodify_address (src, VOIDmode, base, 0);
14247 if (!unaligned_access && !(src_aligned && dst_aligned))
14248 return false;
14250 if (src_volatile || dst_volatile)
14251 return false;
14253 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14254 if (!(dst_aligned || src_aligned))
14255 return arm_gen_movmemqi (operands);
14257 /* If either src or dst is unaligned we'll be accessing it as pairs
14258 of unaligned SImode accesses. Otherwise we can generate DImode
14259 ldrd/strd instructions. */
14260 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14261 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14263 while (len >= 8)
14265 len -= 8;
14266 reg0 = gen_reg_rtx (DImode);
14267 rtx low_reg = NULL_RTX;
14268 rtx hi_reg = NULL_RTX;
14270 if (!src_aligned || !dst_aligned)
14272 low_reg = gen_lowpart (SImode, reg0);
14273 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14275 if (src_aligned)
14276 emit_move_insn (reg0, src);
14277 else
14279 emit_insn (gen_unaligned_loadsi (low_reg, src));
14280 src = next_consecutive_mem (src);
14281 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14284 if (dst_aligned)
14285 emit_move_insn (dst, reg0);
14286 else
14288 emit_insn (gen_unaligned_storesi (dst, low_reg));
14289 dst = next_consecutive_mem (dst);
14290 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14293 src = next_consecutive_mem (src);
14294 dst = next_consecutive_mem (dst);
14297 gcc_assert (len < 8);
14298 if (len >= 4)
14300 /* More than a word but less than a double-word to copy. Copy a word. */
14301 reg0 = gen_reg_rtx (SImode);
14302 src = adjust_address (src, SImode, 0);
14303 dst = adjust_address (dst, SImode, 0);
14304 if (src_aligned)
14305 emit_move_insn (reg0, src);
14306 else
14307 emit_insn (gen_unaligned_loadsi (reg0, src));
14309 if (dst_aligned)
14310 emit_move_insn (dst, reg0);
14311 else
14312 emit_insn (gen_unaligned_storesi (dst, reg0));
14314 src = next_consecutive_mem (src);
14315 dst = next_consecutive_mem (dst);
14316 len -= 4;
14319 if (len == 0)
14320 return true;
14322 /* Copy the remaining bytes. */
14323 if (len >= 2)
14325 dst = adjust_address (dst, HImode, 0);
14326 src = adjust_address (src, HImode, 0);
14327 reg0 = gen_reg_rtx (SImode);
14328 if (src_aligned)
14329 emit_insn (gen_zero_extendhisi2 (reg0, src));
14330 else
14331 emit_insn (gen_unaligned_loadhiu (reg0, src));
14333 if (dst_aligned)
14334 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14335 else
14336 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14338 src = next_consecutive_mem (src);
14339 dst = next_consecutive_mem (dst);
14340 if (len == 2)
14341 return true;
14344 dst = adjust_address (dst, QImode, 0);
14345 src = adjust_address (src, QImode, 0);
14346 reg0 = gen_reg_rtx (QImode);
14347 emit_move_insn (reg0, src);
14348 emit_move_insn (dst, reg0);
14349 return true;
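/* For example, a 14-byte copy with both operands word aligned is split into
   one DImode (ldrd/strd) copy, one SImode word copy and one final halfword
   copy.  */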
14352 /* Select a dominance comparison mode if possible for a test of the general
14353 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14354 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14355 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14356 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14357 In all cases OP will be either EQ or NE, but we don't need to know which
14358 here. If we are unable to support a dominance comparison we return
14359 CC mode. This will then fail to match for the RTL expressions that
14360 generate this call. */
14361 machine_mode
14362 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14364 enum rtx_code cond1, cond2;
14365 int swapped = 0;
14367 /* Currently we will probably get the wrong result if the individual
14368 comparisons are not simple. This also ensures that it is safe to
14369 reverse a comparison if necessary. */
14370 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14371 != CCmode)
14372 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14373 != CCmode))
14374 return CCmode;
14376 /* The if_then_else variant of this tests the second condition if the
14377 first passes, but is true if the first fails. Reverse the first
14378 condition to get a true "inclusive-or" expression. */
14379 if (cond_or == DOM_CC_NX_OR_Y)
14380 cond1 = reverse_condition (cond1);
14382 /* If the comparisons are not equal, and one doesn't dominate the other,
14383 then we can't do this. */
14384 if (cond1 != cond2
14385 && !comparison_dominates_p (cond1, cond2)
14386 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14387 return CCmode;
14389 if (swapped)
14390 std::swap (cond1, cond2);
14392 switch (cond1)
14394 case EQ:
14395 if (cond_or == DOM_CC_X_AND_Y)
14396 return CC_DEQmode;
14398 switch (cond2)
14400 case EQ: return CC_DEQmode;
14401 case LE: return CC_DLEmode;
14402 case LEU: return CC_DLEUmode;
14403 case GE: return CC_DGEmode;
14404 case GEU: return CC_DGEUmode;
14405 default: gcc_unreachable ();
14408 case LT:
14409 if (cond_or == DOM_CC_X_AND_Y)
14410 return CC_DLTmode;
14412 switch (cond2)
14414 case LT:
14415 return CC_DLTmode;
14416 case LE:
14417 return CC_DLEmode;
14418 case NE:
14419 return CC_DNEmode;
14420 default:
14421 gcc_unreachable ();
14424 case GT:
14425 if (cond_or == DOM_CC_X_AND_Y)
14426 return CC_DGTmode;
14428 switch (cond2)
14430 case GT:
14431 return CC_DGTmode;
14432 case GE:
14433 return CC_DGEmode;
14434 case NE:
14435 return CC_DNEmode;
14436 default:
14437 gcc_unreachable ();
14440 case LTU:
14441 if (cond_or == DOM_CC_X_AND_Y)
14442 return CC_DLTUmode;
14444 switch (cond2)
14446 case LTU:
14447 return CC_DLTUmode;
14448 case LEU:
14449 return CC_DLEUmode;
14450 case NE:
14451 return CC_DNEmode;
14452 default:
14453 gcc_unreachable ();
14456 case GTU:
14457 if (cond_or == DOM_CC_X_AND_Y)
14458 return CC_DGTUmode;
14460 switch (cond2)
14462 case GTU:
14463 return CC_DGTUmode;
14464 case GEU:
14465 return CC_DGEUmode;
14466 case NE:
14467 return CC_DNEmode;
14468 default:
14469 gcc_unreachable ();
14472 /* The remaining cases only occur when both comparisons are the
14473 same. */
14474 case NE:
14475 gcc_assert (cond1 == cond2);
14476 return CC_DNEmode;
14478 case LE:
14479 gcc_assert (cond1 == cond2);
14480 return CC_DLEmode;
14482 case GE:
14483 gcc_assert (cond1 == cond2);
14484 return CC_DGEmode;
14486 case LEU:
14487 gcc_assert (cond1 == cond2);
14488 return CC_DLEUmode;
14490 case GEU:
14491 gcc_assert (cond1 == cond2);
14492 return CC_DGEUmode;
14494 default:
14495 gcc_unreachable ();
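/* For example, combining an EQ comparison with a GE comparison under
   DOM_CC_X_OR_Y yields CC_DGEmode, since a true EQ result implies a true
   GE result.  */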
14499 machine_mode
14500 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14502 /* All floating point compares return CCFP if it is an equality
14503 comparison, and CCFPE otherwise. */
14504 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14506 switch (op)
14508 case EQ:
14509 case NE:
14510 case UNORDERED:
14511 case ORDERED:
14512 case UNLT:
14513 case UNLE:
14514 case UNGT:
14515 case UNGE:
14516 case UNEQ:
14517 case LTGT:
14518 return CCFPmode;
14520 case LT:
14521 case LE:
14522 case GT:
14523 case GE:
14524 return CCFPEmode;
14526 default:
14527 gcc_unreachable ();
14531 /* A compare with a shifted operand. Because of canonicalization, the
14532 comparison will have to be swapped when we emit the assembler. */
14533 if (GET_MODE (y) == SImode
14534 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14535 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14536 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14537 || GET_CODE (x) == ROTATERT))
14538 return CC_SWPmode;
14540 /* This operation is performed swapped, but since we only rely on the Z
14541 flag we don't need an additional mode. */
14542 if (GET_MODE (y) == SImode
14543 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14544 && GET_CODE (x) == NEG
14545 && (op == EQ || op == NE))
14546 return CC_Zmode;
14548 /* This is a special case that is used by combine to allow a
14549 comparison of a shifted byte load to be split into a zero-extend
14550 followed by a comparison of the shifted integer (only valid for
14551 equalities and unsigned inequalities). */
14552 if (GET_MODE (x) == SImode
14553 && GET_CODE (x) == ASHIFT
14554 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14555 && GET_CODE (XEXP (x, 0)) == SUBREG
14556 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14557 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14558 && (op == EQ || op == NE
14559 || op == GEU || op == GTU || op == LTU || op == LEU)
14560 && CONST_INT_P (y))
14561 return CC_Zmode;
14563 /* A construct for a conditional compare: if the false arm contains
14564 0, then both conditions must be true; otherwise either condition
14565 must be true. Not all conditions are possible, so CCmode is
14566 returned if it can't be done. */
14567 if (GET_CODE (x) == IF_THEN_ELSE
14568 && (XEXP (x, 2) == const0_rtx
14569 || XEXP (x, 2) == const1_rtx)
14570 && COMPARISON_P (XEXP (x, 0))
14571 && COMPARISON_P (XEXP (x, 1)))
14572 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14573 INTVAL (XEXP (x, 2)));
14575 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14576 if (GET_CODE (x) == AND
14577 && (op == EQ || op == NE)
14578 && COMPARISON_P (XEXP (x, 0))
14579 && COMPARISON_P (XEXP (x, 1)))
14580 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14581 DOM_CC_X_AND_Y);
14583 if (GET_CODE (x) == IOR
14584 && (op == EQ || op == NE)
14585 && COMPARISON_P (XEXP (x, 0))
14586 && COMPARISON_P (XEXP (x, 1)))
14587 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14588 DOM_CC_X_OR_Y);
14590 /* An operation (on Thumb) where we want to test for a single bit.
14591 This is done by shifting that bit up into the top bit of a
14592 scratch register; we can then branch on the sign bit. */
14593 if (TARGET_THUMB1
14594 && GET_MODE (x) == SImode
14595 && (op == EQ || op == NE)
14596 && GET_CODE (x) == ZERO_EXTRACT
14597 && XEXP (x, 1) == const1_rtx)
14598 return CC_Nmode;
14600 /* An operation that sets the condition codes as a side-effect, the
14601 V flag is not set correctly, so we can only use comparisons where
14602 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14603 instead.) */
14604 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14605 if (GET_MODE (x) == SImode
14606 && y == const0_rtx
14607 && (op == EQ || op == NE || op == LT || op == GE)
14608 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14609 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14610 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14611 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14612 || GET_CODE (x) == LSHIFTRT
14613 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14614 || GET_CODE (x) == ROTATERT
14615 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14616 return CC_NOOVmode;
14618 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14619 return CC_Zmode;
14621 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14622 && GET_CODE (x) == PLUS
14623 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14624 return CC_Cmode;
14626 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14628 switch (op)
14630 case EQ:
14631 case NE:
14632 /* A DImode comparison against zero can be implemented by
14633 or'ing the two halves together. */
14634 if (y == const0_rtx)
14635 return CC_Zmode;
14637 /* We can do an equality test in three Thumb instructions. */
14638 if (!TARGET_32BIT)
14639 return CC_Zmode;
14641 /* FALLTHROUGH */
14643 case LTU:
14644 case LEU:
14645 case GTU:
14646 case GEU:
14647 /* DImode unsigned comparisons can be implemented by cmp +
14648 cmpeq without a scratch register. Not worth doing in
14649 Thumb-2. */
14650 if (TARGET_32BIT)
14651 return CC_CZmode;
14653 /* FALLTHROUGH */
14655 case LT:
14656 case LE:
14657 case GT:
14658 case GE:
14659 /* DImode signed and unsigned comparisons can be implemented
14660 by cmp + sbcs with a scratch register, but that does not
14661 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14662 gcc_assert (op != EQ && op != NE);
14663 return CC_NCVmode;
14665 default:
14666 gcc_unreachable ();
14670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14671 return GET_MODE (x);
14673 return CCmode;
14676 /* X and Y are two things to compare using CODE. Emit the compare insn and
14677 return the rtx for register 0 in the proper mode. FP means this is a
14678 floating point compare: I don't think that it is needed on the arm. */
14680 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14682 machine_mode mode;
14683 rtx cc_reg;
14684 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14686 /* We might have X as a constant, Y as a register because of the predicates
14687 used for cmpdi. If so, force X to a register here. */
14688 if (dimode_comparison && !REG_P (x))
14689 x = force_reg (DImode, x);
14691 mode = SELECT_CC_MODE (code, x, y);
14692 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14694 if (dimode_comparison
14695 && mode != CC_CZmode)
14697 rtx clobber, set;
14699 /* To compare two non-zero values for equality, XOR them and
14700 then compare against zero. Not used for ARM mode; there
14701 CC_CZmode is cheaper. */
14702 if (mode == CC_Zmode && y != const0_rtx)
14704 gcc_assert (!reload_completed);
14705 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14706 y = const0_rtx;
14709 /* A scratch register is required. */
14710 if (reload_completed)
14711 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14712 else
14713 scratch = gen_rtx_SCRATCH (SImode);
14715 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14716 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14717 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14719 else
14720 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14722 return cc_reg;
14725 /* Generate a sequence of insns that will generate the correct return
14726 address mask depending on the physical architecture that the program
14727 is running on. */
14729 arm_gen_return_addr_mask (void)
14731 rtx reg = gen_reg_rtx (Pmode);
14733 emit_insn (gen_return_addr_mask (reg));
14734 return reg;
14737 void
14738 arm_reload_in_hi (rtx *operands)
14740 rtx ref = operands[1];
14741 rtx base, scratch;
14742 HOST_WIDE_INT offset = 0;
14744 if (GET_CODE (ref) == SUBREG)
14746 offset = SUBREG_BYTE (ref);
14747 ref = SUBREG_REG (ref);
14750 if (REG_P (ref))
14752 /* We have a pseudo which has been spilt onto the stack; there
14753 are two cases here: the first where there is a simple
14754 stack-slot replacement and a second where the stack-slot is
14755 out of range, or is used as a subreg. */
14756 if (reg_equiv_mem (REGNO (ref)))
14758 ref = reg_equiv_mem (REGNO (ref));
14759 base = find_replacement (&XEXP (ref, 0));
14761 else
14762 /* The slot is out of range, or was dressed up in a SUBREG. */
14763 base = reg_equiv_address (REGNO (ref));
14765 /* PR 62554: If there is no equivalent memory location then just move
14766 the value as an SImode register move. This happens when the target
14767 architecture variant does not have an HImode register move. */
14768 if (base == NULL)
14770 gcc_assert (REG_P (operands[0]));
14771 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14772 gen_rtx_SUBREG (SImode, ref, 0)));
14773 return;
14776 else
14777 base = find_replacement (&XEXP (ref, 0));
14779 /* Handle the case where the address is too complex to be offset by 1. */
14780 if (GET_CODE (base) == MINUS
14781 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14783 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14785 emit_set_insn (base_plus, base);
14786 base = base_plus;
14788 else if (GET_CODE (base) == PLUS)
14790 /* The addend must be CONST_INT, or we would have dealt with it above. */
14791 HOST_WIDE_INT hi, lo;
14793 offset += INTVAL (XEXP (base, 1));
14794 base = XEXP (base, 0);
14796 /* Rework the address into a legal sequence of insns. */
14797 /* Valid range for lo is -4095 -> 4095 */
14798 lo = (offset >= 0
14799 ? (offset & 0xfff)
14800 : -((-offset) & 0xfff));
14802 /* Corner case: if lo is the max offset then we would be out of range
14803 once we have added the additional 1 below, so bump the msb into the
14804 pre-loading insn(s). */
14805 if (lo == 4095)
14806 lo &= 0x7ff;
14808 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14809 ^ (HOST_WIDE_INT) 0x80000000)
14810 - (HOST_WIDE_INT) 0x80000000);
14812 gcc_assert (hi + lo == offset);
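/* For example, an offset of 4095 is split into hi == 2048 (added to the
   base by the addsi3 below) and lo == 2047, so that both lo and lo + 1 are
   still valid byte-load offsets.  */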
14814 if (hi != 0)
14816 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14818 /* Get the base address; addsi3 knows how to handle constants
14819 that require more than one insn. */
14820 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14821 base = base_plus;
14822 offset = lo;
14826 /* Operands[2] may overlap operands[0] (though it won't overlap
14827 operands[1]); that's why we asked for a DImode reg -- so we can
14828 use the bit that does not overlap. */
14829 if (REGNO (operands[2]) == REGNO (operands[0]))
14830 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14831 else
14832 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14834 emit_insn (gen_zero_extendqisi2 (scratch,
14835 gen_rtx_MEM (QImode,
14836 plus_constant (Pmode, base,
14837 offset))));
14838 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14839 gen_rtx_MEM (QImode,
14840 plus_constant (Pmode, base,
14841 offset + 1))));
14842 if (!BYTES_BIG_ENDIAN)
14843 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14844 gen_rtx_IOR (SImode,
14845 gen_rtx_ASHIFT
14846 (SImode,
14847 gen_rtx_SUBREG (SImode, operands[0], 0),
14848 GEN_INT (8)),
14849 scratch));
14850 else
14851 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14852 gen_rtx_IOR (SImode,
14853 gen_rtx_ASHIFT (SImode, scratch,
14854 GEN_INT (8)),
14855 gen_rtx_SUBREG (SImode, operands[0], 0)));
14858 /* Handle storing a half-word to memory during reload by synthesizing as two
14859 byte stores. Take care not to clobber the input values until after we
14860 have moved them somewhere safe. This code assumes that if the DImode
14861 scratch in operands[2] overlaps either the input value or output address
14862 in some way, then that value must die in this insn (we absolutely need
14863 two scratch registers for some corner cases). */
14864 void
14865 arm_reload_out_hi (rtx *operands)
14867 rtx ref = operands[0];
14868 rtx outval = operands[1];
14869 rtx base, scratch;
14870 HOST_WIDE_INT offset = 0;
14872 if (GET_CODE (ref) == SUBREG)
14874 offset = SUBREG_BYTE (ref);
14875 ref = SUBREG_REG (ref);
14878 if (REG_P (ref))
14880 /* We have a pseudo which has been spilt onto the stack; there
14881 are two cases here: the first where there is a simple
14882 stack-slot replacement and a second where the stack-slot is
14883 out of range, or is used as a subreg. */
14884 if (reg_equiv_mem (REGNO (ref)))
14886 ref = reg_equiv_mem (REGNO (ref));
14887 base = find_replacement (&XEXP (ref, 0));
14889 else
14890 /* The slot is out of range, or was dressed up in a SUBREG. */
14891 base = reg_equiv_address (REGNO (ref));
14893 /* PR 62254: If there is no equivalent memory location then just move
14894 the value as an SImode register move. This happens when the target
14895 architecture variant does not have an HImode register move. */
14896 if (base == NULL)
14898 gcc_assert (REG_P (outval) || SUBREG_P (outval));
14900 if (REG_P (outval))
14902 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14903 gen_rtx_SUBREG (SImode, outval, 0)));
14905 else /* SUBREG_P (outval) */
14907 if (GET_MODE (SUBREG_REG (outval)) == SImode)
14908 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
14909 SUBREG_REG (outval)));
14910 else
14911 /* FIXME: Handle other cases ? */
14912 gcc_unreachable ();
14914 return;
14917 else
14918 base = find_replacement (&XEXP (ref, 0));
14920 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14922 /* Handle the case where the address is too complex to be offset by 1. */
14923 if (GET_CODE (base) == MINUS
14924 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14926 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14928 /* Be careful not to destroy OUTVAL. */
14929 if (reg_overlap_mentioned_p (base_plus, outval))
14931 /* Updating base_plus might destroy outval, see if we can
14932 swap the scratch and base_plus. */
14933 if (!reg_overlap_mentioned_p (scratch, outval))
14934 std::swap (scratch, base_plus);
14935 else
14937 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14939 /* Be conservative and copy OUTVAL into the scratch now,
14940 this should only be necessary if outval is a subreg
14941 of something larger than a word. */
14942 /* XXX Might this clobber base? I can't see how it can,
14943 since scratch is known to overlap with OUTVAL, and
14944 must be wider than a word. */
14945 emit_insn (gen_movhi (scratch_hi, outval));
14946 outval = scratch_hi;
14950 emit_set_insn (base_plus, base);
14951 base = base_plus;
14953 else if (GET_CODE (base) == PLUS)
14955 /* The addend must be CONST_INT, or we would have dealt with it above. */
14956 HOST_WIDE_INT hi, lo;
14958 offset += INTVAL (XEXP (base, 1));
14959 base = XEXP (base, 0);
14961 /* Rework the address into a legal sequence of insns. */
14962 /* Valid range for lo is -4095 -> 4095 */
14963 lo = (offset >= 0
14964 ? (offset & 0xfff)
14965 : -((-offset) & 0xfff));
14967 /* Corner case: if lo is the max offset then we would be out of range
14968 once we have added the additional 1 below, so bump the msb into the
14969 pre-loading insn(s). */
14970 if (lo == 4095)
14971 lo &= 0x7ff;
14973 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14974 ^ (HOST_WIDE_INT) 0x80000000)
14975 - (HOST_WIDE_INT) 0x80000000);
14977 gcc_assert (hi + lo == offset);
14979 if (hi != 0)
14981 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14983 /* Be careful not to destroy OUTVAL. */
14984 if (reg_overlap_mentioned_p (base_plus, outval))
14986 /* Updating base_plus might destroy outval, see if we
14987 can swap the scratch and base_plus. */
14988 if (!reg_overlap_mentioned_p (scratch, outval))
14989 std::swap (scratch, base_plus);
14990 else
14992 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
14994 /* Be conservative and copy outval into scratch now,
14995 this should only be necessary if outval is a
14996 subreg of something larger than a word. */
14997 /* XXX Might this clobber base? I can't see how it
14998 can, since scratch is known to overlap with
14999 outval. */
15000 emit_insn (gen_movhi (scratch_hi, outval));
15001 outval = scratch_hi;
15005 /* Get the base address; addsi3 knows how to handle constants
15006 that require more than one insn. */
15007 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15008 base = base_plus;
15009 offset = lo;
15013 if (BYTES_BIG_ENDIAN)
15015 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15016 plus_constant (Pmode, base,
15017 offset + 1)),
15018 gen_lowpart (QImode, outval)));
15019 emit_insn (gen_lshrsi3 (scratch,
15020 gen_rtx_SUBREG (SImode, outval, 0),
15021 GEN_INT (8)));
15022 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15023 offset)),
15024 gen_lowpart (QImode, scratch)));
15026 else
15028 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15029 offset)),
15030 gen_lowpart (QImode, outval)));
15031 emit_insn (gen_lshrsi3 (scratch,
15032 gen_rtx_SUBREG (SImode, outval, 0),
15033 GEN_INT (8)));
15034 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15035 plus_constant (Pmode, base,
15036 offset + 1)),
15037 gen_lowpart (QImode, scratch)));
15041 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15042 (padded to the size of a word) should be passed in a register. */
15044 static bool
15045 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15047 if (TARGET_AAPCS_BASED)
15048 return must_pass_in_stack_var_size (mode, type);
15049 else
15050 return must_pass_in_stack_var_size_or_pad (mode, type);
15054 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15055 Return true if an argument passed on the stack should be padded upwards,
15056 i.e. if the least-significant byte has useful data.
15057 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15058 aggregate types are placed in the lowest memory address. */
15060 bool
15061 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15063 if (!TARGET_AAPCS_BASED)
15064 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15066 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15067 return false;
15069 return true;
15073 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15074 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15075 register has useful data, and return the opposite if the most
15076 significant byte does. */
15078 bool
15079 arm_pad_reg_upward (machine_mode mode,
15080 tree type, int first ATTRIBUTE_UNUSED)
15082 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15084 /* For AAPCS, small aggregates, small fixed-point types,
15085 and small complex types are always padded upwards. */
15086 if (type)
15088 if ((AGGREGATE_TYPE_P (type)
15089 || TREE_CODE (type) == COMPLEX_TYPE
15090 || FIXED_POINT_TYPE_P (type))
15091 && int_size_in_bytes (type) <= 4)
15092 return true;
15094 else
15096 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15097 && GET_MODE_SIZE (mode) <= 4)
15098 return true;
15102 /* Otherwise, use default padding. */
15103 return !BYTES_BIG_ENDIAN;
15106 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15107 assuming that the address in the base register is word aligned. */
15108 bool
15109 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15111 HOST_WIDE_INT max_offset;
15113 /* Offset must be a multiple of 4 in Thumb mode. */
15114 if (TARGET_THUMB2 && ((offset & 3) != 0))
15115 return false;
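/* Thumb-2 LDRD/STRD takes an 8-bit immediate offset scaled by 4 (0..1020);
   ARM-state LDRD/STRD takes an unscaled 8-bit immediate (0..255).  */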
15117 if (TARGET_THUMB2)
15118 max_offset = 1020;
15119 else if (TARGET_ARM)
15120 max_offset = 255;
15121 else
15122 return false;
15124 return ((offset <= max_offset) && (offset >= -max_offset));
15127 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15128 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15129 Assumes that the address in the base register RN is word aligned. Pattern
15130 guarantees that both memory accesses use the same base register,
15131 the offsets are constants within the range, and the gap between the offsets is 4.
15132 If reload is complete then check that the registers are legal. WBACK indicates whether
15133 address is updated. LOAD indicates whether memory access is load or store. */
15134 bool
15135 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15136 bool wback, bool load)
15138 unsigned int t, t2, n;
15140 if (!reload_completed)
15141 return true;
15143 if (!offset_ok_for_ldrd_strd (offset))
15144 return false;
15146 t = REGNO (rt);
15147 t2 = REGNO (rt2);
15148 n = REGNO (rn);
15150 if ((TARGET_THUMB2)
15151 && ((wback && (n == t || n == t2))
15152 || (t == SP_REGNUM)
15153 || (t == PC_REGNUM)
15154 || (t2 == SP_REGNUM)
15155 || (t2 == PC_REGNUM)
15156 || (!load && (n == PC_REGNUM))
15157 || (load && (t == t2))
15158 /* Triggers Cortex-M3 LDRD errata. */
15159 || (!wback && load && fix_cm3_ldrd && (n == t))))
15160 return false;
15162 if ((TARGET_ARM)
15163 && ((wback && (n == t || n == t2))
15164 || (t2 == PC_REGNUM)
15165 || (t % 2 != 0) /* First destination register is not even. */
15166 || (t2 != t + 1)
15167 /* PC can be used as base register (for offset addressing only),
15168 but it is deprecated. */
15169 || (n == PC_REGNUM)))
15170 return false;
15172 return true;
15175 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15176 operand MEM's address contains an immediate offset from the base
15177 register and has no side effects, in which case it sets BASE and
15178 OFFSET accordingly. */
15179 static bool
15180 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15182 rtx addr;
15184 gcc_assert (base != NULL && offset != NULL);
15186 /* TODO: Handle more general memory operand patterns, such as
15187 PRE_DEC and PRE_INC. */
15189 if (side_effects_p (mem))
15190 return false;
15192 /* Can't deal with subregs. */
15193 if (GET_CODE (mem) == SUBREG)
15194 return false;
15196 gcc_assert (MEM_P (mem));
15198 *offset = const0_rtx;
15200 addr = XEXP (mem, 0);
15202 /* If addr isn't valid for DImode, then we can't handle it. */
15203 if (!arm_legitimate_address_p (DImode, addr,
15204 reload_in_progress || reload_completed))
15205 return false;
15207 if (REG_P (addr))
15209 *base = addr;
15210 return true;
15212 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15214 *base = XEXP (addr, 0);
15215 *offset = XEXP (addr, 1);
15216 return (REG_P (*base) && CONST_INT_P (*offset));
15219 return false;
15222 /* Called from a peephole2 to replace two word-size accesses with a
15223 single LDRD/STRD instruction. Returns true iff we can generate a
15224 new instruction sequence. That is, both accesses use the same base
15225 register and the gap between constant offsets is 4. This function
15226 may reorder its operands to match ldrd/strd RTL templates.
15227 OPERANDS are the operands found by the peephole matcher;
15228 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15229 corresponding memory operands. LOAD indicates whether the access
15230 is load or store. CONST_STORE indicates a store of constant
15231 integer values held in OPERANDS[4,5] and assumes that the pattern
15232 is 4 insns long, for the purpose of checking dead registers.
15233 COMMUTE indicates that register operands may be reordered. */
15234 bool
15235 gen_operands_ldrd_strd (rtx *operands, bool load,
15236 bool const_store, bool commute)
15238 int nops = 2;
15239 HOST_WIDE_INT offsets[2], offset;
15240 rtx base = NULL_RTX;
15241 rtx cur_base, cur_offset, tmp;
15242 int i, gap;
15243 HARD_REG_SET regset;
15245 gcc_assert (!const_store || !load);
15246 /* Check that the memory references are immediate offsets from the
15247 same base register. Extract the base register, the destination
15248 registers, and the corresponding memory offsets. */
15249 for (i = 0; i < nops; i++)
15251 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15252 return false;
15254 if (i == 0)
15255 base = cur_base;
15256 else if (REGNO (base) != REGNO (cur_base))
15257 return false;
15259 offsets[i] = INTVAL (cur_offset);
15260 if (GET_CODE (operands[i]) == SUBREG)
15262 tmp = SUBREG_REG (operands[i]);
15263 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15264 operands[i] = tmp;
15268 /* Make sure there is no dependency between the individual loads. */
15269 if (load && REGNO (operands[0]) == REGNO (base))
15270 return false; /* RAW */
15272 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15273 return false; /* WAW */
15275 /* If the same input register is used in both stores
15276 when storing different constants, try to find a free register.
15277 For example, the code
15278 mov r0, 0
15279 str r0, [r2]
15280 mov r0, 1
15281 str r0, [r2, #4]
15282 can be transformed into
15283 mov r1, 0
15284 mov r0, 1
15285 strd r1, r0, [r2]
15286 in Thumb mode assuming that r1 is free.
15287 For ARM mode do the same but only if the starting register
15288 can be made to be even. */
15289 if (const_store
15290 && REGNO (operands[0]) == REGNO (operands[1])
15291 && INTVAL (operands[4]) != INTVAL (operands[5]))
15293 if (TARGET_THUMB2)
15295 CLEAR_HARD_REG_SET (regset);
15296 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15297 if (tmp == NULL_RTX)
15298 return false;
15300 /* Use the new register in the first load to ensure that
15301 if the original input register is not dead after peephole,
15302 then it will have the correct constant value. */
15303 operands[0] = tmp;
15305 else if (TARGET_ARM)
15307 int regno = REGNO (operands[0]);
15308 if (!peep2_reg_dead_p (4, operands[0]))
15310 /* When the input register is even and is not dead after the
15311 pattern, it has to hold the second constant but we cannot
15312 form a legal STRD in ARM mode with this register as the second
15313 register. */
15314 if (regno % 2 == 0)
15315 return false;
15317 /* Is regno-1 free? */
15318 SET_HARD_REG_SET (regset);
15319 CLEAR_HARD_REG_BIT(regset, regno - 1);
15320 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15321 if (tmp == NULL_RTX)
15322 return false;
15324 operands[0] = tmp;
15326 else
15328 /* Find a DImode register. */
15329 CLEAR_HARD_REG_SET (regset);
15330 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15331 if (tmp != NULL_RTX)
15333 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15334 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15336 else
15338 /* Can we use the input register to form a DI register? */
15339 SET_HARD_REG_SET (regset);
15340 CLEAR_HARD_REG_BIT(regset,
15341 regno % 2 == 0 ? regno + 1 : regno - 1);
15342 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15343 if (tmp == NULL_RTX)
15344 return false;
15345 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15349 gcc_assert (operands[0] != NULL_RTX);
15350 gcc_assert (operands[1] != NULL_RTX);
15351 gcc_assert (REGNO (operands[0]) % 2 == 0);
15352 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15356 /* Make sure the instructions are ordered with lower memory access first. */
15357 if (offsets[0] > offsets[1])
15359 gap = offsets[0] - offsets[1];
15360 offset = offsets[1];
15362 /* Swap the instructions such that lower memory is accessed first. */
15363 std::swap (operands[0], operands[1]);
15364 std::swap (operands[2], operands[3]);
15365 if (const_store)
15366 std::swap (operands[4], operands[5]);
15368 else
15370 gap = offsets[1] - offsets[0];
15371 offset = offsets[0];
15374 /* Make sure accesses are to consecutive memory locations. */
15375 if (gap != 4)
15376 return false;
15378 /* Make sure we generate legal instructions. */
15379 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15380 false, load))
15381 return true;
15383 /* In Thumb state, where registers are almost unconstrained, there
15384 is little hope to fix it. */
15385 if (TARGET_THUMB2)
15386 return false;
15388 if (load && commute)
15390 /* Try reordering registers. */
15391 std::swap (operands[0], operands[1]);
15392 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15393 false, load))
15394 return true;
15397 if (const_store)
15399 /* If input registers are dead after this pattern, they can be
15400 reordered or replaced by other registers that are free in the
15401 current pattern. */
15402 if (!peep2_reg_dead_p (4, operands[0])
15403 || !peep2_reg_dead_p (4, operands[1]))
15404 return false;
15406 /* Try to reorder the input registers. */
15407 /* For example, the code
15408 mov r0, 0
15409 mov r1, 1
15410 str r1, [r2]
15411 str r0, [r2, #4]
15412 can be transformed into
15413 mov r1, 0
15414 mov r0, 1
15415 strd r0, r1, [r2]
15417 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15418 false, false))
15420 std::swap (operands[0], operands[1]);
15421 return true;
15424 /* Try to find a free DI register. */
15425 CLEAR_HARD_REG_SET (regset);
15426 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15427 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15428 while (true)
15430 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15431 if (tmp == NULL_RTX)
15432 return false;
15434 /* DREG must be an even-numbered register in DImode.
15435 Split it into SI registers. */
15436 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15437 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15438 gcc_assert (operands[0] != NULL_RTX);
15439 gcc_assert (operands[1] != NULL_RTX);
15440 gcc_assert (REGNO (operands[0]) % 2 == 0);
15441 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15443 return (operands_ok_ldrd_strd (operands[0], operands[1],
15444 base, offset,
15445 false, load));
15449 return false;
15455 /* Print a symbolic form of X to the debug file, F. */
15456 static void
15457 arm_print_value (FILE *f, rtx x)
15459 switch (GET_CODE (x))
15461 case CONST_INT:
15462 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15463 return;
15465 case CONST_DOUBLE:
15466 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15467 return;
15469 case CONST_VECTOR:
15471 int i;
15473 fprintf (f, "<");
15474 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15476 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15477 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15478 fputc (',', f);
15480 fprintf (f, ">");
15482 return;
15484 case CONST_STRING:
15485 fprintf (f, "\"%s\"", XSTR (x, 0));
15486 return;
15488 case SYMBOL_REF:
15489 fprintf (f, "`%s'", XSTR (x, 0));
15490 return;
15492 case LABEL_REF:
15493 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15494 return;
15496 case CONST:
15497 arm_print_value (f, XEXP (x, 0));
15498 return;
15500 case PLUS:
15501 arm_print_value (f, XEXP (x, 0));
15502 fprintf (f, "+");
15503 arm_print_value (f, XEXP (x, 1));
15504 return;
15506 case PC:
15507 fprintf (f, "pc");
15508 return;
15510 default:
15511 fprintf (f, "????");
15512 return;
15516 /* Routines for manipulation of the constant pool. */
15518 /* Arm instructions cannot load a large constant directly into a
15519 register; they have to come from a pc relative load. The constant
15520 must therefore be placed in the addressable range of the pc
15521 relative load. Depending on the precise pc relative load
15522 instruction the range is somewhere between 256 bytes and 4k. This
15523 means that we often have to dump a constant inside a function, and
15524 generate code to branch around it.
15526 It is important to minimize this, since the branches will slow
15527 things down and make the code larger.
15529 Normally we can hide the table after an existing unconditional
15530 branch so that there is no interruption of the flow, but in the
15531 worst case the code looks like this:
15533 ldr rn, L1
15535 b L2
15536 align
15537 L1: .long value
15541 ldr rn, L3
15543 b L4
15544 align
15545 L3: .long value
15549 We fix this by performing a scan after scheduling, which notices
15550 which instructions need to have their operands fetched from the
15551 constant table and builds the table.
15553 The algorithm starts by building a table of all the constants that
15554 need fixing up and all the natural barriers in the function (places
15555 where a constant table can be dropped without breaking the flow).
15556 For each fixup we note how far the pc-relative replacement will be
15557 able to reach and the offset of the instruction into the function.
15559 Having built the table we then group the fixes together to form
15560 tables that are as large as possible (subject to addressing
15561 constraints) and emit each table of constants after the last
15562 barrier that is within range of all the instructions in the group.
15563 If a group does not contain a barrier, then we forcibly create one
15564 by inserting a jump instruction into the flow. Once the table has
15565 been inserted, the insns are then modified to reference the
15566 relevant entry in the pool.
15568 Possible enhancements to the algorithm (not implemented) are:
15570 1) For some processors and object formats, there may be benefit in
15571 aligning the pools to the start of cache lines; this alignment
15572 would need to be taken into account when calculating addressability
15573 of a pool. */
15575 /* These typedefs are located at the start of this file, so that
15576 they can be used in the prototypes there. This comment is to
15577 remind readers of that fact so that the following structures
15578 can be understood more easily.
15580 typedef struct minipool_node Mnode;
15581 typedef struct minipool_fixup Mfix; */
15583 struct minipool_node
15585 /* Doubly linked chain of entries. */
15586 Mnode * next;
15587 Mnode * prev;
15588 /* The maximum offset into the code that this entry can be placed. While
15589 pushing fixes for forward references, all entries are sorted in order
15590 of increasing max_address. */
15591 HOST_WIDE_INT max_address;
15592 /* Similarly for an entry inserted for a backwards ref. */
15593 HOST_WIDE_INT min_address;
15594 /* The number of fixes referencing this entry. This can become zero
15595 if we "unpush" an entry. In this case we ignore the entry when we
15596 come to emit the code. */
15597 int refcount;
15598 /* The offset from the start of the minipool. */
15599 HOST_WIDE_INT offset;
15600 /* The value in the table. */
15601 rtx value;
15602 /* The mode of value. */
15603 machine_mode mode;
15604 /* The size of the value. With iWMMXt enabled
15605 sizes > 4 also imply an alignment of 8 bytes. */
15606 int fix_size;
15609 struct minipool_fixup
15611 Mfix * next;
15612 rtx_insn * insn;
15613 HOST_WIDE_INT address;
15614 rtx * loc;
15615 machine_mode mode;
15616 int fix_size;
15617 rtx value;
15618 Mnode * minipool;
15619 HOST_WIDE_INT forwards;
15620 HOST_WIDE_INT backwards;
15623 /* Fixes less than a word need padding out to a word boundary. */
15624 #define MINIPOOL_FIX_SIZE(mode) \
15625 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15627 static Mnode * minipool_vector_head;
15628 static Mnode * minipool_vector_tail;
15629 static rtx_code_label *minipool_vector_label;
15630 static int minipool_pad;
15632 /* The linked list of all minipool fixes required for this function. */
15633 Mfix * minipool_fix_head;
15634 Mfix * minipool_fix_tail;
15635 /* The fix entry for the current minipool, once it has been placed. */
15636 Mfix * minipool_barrier;
15638 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15639 #define JUMP_TABLES_IN_TEXT_SECTION 0
15640 #endif
15642 static HOST_WIDE_INT
15643 get_jump_table_size (rtx_jump_table_data *insn)
15645 /* ADDR_VECs only take room if read-only data goes into the text
15646 section. */
15647 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15649 rtx body = PATTERN (insn);
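/* For an ADDR_DIFF_VEC the vector of labels is operand 1; for an
   ADDR_VEC it is operand 0.  */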
15650 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15651 HOST_WIDE_INT size;
15652 HOST_WIDE_INT modesize;
15654 modesize = GET_MODE_SIZE (GET_MODE (body));
15655 size = modesize * XVECLEN (body, elt);
15656 switch (modesize)
15658 case 1:
15659 /* Round up size of TBB table to a halfword boundary. */
15660 size = (size + 1) & ~HOST_WIDE_INT_1;
15661 break;
15662 case 2:
15663 /* No padding necessary for TBH. */
15664 break;
15665 case 4:
15666 /* Add two bytes for alignment on Thumb. */
15667 if (TARGET_THUMB)
15668 size += 2;
15669 break;
15670 default:
15671 gcc_unreachable ();
15673 return size;
15676 return 0;
15679 /* Return the maximum amount of padding that will be inserted before
15680 label LABEL. */
15682 static HOST_WIDE_INT
15683 get_label_padding (rtx label)
15685 HOST_WIDE_INT align, min_insn_size;
15687 align = 1 << label_to_alignment (label);
15688 min_insn_size = TARGET_THUMB ? 2 : 4;
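/* Insn addresses are multiples of the minimum insn size, so the padding
   emitted before an aligned label is at most ALIGN minus that size.  */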
15689 return align > min_insn_size ? align - min_insn_size : 0;
15692 /* Move a minipool fix MP from its current location to before MAX_MP.
15693 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15694 constraints may need updating. */
15695 static Mnode *
15696 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15697 HOST_WIDE_INT max_address)
15699 /* The code below assumes these are different. */
15700 gcc_assert (mp != max_mp);
15702 if (max_mp == NULL)
15704 if (max_address < mp->max_address)
15705 mp->max_address = max_address;
15707 else
15709 if (max_address > max_mp->max_address - mp->fix_size)
15710 mp->max_address = max_mp->max_address - mp->fix_size;
15711 else
15712 mp->max_address = max_address;
15714 /* Unlink MP from its current position. Since max_mp is non-null,
15715 mp->prev must be non-null. */
15716 mp->prev->next = mp->next;
15717 if (mp->next != NULL)
15718 mp->next->prev = mp->prev;
15719 else
15720 minipool_vector_tail = mp->prev;
15722 /* Re-insert it before MAX_MP. */
15723 mp->next = max_mp;
15724 mp->prev = max_mp->prev;
15725 max_mp->prev = mp;
15727 if (mp->prev != NULL)
15728 mp->prev->next = mp;
15729 else
15730 minipool_vector_head = mp;
15733 /* Save the new entry. */
15734 max_mp = mp;
15736 /* Scan over the preceding entries and adjust their addresses as
15737 required. */
15738 while (mp->prev != NULL
15739 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15741 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15742 mp = mp->prev;
15745 return max_mp;
15748 /* Add a constant to the minipool for a forward reference. Returns the
15749 node added or NULL if the constant will not fit in this pool. */
15750 static Mnode *
15751 add_minipool_forward_ref (Mfix *fix)
15753 /* If set, max_mp is the first pool_entry that has a lower
15754 constraint than the one we are trying to add. */
15755 Mnode * max_mp = NULL;
15756 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15757 Mnode * mp;
15759 /* If the minipool starts before the end of FIX->INSN then this FIX
15760 cannot be placed into the current pool. Furthermore, adding the
15761 new constant pool entry may cause the pool to start FIX_SIZE bytes
15762 earlier. */
15763 if (minipool_vector_head &&
15764 (fix->address + get_attr_length (fix->insn)
15765 >= minipool_vector_head->max_address - fix->fix_size))
15766 return NULL;
15768 /* Scan the pool to see if a constant with the same value has
15769 already been added. While we are doing this, also note the
15770 location where we must insert the constant if it doesn't already
15771 exist. */
15772 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15774 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15775 && fix->mode == mp->mode
15776 && (!LABEL_P (fix->value)
15777 || (CODE_LABEL_NUMBER (fix->value)
15778 == CODE_LABEL_NUMBER (mp->value)))
15779 && rtx_equal_p (fix->value, mp->value))
15781 /* More than one fix references this entry. */
15782 mp->refcount++;
15783 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15786 /* Note the insertion point if necessary. */
15787 if (max_mp == NULL
15788 && mp->max_address > max_address)
15789 max_mp = mp;
15791 /* If we are inserting an 8-byte aligned quantity and
15792 we have not already found an insertion point, then
15793 make sure that all such 8-byte aligned quantities are
15794 placed at the start of the pool. */
15795 if (ARM_DOUBLEWORD_ALIGN
15796 && max_mp == NULL
15797 && fix->fix_size >= 8
15798 && mp->fix_size < 8)
15800 max_mp = mp;
15801 max_address = mp->max_address;
15805 /* The value is not currently in the minipool, so we need to create
15806 a new entry for it. If MAX_MP is NULL, the entry will be put on
15807 the end of the list since the placement is less constrained than
15808 any existing entry. Otherwise, we insert the new fix before
15809 MAX_MP and, if necessary, adjust the constraints on the other
15810 entries. */
15811 mp = XNEW (Mnode);
15812 mp->fix_size = fix->fix_size;
15813 mp->mode = fix->mode;
15814 mp->value = fix->value;
15815 mp->refcount = 1;
15816 /* Not yet required for a backwards ref. */
15817 mp->min_address = -65536;
15819 if (max_mp == NULL)
15821 mp->max_address = max_address;
15822 mp->next = NULL;
15823 mp->prev = minipool_vector_tail;
15825 if (mp->prev == NULL)
15827 minipool_vector_head = mp;
15828 minipool_vector_label = gen_label_rtx ();
15830 else
15831 mp->prev->next = mp;
15833 minipool_vector_tail = mp;
15835 else
15837 if (max_address > max_mp->max_address - mp->fix_size)
15838 mp->max_address = max_mp->max_address - mp->fix_size;
15839 else
15840 mp->max_address = max_address;
15842 mp->next = max_mp;
15843 mp->prev = max_mp->prev;
15844 max_mp->prev = mp;
15845 if (mp->prev != NULL)
15846 mp->prev->next = mp;
15847 else
15848 minipool_vector_head = mp;
15851 /* Save the new entry. */
15852 max_mp = mp;
15854 /* Scan over the preceding entries and adjust their addresses as
15855 required. */
15856 while (mp->prev != NULL
15857 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15859 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15860 mp = mp->prev;
15863 return max_mp;
15866 static Mnode *
15867 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15868 HOST_WIDE_INT min_address)
15870 HOST_WIDE_INT offset;
15872 /* The code below assumes these are different. */
15873 gcc_assert (mp != min_mp);
15875 if (min_mp == NULL)
15877 if (min_address > mp->min_address)
15878 mp->min_address = min_address;
15880 else
15882 /* We will adjust this below if it is too loose. */
15883 mp->min_address = min_address;
15885 /* Unlink MP from its current position. Since min_mp is non-null,
15886 mp->next must be non-null. */
15887 mp->next->prev = mp->prev;
15888 if (mp->prev != NULL)
15889 mp->prev->next = mp->next;
15890 else
15891 minipool_vector_head = mp->next;
15893 /* Reinsert it after MIN_MP. */
15894 mp->prev = min_mp;
15895 mp->next = min_mp->next;
15896 min_mp->next = mp;
15897 if (mp->next != NULL)
15898 mp->next->prev = mp;
15899 else
15900 minipool_vector_tail = mp;
15903 min_mp = mp;
15905 offset = 0;
15906 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15908 mp->offset = offset;
15909 if (mp->refcount > 0)
15910 offset += mp->fix_size;
15912 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
15913 mp->next->min_address = mp->min_address + mp->fix_size;
15916 return min_mp;
15919 /* Add a constant to the minipool for a backward reference. Returns the
15920 node added or NULL if the constant will not fit in this pool.
15922 Note that the code for insertion for a backwards reference can be
15923 somewhat confusing because the calculated offsets for each fix do
15924 not take into account the size of the pool (which is still under
15925 construction). */
15926 static Mnode *
15927 add_minipool_backward_ref (Mfix *fix)
15929 /* If set, min_mp is the last pool_entry that has a lower constraint
15930 than the one we are trying to add. */
15931 Mnode *min_mp = NULL;
15932 /* This can be negative, since it is only a constraint. */
15933 HOST_WIDE_INT min_address = fix->address - fix->backwards;
15934 Mnode *mp;
15936 /* If we can't reach the current pool from this insn, or if we can't
15937 insert this entry at the end of the pool without pushing other
15938 fixes out of range, then we don't try. This ensures that we
15939 can't fail later on. */
15940 if (min_address >= minipool_barrier->address
15941 || (minipool_vector_tail->min_address + fix->fix_size
15942 >= minipool_barrier->address))
15943 return NULL;
15945 /* Scan the pool to see if a constant with the same value has
15946 already been added. While we are doing this, also note the
15947 location where we must insert the constant if it doesn't already
15948 exist. */
15949 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
15951 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15952 && fix->mode == mp->mode
15953 && (!LABEL_P (fix->value)
15954 || (CODE_LABEL_NUMBER (fix->value)
15955 == CODE_LABEL_NUMBER (mp->value)))
15956 && rtx_equal_p (fix->value, mp->value)
15957 /* Check that there is enough slack to move this entry to the
15958 end of the table (this is conservative). */
15959 && (mp->max_address
15960 > (minipool_barrier->address
15961 + minipool_vector_tail->offset
15962 + minipool_vector_tail->fix_size)))
15964 mp->refcount++;
15965 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
15968 if (min_mp != NULL)
15969 mp->min_address += fix->fix_size;
15970 else
15972 /* Note the insertion point if necessary. */
15973 if (mp->min_address < min_address)
15975 /* For now, we do not allow the insertion of 8-byte alignment
15976 requiring nodes anywhere but at the start of the pool. */
15977 if (ARM_DOUBLEWORD_ALIGN
15978 && fix->fix_size >= 8 && mp->fix_size < 8)
15979 return NULL;
15980 else
15981 min_mp = mp;
15983 else if (mp->max_address
15984 < minipool_barrier->address + mp->offset + fix->fix_size)
15986 /* Inserting before this entry would push the fix beyond
15987 its maximum address (which can happen if we have
15988 re-located a forwards fix); force the new fix to come
15989 after it. */
15990 if (ARM_DOUBLEWORD_ALIGN
15991 && fix->fix_size >= 8 && mp->fix_size < 8)
15992 return NULL;
15993 else
15995 min_mp = mp;
15996 min_address = mp->min_address + fix->fix_size;
15999 /* Do not insert a non-8-byte aligned quantity before 8-byte
16000 aligned quantities. */
16001 else if (ARM_DOUBLEWORD_ALIGN
16002 && fix->fix_size < 8
16003 && mp->fix_size >= 8)
16005 min_mp = mp;
16006 min_address = mp->min_address + fix->fix_size;
16011 /* We need to create a new entry. */
16012 mp = XNEW (Mnode);
16013 mp->fix_size = fix->fix_size;
16014 mp->mode = fix->mode;
16015 mp->value = fix->value;
16016 mp->refcount = 1;
16017 mp->max_address = minipool_barrier->address + 65536;
16019 mp->min_address = min_address;
16021 if (min_mp == NULL)
16023 mp->prev = NULL;
16024 mp->next = minipool_vector_head;
16026 if (mp->next == NULL)
16028 minipool_vector_tail = mp;
16029 minipool_vector_label = gen_label_rtx ();
16031 else
16032 mp->next->prev = mp;
16034 minipool_vector_head = mp;
16036 else
16038 mp->next = min_mp->next;
16039 mp->prev = min_mp;
16040 min_mp->next = mp;
16042 if (mp->next != NULL)
16043 mp->next->prev = mp;
16044 else
16045 minipool_vector_tail = mp;
16048 /* Save the new entry. */
16049 min_mp = mp;
16051 if (mp->prev)
16052 mp = mp->prev;
16053 else
16054 mp->offset = 0;
16056 /* Scan over the following entries and adjust their offsets. */
16057 while (mp->next != NULL)
16059 if (mp->next->min_address < mp->min_address + mp->fix_size)
16060 mp->next->min_address = mp->min_address + mp->fix_size;
16062 if (mp->refcount)
16063 mp->next->offset = mp->offset + mp->fix_size;
16064 else
16065 mp->next->offset = mp->offset;
16067 mp = mp->next;
16070 return min_mp;
16073 static void
16074 assign_minipool_offsets (Mfix *barrier)
16076 HOST_WIDE_INT offset = 0;
16077 Mnode *mp;
16079 minipool_barrier = barrier;
16081 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16083 mp->offset = offset;
16085 if (mp->refcount > 0)
16086 offset += mp->fix_size;
16090 /* Output the literal table. */
16091 static void
16092 dump_minipool (rtx_insn *scan)
16094 Mnode * mp;
16095 Mnode * nmp;
16096 int align64 = 0;
16098 if (ARM_DOUBLEWORD_ALIGN)
16099 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16100 if (mp->refcount > 0 && mp->fix_size >= 8)
16102 align64 = 1;
16103 break;
16106 if (dump_file)
16107 fprintf (dump_file,
16108 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16109 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16111 scan = emit_label_after (gen_label_rtx (), scan);
16112 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16113 scan = emit_label_after (minipool_vector_label, scan);
16115 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16117 if (mp->refcount > 0)
16119 if (dump_file)
16121 fprintf (dump_file,
16122 ";; Offset %u, min %ld, max %ld ",
16123 (unsigned) mp->offset, (unsigned long) mp->min_address,
16124 (unsigned long) mp->max_address);
16125 arm_print_value (dump_file, mp->value);
16126 fputc ('\n', dump_file);
16129 rtx val = copy_rtx (mp->value);
16131 switch (GET_MODE_SIZE (mp->mode))
16133 #ifdef HAVE_consttable_1
16134 case 1:
16135 scan = emit_insn_after (gen_consttable_1 (val), scan);
16136 break;
16138 #endif
16139 #ifdef HAVE_consttable_2
16140 case 2:
16141 scan = emit_insn_after (gen_consttable_2 (val), scan);
16142 break;
16144 #endif
16145 #ifdef HAVE_consttable_4
16146 case 4:
16147 scan = emit_insn_after (gen_consttable_4 (val), scan);
16148 break;
16150 #endif
16151 #ifdef HAVE_consttable_8
16152 case 8:
16153 scan = emit_insn_after (gen_consttable_8 (val), scan);
16154 break;
16156 #endif
16157 #ifdef HAVE_consttable_16
16158 case 16:
16159 scan = emit_insn_after (gen_consttable_16 (val), scan);
16160 break;
16162 #endif
16163 default:
16164 gcc_unreachable ();
16168 nmp = mp->next;
16169 free (mp);
16172 minipool_vector_head = minipool_vector_tail = NULL;
16173 scan = emit_insn_after (gen_consttable_end (), scan);
16174 scan = emit_barrier_after (scan);
16177 /* Return the cost of forcibly inserting a barrier after INSN. */
16178 static int
16179 arm_barrier_cost (rtx_insn *insn)
16181 /* Basing the location of the pool on the loop depth is preferable,
16182 but at the moment, the basic block information seems to be
16183 corrupted by this stage of the compilation. */
16184 int base_cost = 50;
16185 rtx_insn *next = next_nonnote_insn (insn);
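/* A location just before an existing label is presumably cheaper, since
   the flow of control is already broken there.  */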
16187 if (next != NULL && LABEL_P (next))
16188 base_cost -= 20;
16190 switch (GET_CODE (insn))
16192 case CODE_LABEL:
16193 /* It will always be better to place the table before the label, rather
16194 than after it. */
16195 return 50;
16197 case INSN:
16198 case CALL_INSN:
16199 return base_cost;
16201 case JUMP_INSN:
16202 return base_cost - 10;
16204 default:
16205 return base_cost + 10;
16209 /* Find the best place in the insn stream in the range
16210 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16211 Create the barrier by inserting a jump and add a new fix entry for
16212 it. */
16213 static Mfix *
16214 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16216 HOST_WIDE_INT count = 0;
16217 rtx_barrier *barrier;
16218 rtx_insn *from = fix->insn;
16219 /* The instruction after which we will insert the jump. */
16220 rtx_insn *selected = NULL;
16221 int selected_cost;
16222 /* The address at which the jump instruction will be placed. */
16223 HOST_WIDE_INT selected_address;
16224 Mfix * new_fix;
16225 HOST_WIDE_INT max_count = max_address - fix->address;
16226 rtx_code_label *label = gen_label_rtx ();
16228 selected_cost = arm_barrier_cost (from);
16229 selected_address = fix->address;
16231 while (from && count < max_count)
16233 rtx_jump_table_data *tmp;
16234 int new_cost;
16236 /* This code shouldn't have been called if there was a natural barrier
16237 within range. */
16238 gcc_assert (!BARRIER_P (from));
16240 /* Count the length of this insn. This must stay in sync with the
16241 code that pushes minipool fixes. */
16242 if (LABEL_P (from))
16243 count += get_label_padding (from);
16244 else
16245 count += get_attr_length (from);
16247 /* If there is a jump table, add its length. */
16248 if (tablejump_p (from, NULL, &tmp))
16250 count += get_jump_table_size (tmp);
16252 /* Jump tables aren't in a basic block, so base the cost on
16253 the dispatch insn. If we select this location, we will
16254 still put the pool after the table. */
16255 new_cost = arm_barrier_cost (from);
16257 if (count < max_count
16258 && (!selected || new_cost <= selected_cost))
16260 selected = tmp;
16261 selected_cost = new_cost;
16262 selected_address = fix->address + count;
16265 /* Continue after the dispatch table. */
16266 from = NEXT_INSN (tmp);
16267 continue;
16270 new_cost = arm_barrier_cost (from);
16272 if (count < max_count
16273 && (!selected || new_cost <= selected_cost))
16275 selected = from;
16276 selected_cost = new_cost;
16277 selected_address = fix->address + count;
16280 from = NEXT_INSN (from);
16283 /* Make sure that we found a place to insert the jump. */
16284 gcc_assert (selected);
16286 /* Make sure we do not split a call and its corresponding
16287 CALL_ARG_LOCATION note. */
16288 if (CALL_P (selected))
16290 rtx_insn *next = NEXT_INSN (selected);
16291 if (next && NOTE_P (next)
16292 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16293 selected = next;
16296 /* Create a new JUMP_INSN that branches around a barrier. */
16297 from = emit_jump_insn_after (gen_jump (label), selected);
16298 JUMP_LABEL (from) = label;
16299 barrier = emit_barrier_after (from);
16300 emit_label_after (label, barrier);
16302 /* Create a minipool barrier entry for the new barrier. */
16303 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16304 new_fix->insn = barrier;
16305 new_fix->address = selected_address;
16306 new_fix->next = fix->next;
16307 fix->next = new_fix;
16309 return new_fix;
16312 /* Record that there is a natural barrier in the insn stream at
16313 ADDRESS. */
16314 static void
16315 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16317 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16319 fix->insn = insn;
16320 fix->address = address;
16322 fix->next = NULL;
16323 if (minipool_fix_head != NULL)
16324 minipool_fix_tail->next = fix;
16325 else
16326 minipool_fix_head = fix;
16328 minipool_fix_tail = fix;
16331 /* Record INSN, which will need fixing up to load a value from the
16332 minipool. ADDRESS is the offset of the insn since the start of the
16333 function; LOC is a pointer to the part of the insn which requires
16334 fixing; VALUE is the constant that must be loaded, which is of type
16335 MODE. */
16336 static void
16337 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16338 machine_mode mode, rtx value)
16340 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16342 fix->insn = insn;
16343 fix->address = address;
16344 fix->loc = loc;
16345 fix->mode = mode;
16346 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16347 fix->value = value;
16348 fix->forwards = get_attr_pool_range (insn);
16349 fix->backwards = get_attr_neg_pool_range (insn);
16350 fix->minipool = NULL;
16352 /* If an insn doesn't have a range defined for it, then it isn't
16353 expecting to be reworked by this code. Better to stop now than
16354 to generate duff assembly code. */
16355 gcc_assert (fix->forwards || fix->backwards);
16357 /* If an entry requires 8-byte alignment then assume all constant pools
16358 require 4 bytes of padding. Trying to do this later on a per-pool
16359 basis is awkward because existing pool entries have to be modified. */
16360 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16361 minipool_pad = 4;
16363 if (dump_file)
16365 fprintf (dump_file,
16366 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16367 GET_MODE_NAME (mode),
16368 INSN_UID (insn), (unsigned long) address,
16369 -1 * (long)fix->backwards, (long)fix->forwards);
16370 arm_print_value (dump_file, fix->value);
16371 fprintf (dump_file, "\n");
16374 /* Add it to the chain of fixes. */
16375 fix->next = NULL;
16377 if (minipool_fix_head != NULL)
16378 minipool_fix_tail->next = fix;
16379 else
16380 minipool_fix_head = fix;
16382 minipool_fix_tail = fix;
16385 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16386 Returns the number of insns needed, or 99 if we always want to synthesize
16387 the value. */
16389 arm_max_const_double_inline_cost ()
16391 /* Let the value get synthesized to avoid the use of literal pools. */
16392 if (arm_disable_literal_pool)
16393 return 99;
16395 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16398 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16399 Returns the number of insns needed, or 99 if we don't know how to
16400 do it. */
16402 arm_const_double_inline_cost (rtx val)
16404 rtx lowpart, highpart;
16405 machine_mode mode;
16407 mode = GET_MODE (val);
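/* A constant integer rtx carries VOIDmode; treat such a 64-bit constant
   as DImode here.  */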
16409 if (mode == VOIDmode)
16410 mode = DImode;
16412 gcc_assert (GET_MODE_SIZE (mode) == 8);
16414 lowpart = gen_lowpart (SImode, val);
16415 highpart = gen_highpart_mode (SImode, mode, val);
16417 gcc_assert (CONST_INT_P (lowpart));
16418 gcc_assert (CONST_INT_P (highpart));
16420 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16421 NULL_RTX, NULL_RTX, 0, 0)
16422 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16423 NULL_RTX, NULL_RTX, 0, 0));
16426 /* Cost of loading a SImode constant. */
16427 static inline int
16428 arm_const_inline_cost (enum rtx_code code, rtx val)
16430 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16431 NULL_RTX, NULL_RTX, 1, 0);
16434 /* Return true if it is worthwhile to split a 64-bit constant into two
16435 32-bit operations. This is the case if optimizing for size, or
16436 if we have load delay slots, or if one 32-bit part can be done with
16437 a single data operation. */
16438 bool
16439 arm_const_double_by_parts (rtx val)
16441 machine_mode mode = GET_MODE (val);
16442 rtx part;
16444 if (optimize_size || arm_ld_sched)
16445 return true;
16447 if (mode == VOIDmode)
16448 mode = DImode;
16450 part = gen_highpart_mode (SImode, mode, val);
16452 gcc_assert (CONST_INT_P (part));
16454 if (const_ok_for_arm (INTVAL (part))
16455 || const_ok_for_arm (~INTVAL (part)))
16456 return true;
16458 part = gen_lowpart (SImode, val);
16460 gcc_assert (CONST_INT_P (part));
16462 if (const_ok_for_arm (INTVAL (part))
16463 || const_ok_for_arm (~INTVAL (part)))
16464 return true;
16466 return false;
16469 /* Return true if it is possible to inline both the high and low parts
16470 of a 64-bit constant into 32-bit data processing instructions. */
16471 bool
16472 arm_const_double_by_immediates (rtx val)
16474 machine_mode mode = GET_MODE (val);
16475 rtx part;
16477 if (mode == VOIDmode)
16478 mode = DImode;
16480 part = gen_highpart_mode (SImode, mode, val);
16482 gcc_assert (CONST_INT_P (part));
16484 if (!const_ok_for_arm (INTVAL (part)))
16485 return false;
16487 part = gen_lowpart (SImode, val);
16489 gcc_assert (CONST_INT_P (part));
16491 if (!const_ok_for_arm (INTVAL (part)))
16492 return false;
16494 return true;
16497 /* Scan INSN and note any of its operands that need fixing.
16498 If DO_PUSHES is false we do not actually push any of the fixups
16499 needed. */
16500 static void
16501 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16503 int opno;
16505 extract_constrain_insn (insn);
16507 if (recog_data.n_alternatives == 0)
16508 return;
16510 /* Fill in recog_op_alt with information about the constraints of
16511 this insn. */
16512 preprocess_constraints (insn);
16514 const operand_alternative *op_alt = which_op_alt ();
16515 for (opno = 0; opno < recog_data.n_operands; opno++)
16517 /* Things we need to fix can only occur in inputs. */
16518 if (recog_data.operand_type[opno] != OP_IN)
16519 continue;
16521 /* If this alternative is a memory reference, then any mention
16522 of constants in this alternative is really to fool reload
16523 into allowing us to accept one there. We need to fix them up
16524 now so that we output the right code. */
16525 if (op_alt[opno].memory_ok)
16527 rtx op = recog_data.operand[opno];
16529 if (CONSTANT_P (op))
16531 if (do_pushes)
16532 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16533 recog_data.operand_mode[opno], op);
16535 else if (MEM_P (op)
16536 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16537 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16539 if (do_pushes)
16541 rtx cop = avoid_constant_pool_reference (op);
16543 /* Casting the address of something to a mode narrower
16544 than a word can cause avoid_constant_pool_reference()
16545 to return the pool reference itself. That's no good to
16546 us here. Let's just hope that we can use the
16547 constant pool value directly. */
16548 if (op == cop)
16549 cop = get_pool_constant (XEXP (op, 0));
16551 push_minipool_fix (insn, address,
16552 recog_data.operand_loc[opno],
16553 recog_data.operand_mode[opno], cop);
16560 return;
16563 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16564 and unions in the context of ARMv8-M Security Extensions. It is used as a
16565 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16566 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16567 or four masks, depending on whether it is being computed for a
16568 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16569 respectively. The tree for the type of the argument or a field within an
16570 argument is passed in ARG_TYPE, the current register this argument or field
16571 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16572 argument or field starts at is passed in STARTING_BIT and the last used bit
16573 is kept in LAST_USED_BIT which is also updated accordingly. */
16575 static unsigned HOST_WIDE_INT
16576 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16577 uint32_t * padding_bits_to_clear,
16578 unsigned starting_bit, int * last_used_bit)
16581 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16583 if (TREE_CODE (arg_type) == RECORD_TYPE)
16585 unsigned current_bit = starting_bit;
16586 tree field;
16587 long int offset, size;
16590 field = TYPE_FIELDS (arg_type);
16591 while (field)
16593 /* The offset within a structure is always an offset from
16594 the start of that structure. Make sure we take that into account
16595 in the calculation of the register-based offset that we use here. */
16596 offset = starting_bit;
16597 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16598 offset %= 32;
16600 /* This is the actual size of the field, for bitfields this is the
16601 bitfield width and not the container size. */
16602 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16604 if (*last_used_bit != offset)
16606 if (offset < *last_used_bit)
16608 /* This field's offset is before the 'last_used_bit', that
16609 means this field goes on the next register. So we need to
16610 pad the rest of the current register and increase the
16611 register number. */
16612 uint32_t mask;
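/* Set every bit from *last_used_bit upwards, i.e.
   ~((1 << *last_used_bit) - 1).  */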
16613 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16614 mask++;
16616 padding_bits_to_clear[*regno] |= mask;
16617 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16618 (*regno)++;
16620 else
16622 /* Otherwise we pad the bits between the last field's end and
16623 the start of the new field. */
16624 uint32_t mask;
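/* Set the bits from *last_used_bit up to, but not including, bit
   OFFSET.  */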
16626 mask = ((uint32_t)-1) >> (32 - offset);
16627 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16628 padding_bits_to_clear[*regno] |= mask;
16630 current_bit = offset;
16633 /* Calculate further padding bits for inner structs/unions too. */
16634 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16636 *last_used_bit = current_bit;
16637 not_to_clear_reg_mask
16638 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16639 padding_bits_to_clear, offset,
16640 last_used_bit);
16642 else
16644 /* Update 'current_bit' with this field's size. If the
16645 'current_bit' lies in a subsequent register, update 'regno' and
16646 reset 'current_bit' to point to the current bit in that new
16647 register. */
16648 current_bit += size;
16649 while (current_bit >= 32)
16651 current_bit-=32;
16652 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16653 (*regno)++;
16655 *last_used_bit = current_bit;
16658 field = TREE_CHAIN (field);
16660 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16662 else if (TREE_CODE (arg_type) == UNION_TYPE)
16664 tree field, field_t;
16665 int i, regno_t, field_size;
16666 int max_reg = -1;
16667 int max_bit = -1;
16668 uint32_t mask;
16669 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16670 = {-1, -1, -1, -1};
16672 /* To compute the padding bits in a union we only consider bits as
16673 padding bits if they are always either a padding bit or fall outside a
16674 field's size for all fields in the union. */
16675 field = TYPE_FIELDS (arg_type);
16676 while (field)
16678 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16679 = {0U, 0U, 0U, 0U};
16680 int last_used_bit_t = *last_used_bit;
16681 regno_t = *regno;
16682 field_t = TREE_TYPE (field);
16684 /* If the field's type is either a record or a union make sure to
16685 compute their padding bits too. */
16686 if (RECORD_OR_UNION_TYPE_P (field_t))
16687 not_to_clear_reg_mask
16688 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16689 &padding_bits_to_clear_t[0],
16690 starting_bit, &last_used_bit_t);
16691 else
16693 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16694 regno_t = (field_size / 32) + *regno;
16695 last_used_bit_t = (starting_bit + field_size) % 32;
16698 for (i = *regno; i < regno_t; i++)
16700 /* For all but the last register used by this field only keep the
16701 padding bits that were padding bits in this field. */
16702 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16705 /* For the last register, keep all padding bits that were padding
16706 bits in this field and any padding bits that are still valid
16707 as padding bits but fall outside of this field's size. */
16708 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16709 padding_bits_to_clear_res[regno_t]
16710 &= padding_bits_to_clear_t[regno_t] | mask;
16712 /* Update the maximum size of the fields in terms of registers used
16713 ('max_reg') and the 'last_used_bit' in said register. */
16714 if (max_reg < regno_t)
16716 max_reg = regno_t;
16717 max_bit = last_used_bit_t;
16719 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16720 max_bit = last_used_bit_t;
16722 field = TREE_CHAIN (field);
16725 /* Update the current padding_bits_to_clear using the intersection of the
16726 padding bits of all the fields. */
16727 for (i=*regno; i < max_reg; i++)
16728 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16730 /* Do not keep trailing padding bits, we do not know yet whether this
16731 is the end of the argument. */
16732 mask = ((uint32_t) 1 << max_bit) - 1;
16733 padding_bits_to_clear[max_reg]
16734 |= padding_bits_to_clear_res[max_reg] & mask;
16736 *regno = max_reg;
16737 *last_used_bit = max_bit;
16739 else
16740 /* This function should only be used for structs and unions. */
16741 gcc_unreachable ();
16743 return not_to_clear_reg_mask;
16746 /* In the context of ARMv8-M Security Extensions, this function is used for both
16747 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16748 registers are used when returning or passing arguments, which is then
16749 returned as a mask. It will also compute a mask to indicate padding/unused
16750 bits for each of these registers, and passes this through the
16751 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16752 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16753 the starting register used to pass this argument or return value is passed
16754 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16755 for struct and union types. */
16757 static unsigned HOST_WIDE_INT
16758 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16759 uint32_t * padding_bits_to_clear)
16762 int last_used_bit = 0;
16763 unsigned HOST_WIDE_INT not_to_clear_mask;
16765 if (RECORD_OR_UNION_TYPE_P (arg_type))
16767 not_to_clear_mask
16768 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16769 padding_bits_to_clear, 0,
16770 &last_used_bit);
16773 /* If the 'last_used_bit' is not zero, that means we are still using a
16774 part of the last 'regno'. In such cases we must clear the trailing
16775 bits. Otherwise we are not using regno and we should mark it to be
16776 cleared. */
16777 if (last_used_bit != 0)
16778 padding_bits_to_clear[regno]
16779 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16780 else
16781 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16783 else
16785 not_to_clear_mask = 0;
16786 /* We are not dealing with structs or unions. So these arguments may be
16787 passed in floating point registers too. In some cases a BLKmode is
16788 used when returning or passing arguments in multiple VFP registers. */
16789 if (GET_MODE (arg_rtx) == BLKmode)
16791 int i, arg_regs;
16792 rtx reg;
16794 /* This should really only occur when dealing with the hard-float
16795 ABI. */
16796 gcc_assert (TARGET_HARD_FLOAT_ABI);
16798 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16800 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16801 gcc_assert (REG_P (reg));
16803 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16805 /* If we are dealing with DF mode, make sure we don't
16806 clear either of the registers it addresses. */
16807 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16808 if (arg_regs > 1)
16810 unsigned HOST_WIDE_INT mask;
16811 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16812 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16813 not_to_clear_mask |= mask;
16817 else
16819 /* Otherwise we can rely on the MODE to determine how many registers
16820 are being used by this argument. */
16821 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16822 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16823 if (arg_regs > 1)
16825 unsigned HOST_WIDE_INT
16826 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16827 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16828 not_to_clear_mask |= mask;
16833 return not_to_clear_mask;
16836 /* Saves the callee-saved registers, clears the callee-saved registers and the
16837 caller-saved registers not used to pass arguments before a cmse_nonsecure_call,
16838 and restores the callee-saved registers afterwards. */
16840 static void
16841 cmse_nonsecure_call_clear_caller_saved (void)
16843 basic_block bb;
16845 FOR_EACH_BB_FN (bb, cfun)
16847 rtx_insn *insn;
16849 FOR_BB_INSNS (bb, insn)
16851 uint64_t to_clear_mask, float_mask;
16852 rtx_insn *seq;
16853 rtx pat, call, unspec, reg, cleared_reg, tmp;
16854 unsigned int regno, maxregno;
16855 rtx address;
16856 CUMULATIVE_ARGS args_so_far_v;
16857 cumulative_args_t args_so_far;
16858 tree arg_type, fntype;
16859 bool using_r4, first_param = true;
16860 function_args_iterator args_iter;
16861 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16862 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16864 if (!NONDEBUG_INSN_P (insn))
16865 continue;
16867 if (!CALL_P (insn))
16868 continue;
16870 pat = PATTERN (insn);
16871 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16872 call = XVECEXP (pat, 0, 0);
16874 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16875 if (GET_CODE (call) == SET)
16876 call = SET_SRC (call);
16878 /* Check if it is a cmse_nonsecure_call. */
16879 unspec = XEXP (call, 0);
16880 if (GET_CODE (unspec) != UNSPEC
16881 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16882 continue;
16884 /* Determine the caller-saved registers we need to clear. */
16885 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16886 maxregno = NUM_ARG_REGS - 1;
16887 /* Only look at the caller-saved floating point registers in case of
16888 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16889 lazy store and loads which clear both caller- and callee-saved
16890 registers. */
16891 if (TARGET_HARD_FLOAT_ABI)
16893 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16894 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16895 to_clear_mask |= float_mask;
16896 maxregno = D7_VFP_REGNUM;
16899 /* Make sure the register used to hold the function address is not
16900 cleared. */
16901 address = RTVEC_ELT (XVEC (unspec, 0), 0);
16902 gcc_assert (MEM_P (address));
16903 gcc_assert (REG_P (XEXP (address, 0)));
16904 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
16906 /* Set basic block of call insn so that df rescan is performed on
16907 insns inserted here. */
16908 set_block_for_insn (insn, bb);
16909 df_set_flags (DF_DEFER_INSN_RESCAN);
16910 start_sequence ();
16912 /* Make sure the scheduler doesn't schedule other insns beyond
16913 here. */
16914 emit_insn (gen_blockage ());
16916 /* Walk through all arguments and clear registers appropriately. */
16918 fntype = TREE_TYPE (MEM_EXPR (address));
16919 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
16920 NULL_TREE);
16921 args_so_far = pack_cumulative_args (&args_so_far_v);
16922 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
16924 rtx arg_rtx;
16925 machine_mode arg_mode = TYPE_MODE (arg_type);
16927 if (VOID_TYPE_P (arg_type))
16928 continue;
16930 if (!first_param)
16931 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
16932 true);
16934 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
16935 true);
16936 gcc_assert (REG_P (arg_rtx));
16937 to_clear_mask
16938 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
16939 REGNO (arg_rtx),
16940 padding_bits_to_clear_ptr);
16942 first_param = false;
16945 /* Clear padding bits where needed. */
16946 cleared_reg = XEXP (address, 0);
16947 reg = gen_rtx_REG (SImode, IP_REGNUM);
16948 using_r4 = false;
16949 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
16951 if (padding_bits_to_clear[regno] == 0)
16952 continue;
16954 /* If this is a Thumb-1 target, copy the address of the function
16955 we are calling from 'r4' into 'ip' such that we can use r4 to
16956 clear the unused bits in the arguments. */
16957 if (TARGET_THUMB1 && !using_r4)
16959 using_r4 = true;
16960 reg = cleared_reg;
16961 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
16962 reg);
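/* Move the low 16 bits of the inverted padding mask into REG; the top
   16 bits are filled in below when any of them are set.  */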
16965 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
16966 emit_move_insn (reg, tmp);
16967 /* Also fill the top half of the negated
16968 padding_bits_to_clear. */
16969 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
16971 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
16972 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
16973 GEN_INT (16),
16974 GEN_INT (16)),
16975 tmp));
16978 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
16979 gen_rtx_REG (SImode, regno),
16980 reg));
16983 if (using_r4)
16984 emit_move_insn (cleared_reg,
16985 gen_rtx_REG (SImode, IP_REGNUM));
16987 /* We use right shift and left shift to clear the LSB of the address
16988 we jump to instead of using bic, to avoid having to use an extra
16989 register on Thumb-1. */
16990 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
16991 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16992 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
16993 emit_insn (gen_rtx_SET (cleared_reg, tmp));
16995 /* Clear all registers that might leak before doing a non-secure
16996 call. */
16997 for (regno = R0_REGNUM; regno <= maxregno; regno++)
16999 if (!(to_clear_mask & (1LL << regno)))
17000 continue;
17002 /* If regno is an even vfp register and its successor is also to
17003 be cleared, use vmov. */
17004 if (IS_VFP_REGNUM (regno))
17006 if (TARGET_VFP_DOUBLE
17007 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17008 && to_clear_mask & (1LL << (regno + 1)))
17009 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17010 CONST0_RTX (DFmode));
17011 else
17012 emit_move_insn (gen_rtx_REG (SFmode, regno),
17013 CONST0_RTX (SFmode));
17015 else
17016 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17019 seq = get_insns ();
17020 end_sequence ();
17021 emit_insn_before (seq, insn);
17027 /* Rewrite move insn into subtract of 0 if the condition codes will
17028 be useful in the next conditional jump insn. */
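/* For instance (illustrative only): if a preceding insn copies r2 into r3
   and the branch then compares r2 (or r3) with zero, the copy is rewritten
   as the flag-setting "subs r3, r2, #0" and the compare is retargeted at
   r3, so the separate compare against zero can later be omitted.  */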
17030 static void
17031 thumb1_reorg (void)
17033 basic_block bb;
17035 FOR_EACH_BB_FN (bb, cfun)
17037 rtx dest, src;
17038 rtx cmp, op0, op1, set = NULL;
17039 rtx_insn *prev, *insn = BB_END (bb);
17040 bool insn_clobbered = false;
17042 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17043 insn = PREV_INSN (insn);
17045 /* Find the last cbranchsi4_insn in basic block BB. */
17046 if (insn == BB_HEAD (bb)
17047 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17048 continue;
17050 /* Get the register with which we are comparing. */
17051 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17052 op0 = XEXP (cmp, 0);
17053 op1 = XEXP (cmp, 1);
17055 /* Check that comparison is against ZERO. */
17056 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17057 continue;
17059 /* Find the first flag setting insn before INSN in basic block BB. */
17060 gcc_assert (insn != BB_HEAD (bb));
17061 for (prev = PREV_INSN (insn);
17062 (!insn_clobbered
17063 && prev != BB_HEAD (bb)
17064 && (NOTE_P (prev)
17065 || DEBUG_INSN_P (prev)
17066 || ((set = single_set (prev)) != NULL
17067 && get_attr_conds (prev) == CONDS_NOCOND)));
17068 prev = PREV_INSN (prev))
17070 if (reg_set_p (op0, prev))
17071 insn_clobbered = true;
17074 /* Skip if op0 is clobbered by insn other than prev. */
17075 if (insn_clobbered)
17076 continue;
17078 if (!set)
17079 continue;
17081 dest = SET_DEST (set);
17082 src = SET_SRC (set);
17083 if (!low_register_operand (dest, SImode)
17084 || !low_register_operand (src, SImode))
17085 continue;
17087 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17088 in INSN. Both src and dest of the move insn are checked. */
17089 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17091 dest = copy_rtx (dest);
17092 src = copy_rtx (src);
17093 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17094 PATTERN (prev) = gen_rtx_SET (dest, src);
17095 INSN_CODE (prev) = -1;
17096 /* Set test register in INSN to dest. */
17097 XEXP (cmp, 0) = copy_rtx (dest);
17098 INSN_CODE (insn) = -1;
17103 /* Convert instructions to their cc-clobbering variant if possible, since
17104 that allows us to use smaller encodings. */
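/* For example (illustrative only): when the condition flags are dead after
   the insn, the 32-bit "and.w r0, r0, r1" can be replaced by "ands r0, r1",
   which has a 16-bit encoding.  */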
17106 static void
17107 thumb2_reorg (void)
17109 basic_block bb;
17110 regset_head live;
17112 INIT_REG_SET (&live);
17114 /* We are freeing block_for_insn in the toplev to keep compatibility
17115 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17116 compute_bb_for_insn ();
17117 df_analyze ();
17119 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17121 FOR_EACH_BB_FN (bb, cfun)
17123 if ((current_tune->disparage_flag_setting_t16_encodings
17124 == tune_params::DISPARAGE_FLAGS_ALL)
17125 && optimize_bb_for_speed_p (bb))
17126 continue;
17128 rtx_insn *insn;
17129 Convert_Action action = SKIP;
17130 Convert_Action action_for_partial_flag_setting
17131 = ((current_tune->disparage_flag_setting_t16_encodings
17132 != tune_params::DISPARAGE_FLAGS_NEITHER)
17133 && optimize_bb_for_speed_p (bb))
17134 ? SKIP : CONV;
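/* Added note (describes the initializer above): insns that only partially
   write the flags (logical operations, shifts and moves) are converted only
   when the tuning does not disparage flag-setting 16-bit encodings, or when
   this block is not being optimized for speed.  */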
17136 COPY_REG_SET (&live, DF_LR_OUT (bb));
17137 df_simulate_initialize_backwards (bb, &live);
17138 FOR_BB_INSNS_REVERSE (bb, insn)
17140 if (NONJUMP_INSN_P (insn)
17141 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17142 && GET_CODE (PATTERN (insn)) == SET)
17144 action = SKIP;
17145 rtx pat = PATTERN (insn);
17146 rtx dst = XEXP (pat, 0);
17147 rtx src = XEXP (pat, 1);
17148 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17150 if (UNARY_P (src) || BINARY_P (src))
17151 op0 = XEXP (src, 0);
17153 if (BINARY_P (src))
17154 op1 = XEXP (src, 1);
17156 if (low_register_operand (dst, SImode))
17158 switch (GET_CODE (src))
17160 case PLUS:
17161 /* Adding two registers and storing the result
17162 in the first source is already a 16-bit
17163 operation. */
17164 if (rtx_equal_p (dst, op0)
17165 && register_operand (op1, SImode))
17166 break;
17168 if (low_register_operand (op0, SImode))
17170 /* ADDS <Rd>,<Rn>,<Rm> */
17171 if (low_register_operand (op1, SImode))
17172 action = CONV;
17173 /* ADDS <Rdn>,#<imm8> */
17174 /* SUBS <Rdn>,#<imm8> */
17175 else if (rtx_equal_p (dst, op0)
17176 && CONST_INT_P (op1)
17177 && IN_RANGE (INTVAL (op1), -255, 255))
17178 action = CONV;
17179 /* ADDS <Rd>,<Rn>,#<imm3> */
17180 /* SUBS <Rd>,<Rn>,#<imm3> */
17181 else if (CONST_INT_P (op1)
17182 && IN_RANGE (INTVAL (op1), -7, 7))
17183 action = CONV;
17185 /* ADCS <Rd>, <Rn> */
17186 else if (GET_CODE (XEXP (src, 0)) == PLUS
17187 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17188 && low_register_operand (XEXP (XEXP (src, 0), 1),
17189 SImode)
17190 && COMPARISON_P (op1)
17191 && cc_register (XEXP (op1, 0), VOIDmode)
17192 && maybe_get_arm_condition_code (op1) == ARM_CS
17193 && XEXP (op1, 1) == const0_rtx)
17194 action = CONV;
17195 break;
17197 case MINUS:
17198 /* RSBS <Rd>,<Rn>,#0
17199 Not handled here: see NEG below. */
17200 /* SUBS <Rd>,<Rn>,#<imm3>
17201 SUBS <Rdn>,#<imm8>
17202 Not handled here: see PLUS above. */
17203 /* SUBS <Rd>,<Rn>,<Rm> */
17204 if (low_register_operand (op0, SImode)
17205 && low_register_operand (op1, SImode))
17206 action = CONV;
17207 break;
17209 case MULT:
17210 /* MULS <Rdm>,<Rn>,<Rdm>
17211 As an exception to the rule, this is only used
17212 when optimizing for size since MULS is slow on all
17213 known implementations. We do not even want to use
17214 MULS in cold code, if optimizing for speed, so we
17215 test the global flag here. */
17216 if (!optimize_size)
17217 break;
17218 /* Fall through. */
17219 case AND:
17220 case IOR:
17221 case XOR:
17222 /* ANDS <Rdn>,<Rm> */
17223 if (rtx_equal_p (dst, op0)
17224 && low_register_operand (op1, SImode))
17225 action = action_for_partial_flag_setting;
17226 else if (rtx_equal_p (dst, op1)
17227 && low_register_operand (op0, SImode))
17228 action = action_for_partial_flag_setting == SKIP
17229 ? SKIP : SWAP_CONV;
17230 break;
17232 case ASHIFTRT:
17233 case ASHIFT:
17234 case LSHIFTRT:
17235 /* ASRS <Rdn>,<Rm> */
17236 /* LSRS <Rdn>,<Rm> */
17237 /* LSLS <Rdn>,<Rm> */
17238 if (rtx_equal_p (dst, op0)
17239 && low_register_operand (op1, SImode))
17240 action = action_for_partial_flag_setting;
17241 /* ASRS <Rd>,<Rm>,#<imm5> */
17242 /* LSRS <Rd>,<Rm>,#<imm5> */
17243 /* LSLS <Rd>,<Rm>,#<imm5> */
17244 else if (low_register_operand (op0, SImode)
17245 && CONST_INT_P (op1)
17246 && IN_RANGE (INTVAL (op1), 0, 31))
17247 action = action_for_partial_flag_setting;
17248 break;
17250 case ROTATERT:
17251 /* RORS <Rdn>,<Rm> */
17252 if (rtx_equal_p (dst, op0)
17253 && low_register_operand (op1, SImode))
17254 action = action_for_partial_flag_setting;
17255 break;
17257 case NOT:
17258 /* MVNS <Rd>,<Rm> */
17259 if (low_register_operand (op0, SImode))
17260 action = action_for_partial_flag_setting;
17261 break;
17263 case NEG:
17264 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17265 if (low_register_operand (op0, SImode))
17266 action = CONV;
17267 break;
17269 case CONST_INT:
17270 /* MOVS <Rd>,#<imm8> */
17271 if (CONST_INT_P (src)
17272 && IN_RANGE (INTVAL (src), 0, 255))
17273 action = action_for_partial_flag_setting;
17274 break;
17276 case REG:
17277 /* MOVS and MOV<c> with registers have different
17278 encodings, so are not relevant here. */
17279 break;
17281 default:
17282 break;
17286 if (action != SKIP)
17288 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17289 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17290 rtvec vec;
17292 if (action == SWAP_CONV)
17294 src = copy_rtx (src);
17295 XEXP (src, 0) = op1;
17296 XEXP (src, 1) = op0;
17297 pat = gen_rtx_SET (dst, src);
17298 vec = gen_rtvec (2, pat, clobber);
17300 else /* action == CONV */
17301 vec = gen_rtvec (2, pat, clobber);
17303 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17304 INSN_CODE (insn) = -1;
17308 if (NONDEBUG_INSN_P (insn))
17309 df_simulate_one_insn_backwards (bb, insn, &live);
17313 CLEAR_REG_SET (&live);
17316 /* Gcc puts the pool in the wrong place for ARM, since we can only
17317 load addresses a limited distance around the pc. We do some
17318 special munging to move the constant pool values to the correct
17319 point in the code. */
17320 static void
17321 arm_reorg (void)
17323 rtx_insn *insn;
17324 HOST_WIDE_INT address = 0;
17325 Mfix * fix;
17327 if (use_cmse)
17328 cmse_nonsecure_call_clear_caller_saved ();
17329 if (TARGET_THUMB1)
17330 thumb1_reorg ();
17331 else if (TARGET_THUMB2)
17332 thumb2_reorg ();
17334 /* Ensure all insns that must be split have been split at this point.
17335 Otherwise, the pool placement code below may compute incorrect
17336 insn lengths. Note that when optimizing, all insns have already
17337 been split at this point. */
17338 if (!optimize)
17339 split_all_insns_noflow ();
17341 minipool_fix_head = minipool_fix_tail = NULL;
17343 /* The first insn must always be a note, or the code below won't
17344 scan it properly. */
17345 insn = get_insns ();
17346 gcc_assert (NOTE_P (insn));
17347 minipool_pad = 0;
17349 /* Scan all the insns and record the operands that will need fixing. */
17350 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17352 if (BARRIER_P (insn))
17353 push_minipool_barrier (insn, address);
17354 else if (INSN_P (insn))
17356 rtx_jump_table_data *table;
17358 note_invalid_constants (insn, address, true);
17359 address += get_attr_length (insn);
17361 /* If the insn is a vector jump, add the size of the table
17362 and skip the table. */
17363 if (tablejump_p (insn, NULL, &table))
17365 address += get_jump_table_size (table);
17366 insn = table;
17369 else if (LABEL_P (insn))
17370 /* Add the worst-case padding due to alignment. We don't add
17371 the _current_ padding because the minipool insertions
17372 themselves might change it. */
17373 address += get_label_padding (insn);
17376 fix = minipool_fix_head;
17378 /* Now scan the fixups and perform the required changes. */
17379 while (fix)
17381 Mfix * ftmp;
17382 Mfix * fdel;
17383 Mfix * last_added_fix;
17384 Mfix * last_barrier = NULL;
17385 Mfix * this_fix;
17387 /* Skip any further barriers before the next fix. */
17388 while (fix && BARRIER_P (fix->insn))
17389 fix = fix->next;
17391 /* No more fixes. */
17392 if (fix == NULL)
17393 break;
17395 last_added_fix = NULL;
17397 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17399 if (BARRIER_P (ftmp->insn))
17401 if (ftmp->address >= minipool_vector_head->max_address)
17402 break;
17404 last_barrier = ftmp;
17406 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17407 break;
17409 last_added_fix = ftmp; /* Keep track of the last fix added. */
17412 /* If we found a barrier, drop back to that; any fixes that we
17413 could have reached but come after the barrier will now go in
17414 the next mini-pool. */
17415 if (last_barrier != NULL)
17417 /* Reduce the refcount for those fixes that won't go into this
17418 pool after all. */
17419 for (fdel = last_barrier->next;
17420 fdel && fdel != ftmp;
17421 fdel = fdel->next)
17423 fdel->minipool->refcount--;
17424 fdel->minipool = NULL;
17427 ftmp = last_barrier;
17429 else
17431 /* ftmp is the first fix that we can't fit into this pool and
17432 there are no natural barriers that we could use. Insert a
17433 new barrier in the code somewhere between the previous
17434 fix and this one, and arrange to jump around it. */
17435 HOST_WIDE_INT max_address;
17437 /* The last item on the list of fixes must be a barrier, so
17438 we can never run off the end of the list of fixes without
17439 last_barrier being set. */
17440 gcc_assert (ftmp);
17442 max_address = minipool_vector_head->max_address;
17443 /* Check that there isn't another fix that is in range that
17444 we couldn't fit into this pool because the pool was
17445 already too large: we need to put the pool before such an
17446 instruction. The pool itself may come just after the
17447 fix because create_fix_barrier also allows space for a
17448 jump instruction. */
17449 if (ftmp->address < max_address)
17450 max_address = ftmp->address + 1;
17452 last_barrier = create_fix_barrier (last_added_fix, max_address);
17455 assign_minipool_offsets (last_barrier);
17457 while (ftmp)
17459 if (!BARRIER_P (ftmp->insn)
17460 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17461 == NULL))
17462 break;
17464 ftmp = ftmp->next;
17467 /* Scan over the fixes we have identified for this pool, fixing them
17468 up and adding the constants to the pool itself. */
17469 for (this_fix = fix; this_fix && ftmp != this_fix;
17470 this_fix = this_fix->next)
17471 if (!BARRIER_P (this_fix->insn))
17473 rtx addr
17474 = plus_constant (Pmode,
17475 gen_rtx_LABEL_REF (VOIDmode,
17476 minipool_vector_label),
17477 this_fix->minipool->offset);
17478 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17481 dump_minipool (last_barrier->insn);
17482 fix = ftmp;
17485 /* From now on we must synthesize any constants that we can't handle
17486 directly. This can happen if the RTL gets split during final
17487 instruction generation. */
17488 cfun->machine->after_arm_reorg = 1;
17490 /* Free the minipool memory. */
17491 obstack_free (&minipool_obstack, minipool_startobj);
17494 /* Routines to output assembly language. */
17496 /* Return string representation of passed in real value. */
17497 static const char *
17498 fp_const_from_val (REAL_VALUE_TYPE *r)
17500 if (!fp_consts_inited)
17501 init_fp_table ();
17503 gcc_assert (real_equal (r, &value_fp0));
17504 return "0";
17507 /* OPERANDS[0] is the entire list of insns that constitute pop,
17508 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17509 is in the list, UPDATE is true iff the list contains explicit
17510 update of base register. */
17511 void
17512 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17513 bool update)
17515 int i;
17516 char pattern[100];
17517 int offset;
17518 const char *conditional;
17519 int num_saves = XVECLEN (operands[0], 0);
17520 unsigned int regno;
17521 unsigned int regno_base = REGNO (operands[1]);
17522 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17524 offset = 0;
17525 offset += update ? 1 : 0;
17526 offset += return_pc ? 1 : 0;
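/* The leading elements of the PARALLEL describe the optional base register
   update and the return; the register loads start at index OFFSET.  */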
17528 /* Is the base register in the list? */
17529 for (i = offset; i < num_saves; i++)
17531 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17532 /* If SP is in the list, then the base register must be SP. */
17533 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17534 /* If base register is in the list, there must be no explicit update. */
17535 if (regno == regno_base)
17536 gcc_assert (!update);
17539 conditional = reverse ? "%?%D0" : "%?%d0";
17540 /* Can't use POP if returning from an interrupt. */
17541 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17542 sprintf (pattern, "pop%s\t{", conditional);
17543 else
17545 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17546 It's just a convention; their semantics are identical. */
17547 if (regno_base == SP_REGNUM)
17548 sprintf (pattern, "ldmfd%s\t", conditional);
17549 else if (update)
17550 sprintf (pattern, "ldmia%s\t", conditional);
17551 else
17552 sprintf (pattern, "ldm%s\t", conditional);
17554 strcat (pattern, reg_names[regno_base]);
17555 if (update)
17556 strcat (pattern, "!, {");
17557 else
17558 strcat (pattern, ", {");
17561 /* Output the first destination register. */
17562 strcat (pattern,
17563 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17565 /* Output the rest of the destination registers. */
17566 for (i = offset + 1; i < num_saves; i++)
17568 strcat (pattern, ", ");
17569 strcat (pattern,
17570 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17573 strcat (pattern, "}");
17575 if (interrupt_p && return_pc)
17576 strcat (pattern, "^");
17578 output_asm_insn (pattern, &cond);
17582 /* Output the assembly for a store multiple. */
17584 const char *
17585 vfp_output_vstmd (rtx * operands)
17587 char pattern[100];
17588 int p;
17589 int base;
17590 int i;
17591 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17592 ? XEXP (operands[0], 0)
17593 : XEXP (XEXP (operands[0], 0), 0);
17594 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17596 if (push_p)
17597 strcpy (pattern, "vpush%?.64\t{%P1");
17598 else
17599 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17601 p = strlen (pattern);
17603 gcc_assert (REG_P (operands[1]));
17605 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
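/* Operand 1, the first D register of the block, is already named in the
   pattern via %P1; append the remaining D registers by number.  */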
17606 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17608 p += sprintf (&pattern[p], ", d%d", base + i);
17610 strcpy (&pattern[p], "}");
17612 output_asm_insn (pattern, operands);
17613 return "";
17617 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17618 number of bytes pushed. */
17620 static int
17621 vfp_emit_fstmd (int base_reg, int count)
17623 rtx par;
17624 rtx dwarf;
17625 rtx tmp, reg;
17626 int i;
17628 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17629 register pairs are stored by a store multiple insn. We avoid this
17630 by pushing an extra pair. */
17631 if (count == 2 && !arm_arch6)
17633 if (base_reg == LAST_VFP_REGNUM - 3)
17634 base_reg -= 2;
17635 count++;
17638 /* FSTMD may not store more than 16 doubleword registers at once. Split
17639 larger stores into multiple parts (up to a maximum of two, in
17640 practice). */
17641 if (count > 16)
17643 int saved;
17644 /* NOTE: base_reg is an internal register number, so each D register
17645 counts as 2. */
17646 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17647 saved += vfp_emit_fstmd (base_reg, 16);
17648 return saved;
17651 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17652 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17654 reg = gen_rtx_REG (DFmode, base_reg);
17655 base_reg += 2;
17657 XVECEXP (par, 0, 0)
17658 = gen_rtx_SET (gen_frame_mem
17659 (BLKmode,
17660 gen_rtx_PRE_MODIFY (Pmode,
17661 stack_pointer_rtx,
17662 plus_constant
17663 (Pmode, stack_pointer_rtx,
17664 - (count * 8)))
17666 gen_rtx_UNSPEC (BLKmode,
17667 gen_rtvec (1, reg),
17668 UNSPEC_PUSH_MULT));
17670 tmp = gen_rtx_SET (stack_pointer_rtx,
17671 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17672 RTX_FRAME_RELATED_P (tmp) = 1;
17673 XVECEXP (dwarf, 0, 0) = tmp;
17675 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17676 RTX_FRAME_RELATED_P (tmp) = 1;
17677 XVECEXP (dwarf, 0, 1) = tmp;
17679 for (i = 1; i < count; i++)
17681 reg = gen_rtx_REG (DFmode, base_reg);
17682 base_reg += 2;
17683 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17685 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17686 plus_constant (Pmode,
17687 stack_pointer_rtx,
17688 i * 8)),
17689 reg);
17690 RTX_FRAME_RELATED_P (tmp) = 1;
17691 XVECEXP (dwarf, 0, i + 1) = tmp;
17694 par = emit_insn (par);
17695 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17696 RTX_FRAME_RELATED_P (par) = 1;
17698 return count * 8;
17701 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17702 has the cmse_nonsecure_call attribute, and false otherwise. */
17704 bool
17705 detect_cmse_nonsecure_call (tree addr)
17707 if (!addr)
17708 return FALSE;
17710 tree fntype = TREE_TYPE (addr);
17711 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17712 TYPE_ATTRIBUTES (fntype)))
17713 return TRUE;
17714 return FALSE;
17718 /* Emit a call instruction with pattern PAT. ADDR is the address of
17719 the call target. */
17721 void
17722 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17724 rtx insn;
17726 insn = emit_call_insn (pat);
17728 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17729 If the call might use such an entry, add a use of the PIC register
17730 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17731 if (TARGET_VXWORKS_RTP
17732 && flag_pic
17733 && !sibcall
17734 && GET_CODE (addr) == SYMBOL_REF
17735 && (SYMBOL_REF_DECL (addr)
17736 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17737 : !SYMBOL_REF_LOCAL_P (addr)))
17739 require_pic_register ();
17740 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17743 if (TARGET_AAPCS_BASED)
17745 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17746 linker. We need to add an IP clobber to allow setting
17747 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17748 is not needed since it's a fixed register. */
17749 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17750 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17754 /* Output a 'call' insn. */
17755 const char *
17756 output_call (rtx *operands)
17758 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17760 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17761 if (REGNO (operands[0]) == LR_REGNUM)
17763 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17764 output_asm_insn ("mov%?\t%0, %|lr", operands);
17767 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17769 if (TARGET_INTERWORK || arm_arch4t)
17770 output_asm_insn ("bx%?\t%0", operands);
17771 else
17772 output_asm_insn ("mov%?\t%|pc, %0", operands);
17774 return "";
17777 /* Output a move from arm registers to arm registers of a long double.
17778 OPERANDS[0] is the destination.
17779 OPERANDS[1] is the source. */
17780 const char *
17781 output_mov_long_double_arm_from_arm (rtx *operands)
17783 /* We have to be careful here because the two might overlap. */
17784 int dest_start = REGNO (operands[0]);
17785 int src_start = REGNO (operands[1]);
17786 rtx ops[2];
17787 int i;
17789 if (dest_start < src_start)
17791 for (i = 0; i < 3; i++)
17793 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17794 ops[1] = gen_rtx_REG (SImode, src_start + i);
17795 output_asm_insn ("mov%?\t%0, %1", ops);
17798 else
17800 for (i = 2; i >= 0; i--)
17802 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17803 ops[1] = gen_rtx_REG (SImode, src_start + i);
17804 output_asm_insn ("mov%?\t%0, %1", ops);
17808 return "";
17811 void
17812 arm_emit_movpair (rtx dest, rtx src)
17814 /* If the src is an immediate, simplify it. */
17815 if (CONST_INT_P (src))
17817 HOST_WIDE_INT val = INTVAL (src);
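/* Added note (describes the code below): emit the low 16 bits first
   (clearing the top half), then, when the high half is non-zero, insert it
   with a ZERO_EXTRACT store -- typically a movw/movt pair.  */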
17818 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17819 if ((val >> 16) & 0x0000ffff)
17821 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17822 GEN_INT (16)),
17823 GEN_INT ((val >> 16) & 0x0000ffff));
17824 rtx_insn *insn = get_last_insn ();
17825 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17827 return;
17829 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17830 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17831 rtx_insn *insn = get_last_insn ();
17832 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17835 /* Output a move between double words. It must be REG<-MEM
17836 or MEM<-REG. */
17837 const char *
17838 output_move_double (rtx *operands, bool emit, int *count)
17840 enum rtx_code code0 = GET_CODE (operands[0]);
17841 enum rtx_code code1 = GET_CODE (operands[1]);
17842 rtx otherops[3];
17843 if (count)
17844 *count = 1;
17846 /* The only case when this might happen is when
17847 you are looking at the length of a DImode instruction
17848 that has an invalid constant in it. */
17849 if (code0 == REG && code1 != MEM)
17851 gcc_assert (!emit);
17852 *count = 2;
17853 return "";
17856 if (code0 == REG)
17858 unsigned int reg0 = REGNO (operands[0]);
17860 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17862 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17864 switch (GET_CODE (XEXP (operands[1], 0)))
17866 case REG:
17868 if (emit)
17870 if (TARGET_LDRD
17871 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17872 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17873 else
17874 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17876 break;
17878 case PRE_INC:
17879 gcc_assert (TARGET_LDRD);
17880 if (emit)
17881 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17882 break;
17884 case PRE_DEC:
17885 if (emit)
17887 if (TARGET_LDRD)
17888 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17889 else
17890 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17892 break;
17894 case POST_INC:
17895 if (emit)
17897 if (TARGET_LDRD)
17898 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
17899 else
17900 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
17902 break;
17904 case POST_DEC:
17905 gcc_assert (TARGET_LDRD);
17906 if (emit)
17907 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
17908 break;
17910 case PRE_MODIFY:
17911 case POST_MODIFY:
17912 /* Auto-increment addressing modes should never have overlapping
17913 base and destination registers, and overlapping index registers
17914 are already prohibited, so this doesn't need to worry about
17915 fix_cm3_ldrd. */
17916 otherops[0] = operands[0];
17917 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17918 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17920 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17922 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17924 /* Registers overlap so split out the increment. */
17925 if (emit)
17927 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17928 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
17930 if (count)
17931 *count = 2;
17933 else
17935 /* Use a single insn if we can.
17936 FIXME: IWMMXT allows offsets larger than ldrd can
17937 handle, fix these up with a pair of ldr. */
17938 if (TARGET_THUMB2
17939 || !CONST_INT_P (otherops[2])
17940 || (INTVAL (otherops[2]) > -256
17941 && INTVAL (otherops[2]) < 256))
17943 if (emit)
17944 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
17946 else
17948 if (emit)
17950 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17951 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17953 if (count)
17954 *count = 2;
17959 else
17961 /* Use a single insn if we can.
17962 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17963 fix these up with a pair of ldr. */
17964 if (TARGET_THUMB2
17965 || !CONST_INT_P (otherops[2])
17966 || (INTVAL (otherops[2]) > -256
17967 && INTVAL (otherops[2]) < 256))
17969 if (emit)
17970 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
17972 else
17974 if (emit)
17976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17977 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17979 if (count)
17980 *count = 2;
17983 break;
17985 case LABEL_REF:
17986 case CONST:
17987 /* We might be able to use ldrd %0, %1 here. However, the range is
17988 different to ldr/adr, and it is broken on some ARMv7-M
17989 implementations. */
17990 /* Use the second register of the pair to avoid problematic
17991 overlap. */
17992 otherops[1] = operands[1];
17993 if (emit)
17994 output_asm_insn ("adr%?\t%0, %1", otherops);
17995 operands[1] = otherops[0];
17996 if (emit)
17998 if (TARGET_LDRD)
17999 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18000 else
18001 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18004 if (count)
18005 *count = 2;
18006 break;
18008 /* ??? This needs checking for thumb2. */
18009 default:
18010 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18011 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18013 otherops[0] = operands[0];
18014 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18015 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18017 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18019 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18021 switch ((int) INTVAL (otherops[2]))
18023 case -8:
18024 if (emit)
18025 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18026 return "";
18027 case -4:
18028 if (TARGET_THUMB2)
18029 break;
18030 if (emit)
18031 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18032 return "";
18033 case 4:
18034 if (TARGET_THUMB2)
18035 break;
18036 if (emit)
18037 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18038 return "";
18041 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18042 operands[1] = otherops[0];
18043 if (TARGET_LDRD
18044 && (REG_P (otherops[2])
18045 || TARGET_THUMB2
18046 || (CONST_INT_P (otherops[2])
18047 && INTVAL (otherops[2]) > -256
18048 && INTVAL (otherops[2]) < 256)))
18050 if (reg_overlap_mentioned_p (operands[0],
18051 otherops[2]))
18053 /* Swap base and index registers over to
18054 avoid a conflict. */
18055 std::swap (otherops[1], otherops[2]);
18057 /* If both registers conflict, it will usually
18058 have been fixed by a splitter. */
18059 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18060 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18062 if (emit)
18064 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18065 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18067 if (count)
18068 *count = 2;
18070 else
18072 otherops[0] = operands[0];
18073 if (emit)
18074 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18076 return "";
18079 if (CONST_INT_P (otherops[2]))
18081 if (emit)
18083 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18084 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18085 else
18086 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18089 else
18091 if (emit)
18092 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18095 else
18097 if (emit)
18098 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18101 if (count)
18102 *count = 2;
18104 if (TARGET_LDRD)
18105 return "ldrd%?\t%0, [%1]";
18107 return "ldmia%?\t%1, %M0";
18109 else
18111 otherops[1] = adjust_address (operands[1], SImode, 4);
18112 /* Take care of overlapping base/data reg. */
18113 if (reg_mentioned_p (operands[0], operands[1]))
18115 if (emit)
18117 output_asm_insn ("ldr%?\t%0, %1", otherops);
18118 output_asm_insn ("ldr%?\t%0, %1", operands);
18120 if (count)
18121 *count = 2;
18124 else
18126 if (emit)
18128 output_asm_insn ("ldr%?\t%0, %1", operands);
18129 output_asm_insn ("ldr%?\t%0, %1", otherops);
18131 if (count)
18132 *count = 2;
18137 else
18139 /* Constraints should ensure this. */
18140 gcc_assert (code0 == MEM && code1 == REG);
18141 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18142 || (TARGET_ARM && TARGET_LDRD));
18144 switch (GET_CODE (XEXP (operands[0], 0)))
18146 case REG:
18147 if (emit)
18149 if (TARGET_LDRD)
18150 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18151 else
18152 output_asm_insn ("stm%?\t%m0, %M1", operands);
18154 break;
18156 case PRE_INC:
18157 gcc_assert (TARGET_LDRD);
18158 if (emit)
18159 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18160 break;
18162 case PRE_DEC:
18163 if (emit)
18165 if (TARGET_LDRD)
18166 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18167 else
18168 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18170 break;
18172 case POST_INC:
18173 if (emit)
18175 if (TARGET_LDRD)
18176 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18177 else
18178 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18180 break;
18182 case POST_DEC:
18183 gcc_assert (TARGET_LDRD);
18184 if (emit)
18185 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18186 break;
18188 case PRE_MODIFY:
18189 case POST_MODIFY:
18190 otherops[0] = operands[1];
18191 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18192 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18194 /* IWMMXT allows offsets larger than ldrd can handle,
18195 fix these up with a pair of ldr. */
18196 if (!TARGET_THUMB2
18197 && CONST_INT_P (otherops[2])
18198 && (INTVAL(otherops[2]) <= -256
18199 || INTVAL(otherops[2]) >= 256))
18201 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18203 if (emit)
18205 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18206 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18208 if (count)
18209 *count = 2;
18211 else
18213 if (emit)
18215 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18216 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18218 if (count)
18219 *count = 2;
18222 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18224 if (emit)
18225 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18227 else
18229 if (emit)
18230 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18232 break;
18234 case PLUS:
18235 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18236 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18238 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18240 case -8:
18241 if (emit)
18242 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18243 return "";
18245 case -4:
18246 if (TARGET_THUMB2)
18247 break;
18248 if (emit)
18249 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18250 return "";
18252 case 4:
18253 if (TARGET_THUMB2)
18254 break;
18255 if (emit)
18256 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18257 return "";
18260 if (TARGET_LDRD
18261 && (REG_P (otherops[2])
18262 || TARGET_THUMB2
18263 || (CONST_INT_P (otherops[2])
18264 && INTVAL (otherops[2]) > -256
18265 && INTVAL (otherops[2]) < 256)))
18267 otherops[0] = operands[1];
18268 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18269 if (emit)
18270 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18271 return "";
18273 /* Fall through */
18275 default:
18276 otherops[0] = adjust_address (operands[0], SImode, 4);
18277 otherops[1] = operands[1];
18278 if (emit)
18280 output_asm_insn ("str%?\t%1, %0", operands);
18281 output_asm_insn ("str%?\t%H1, %0", otherops);
18283 if (count)
18284 *count = 2;
18288 return "";
18291 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18292 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18294 const char *
18295 output_move_quad (rtx *operands)
18297 if (REG_P (operands[0]))
18299 /* Load, or reg->reg move. */
18301 if (MEM_P (operands[1]))
18303 switch (GET_CODE (XEXP (operands[1], 0)))
18305 case REG:
18306 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18307 break;
18309 case LABEL_REF:
18310 case CONST:
18311 output_asm_insn ("adr%?\t%0, %1", operands);
18312 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18313 break;
18315 default:
18316 gcc_unreachable ();
18319 else
18321 rtx ops[2];
18322 int dest, src, i;
18324 gcc_assert (REG_P (operands[1]));
18326 dest = REGNO (operands[0]);
18327 src = REGNO (operands[1]);
18329 /* This seems pretty dumb, but hopefully GCC won't try to do it
18330 very often. */
18331 if (dest < src)
18332 for (i = 0; i < 4; i++)
18334 ops[0] = gen_rtx_REG (SImode, dest + i);
18335 ops[1] = gen_rtx_REG (SImode, src + i);
18336 output_asm_insn ("mov%?\t%0, %1", ops);
18338 else
18339 for (i = 3; i >= 0; i--)
18341 ops[0] = gen_rtx_REG (SImode, dest + i);
18342 ops[1] = gen_rtx_REG (SImode, src + i);
18343 output_asm_insn ("mov%?\t%0, %1", ops);
18347 else
18349 gcc_assert (MEM_P (operands[0]));
18350 gcc_assert (REG_P (operands[1]));
18351 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18353 switch (GET_CODE (XEXP (operands[0], 0)))
18355 case REG:
18356 output_asm_insn ("stm%?\t%m0, %M1", operands);
18357 break;
18359 default:
18360 gcc_unreachable ();
18364 return "";
18367 /* Output a VFP load or store instruction. */
18369 const char *
18370 output_move_vfp (rtx *operands)
18372 rtx reg, mem, addr, ops[2];
18373 int load = REG_P (operands[0]);
18374 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18375 int sp = (!TARGET_VFP_FP16INST
18376 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18377 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18378 const char *templ;
18379 char buff[50];
18380 machine_mode mode;
18382 reg = operands[!load];
18383 mem = operands[load];
18385 mode = GET_MODE (reg);
18387 gcc_assert (REG_P (reg));
18388 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18389 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18390 || mode == SFmode
18391 || mode == DFmode
18392 || mode == HImode
18393 || mode == SImode
18394 || mode == DImode
18395 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18396 gcc_assert (MEM_P (mem));
18398 addr = XEXP (mem, 0);
18400 switch (GET_CODE (addr))
18402 case PRE_DEC:
18403 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18404 ops[0] = XEXP (addr, 0);
18405 ops[1] = reg;
18406 break;
18408 case POST_INC:
18409 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18410 ops[0] = XEXP (addr, 0);
18411 ops[1] = reg;
18412 break;
18414 default:
18415 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18416 ops[0] = reg;
18417 ops[1] = mem;
18418 break;
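/* For instance (illustrative only), a DFmode load from a plain register
   address ends up as "vldr%?.64\t%P0, %1".  */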
18421 sprintf (buff, templ,
18422 load ? "ld" : "st",
18423 dp ? "64" : sp ? "32" : "16",
18424 dp ? "P" : "",
18425 integer_p ? "\t%@ int" : "");
18426 output_asm_insn (buff, ops);
18428 return "";
18431 /* Output a Neon double-word or quad-word load or store, or a load
18432 or store for larger structure modes.
18434 WARNING: The ordering of elements is weird in big-endian mode,
18435 because the EABI requires that vectors stored in memory appear
18436 as though they were stored by a VSTM instruction.
18437 GCC RTL defines element ordering based on in-memory order.
18438 This can be different from the architectural ordering of elements
18439 within a NEON register. The intrinsics defined in arm_neon.h use the
18440 NEON register element ordering, not the GCC RTL element ordering.
18442 For example, the in-memory ordering of a big-endian quadword
18443 vector with 16-bit elements when stored from register pair {d0,d1}
18444 will be (lowest address first, d0[N] is NEON register element N):
18446 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18448 When necessary, quadword registers (dN, dN+1) are moved to ARM
18449 registers from rN in the order:
18451 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18453 So that STM/LDM can be used on vectors in ARM registers, and the
18454 same memory layout will result as if VSTM/VLDM were used.
18456 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18457 possible, which allows use of appropriate alignment tags.
18458 Note that the choice of "64" is independent of the actual vector
18459 element size; this size simply ensures that the behavior is
18460 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18462 Due to limitations of those instructions, use of VST1.64/VLD1.64
18463 is not possible if:
18464 - the address contains PRE_DEC, or
18465 - the mode refers to more than 4 double-word registers
18467 In those cases, it would be possible to replace VSTM/VLDM by a
18468 sequence of instructions; this is not currently implemented since
18469 this is not certain to actually improve performance. */
18471 const char *
18472 output_move_neon (rtx *operands)
18474 rtx reg, mem, addr, ops[2];
18475 int regno, nregs, load = REG_P (operands[0]);
18476 const char *templ;
18477 char buff[50];
18478 machine_mode mode;
18480 reg = operands[!load];
18481 mem = operands[load];
18483 mode = GET_MODE (reg);
18485 gcc_assert (REG_P (reg));
18486 regno = REGNO (reg);
18487 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18488 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18489 || NEON_REGNO_OK_FOR_QUAD (regno));
18490 gcc_assert (VALID_NEON_DREG_MODE (mode)
18491 || VALID_NEON_QREG_MODE (mode)
18492 || VALID_NEON_STRUCT_MODE (mode));
18493 gcc_assert (MEM_P (mem));
18495 addr = XEXP (mem, 0);
18497 /* Strip off const from addresses like (const (plus (...))). */
18498 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18499 addr = XEXP (addr, 0);
18501 switch (GET_CODE (addr))
18503 case POST_INC:
18504 /* We have to use vldm / vstm for too-large modes. */
18505 if (nregs > 4)
18507 templ = "v%smia%%?\t%%0!, %%h1";
18508 ops[0] = XEXP (addr, 0);
18510 else
18512 templ = "v%s1.64\t%%h1, %%A0";
18513 ops[0] = mem;
18515 ops[1] = reg;
18516 break;
18518 case PRE_DEC:
18519 /* We have to use vldm / vstm in this case, since there is no
18520 pre-decrement form of the vld1 / vst1 instructions. */
18521 templ = "v%smdb%%?\t%%0!, %%h1";
18522 ops[0] = XEXP (addr, 0);
18523 ops[1] = reg;
18524 break;
18526 case POST_MODIFY:
18527 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18528 gcc_unreachable ();
18530 case REG:
18531 /* We have to use vldm / vstm for too-large modes. */
18532 if (nregs > 1)
18534 if (nregs > 4)
18535 templ = "v%smia%%?\t%%m0, %%h1";
18536 else
18537 templ = "v%s1.64\t%%h1, %%A0";
18539 ops[0] = mem;
18540 ops[1] = reg;
18541 break;
18543 /* Fall through. */
18544 case LABEL_REF:
18545 case PLUS:
18547 int i;
18548 int overlap = -1;
18549 for (i = 0; i < nregs; i++)
18551 /* We're only using DImode here because it's a convenient size. */
18552 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18553 ops[1] = adjust_address (mem, DImode, 8 * i);
18554 if (reg_overlap_mentioned_p (ops[0], mem))
18556 gcc_assert (overlap == -1);
18557 overlap = i;
18559 else
18561 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18562 output_asm_insn (buff, ops);
18565 if (overlap != -1)
18567 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18568 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18569 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18570 output_asm_insn (buff, ops);
18573 return "";
18576 default:
18577 gcc_unreachable ();
18580 sprintf (buff, templ, load ? "ld" : "st");
18581 output_asm_insn (buff, ops);
18583 return "";
18586 /* Compute and return the length of neon_mov<mode>, where <mode> is
18587 one of VSTRUCT modes: EI, OI, CI or XI. */
18589 arm_attr_length_move_neon (rtx_insn *insn)
18591 rtx reg, mem, addr;
18592 int load;
18593 machine_mode mode;
18595 extract_insn_cached (insn);
18597 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18599 mode = GET_MODE (recog_data.operand[0]);
18600 switch (mode)
18602 case EImode:
18603 case OImode:
18604 return 8;
18605 case CImode:
18606 return 12;
18607 case XImode:
18608 return 16;
18609 default:
18610 gcc_unreachable ();
18614 load = REG_P (recog_data.operand[0]);
18615 reg = recog_data.operand[!load];
18616 mem = recog_data.operand[load];
18618 gcc_assert (MEM_P (mem));
18620 mode = GET_MODE (reg);
18621 addr = XEXP (mem, 0);
18623 /* Strip off const from addresses like (const (plus (...))). */
18624 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18625 addr = XEXP (addr, 0);
18627 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18629 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18630 return insns * 4;
18632 else
18633 return 4;
18636 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18637 return zero. */
18640 arm_address_offset_is_imm (rtx_insn *insn)
18642 rtx mem, addr;
18644 extract_insn_cached (insn);
18646 if (REG_P (recog_data.operand[0]))
18647 return 0;
18649 mem = recog_data.operand[0];
18651 gcc_assert (MEM_P (mem));
18653 addr = XEXP (mem, 0);
18655 if (REG_P (addr)
18656 || (GET_CODE (addr) == PLUS
18657 && REG_P (XEXP (addr, 0))
18658 && CONST_INT_P (XEXP (addr, 1))))
18659 return 1;
18660 else
18661 return 0;
18664 /* Output an ADD r, s, #n where n may be too big for one instruction.
18665 If adding zero to one register, output nothing. */
18666 const char *
18667 output_add_immediate (rtx *operands)
18669 HOST_WIDE_INT n = INTVAL (operands[2]);
18671 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18673 if (n < 0)
18674 output_multi_immediate (operands,
18675 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18676 -n);
18677 else
18678 output_multi_immediate (operands,
18679 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18683 return "";
18686 /* Output a multiple immediate operation.
18687 OPERANDS is the vector of operands referred to in the output patterns.
18688 INSTR1 is the output pattern to use for the first constant.
18689 INSTR2 is the output pattern to use for subsequent constants.
18690 IMMED_OP is the index of the constant slot in OPERANDS.
18691 N is the constant value. */
18692 static const char *
18693 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18694 int immed_op, HOST_WIDE_INT n)
18696 #if HOST_BITS_PER_WIDE_INT > 32
18697 n &= 0xffffffff;
18698 #endif
18700 if (n == 0)
18702 /* Quick and easy output. */
18703 operands[immed_op] = const0_rtx;
18704 output_asm_insn (instr1, operands);
18706 else
18708 int i;
18709 const char * instr = instr1;
18711 /* Note that n is never zero here (which would give no output). */
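/* For instance (illustrative only), n = 0x10804 is split into the chunks
   0x4 and 0x10800, each of which fits in an 8-bit field at an even bit
   position, so two instructions are emitted.  */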
18712 for (i = 0; i < 32; i += 2)
18714 if (n & (3 << i))
18716 operands[immed_op] = GEN_INT (n & (255 << i));
18717 output_asm_insn (instr, operands);
18718 instr = instr2;
18719 i += 6;
18724 return "";
18727 /* Return the name of a shifter operation. */
18728 static const char *
18729 arm_shift_nmem(enum rtx_code code)
18731 switch (code)
18733 case ASHIFT:
18734 return ARM_LSL_NAME;
18736 case ASHIFTRT:
18737 return "asr";
18739 case LSHIFTRT:
18740 return "lsr";
18742 case ROTATERT:
18743 return "ror";
18745 default:
18746 abort();
18750 /* Return the appropriate ARM instruction for the operation code.
18751 The returned result should not be overwritten. OP is the rtx of the
18752 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18753 was shifted. */
18754 const char *
18755 arithmetic_instr (rtx op, int shift_first_arg)
18757 switch (GET_CODE (op))
18759 case PLUS:
18760 return "add";
18762 case MINUS:
18763 return shift_first_arg ? "rsb" : "sub";
18765 case IOR:
18766 return "orr";
18768 case XOR:
18769 return "eor";
18771 case AND:
18772 return "and";
18774 case ASHIFT:
18775 case ASHIFTRT:
18776 case LSHIFTRT:
18777 case ROTATERT:
18778 return arm_shift_nmem(GET_CODE(op));
18780 default:
18781 gcc_unreachable ();
18785 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18786 for the operation code. The returned result should not be overwritten.
18787 OP is the rtx code of the shift.
18788 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18789 constant shift amount otherwise. */
18790 static const char *
18791 shift_op (rtx op, HOST_WIDE_INT *amountp)
18793 const char * mnem;
18794 enum rtx_code code = GET_CODE (op);
18796 switch (code)
18798 case ROTATE:
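/* There is no rotate-left instruction, so a left rotate by N is emitted
   as a rotate-right by (32 - N).  */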
18799 if (!CONST_INT_P (XEXP (op, 1)))
18801 output_operand_lossage ("invalid shift operand");
18802 return NULL;
18805 code = ROTATERT;
18806 *amountp = 32 - INTVAL (XEXP (op, 1));
18807 mnem = "ror";
18808 break;
18810 case ASHIFT:
18811 case ASHIFTRT:
18812 case LSHIFTRT:
18813 case ROTATERT:
18814 mnem = arm_shift_nmem(code);
18815 if (CONST_INT_P (XEXP (op, 1)))
18817 *amountp = INTVAL (XEXP (op, 1));
18819 else if (REG_P (XEXP (op, 1)))
18821 *amountp = -1;
18822 return mnem;
18824 else
18826 output_operand_lossage ("invalid shift operand");
18827 return NULL;
18829 break;
18831 case MULT:
18832 /* We never have to worry about the amount being other than a
18833 power of 2, since this case can never be reloaded from a reg. */
18834 if (!CONST_INT_P (XEXP (op, 1)))
18836 output_operand_lossage ("invalid shift operand");
18837 return NULL;
18840 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18842 /* Amount must be a power of two. */
18843 if (*amountp & (*amountp - 1))
18845 output_operand_lossage ("invalid shift operand");
18846 return NULL;
18849 *amountp = exact_log2 (*amountp);
18850 gcc_assert (IN_RANGE (*amountp, 0, 31));
18851 return ARM_LSL_NAME;
18853 default:
18854 output_operand_lossage ("invalid shift operand");
18855 return NULL;
18858 /* This is not 100% correct, but follows from the desire to merge
18859 multiplication by a power of 2 with the recognizer for a
18860 shift. >=32 is not a valid shift for "lsl", so we must try and
18861 output a shift that produces the correct arithmetical result.
18862 Using lsr #32 is identical except for the fact that the carry bit
18863 is not set correctly if we set the flags; but we never use the
18864 carry bit from such an operation, so we can ignore that. */
18865 if (code == ROTATERT)
18866 /* Rotate is just modulo 32. */
18867 *amountp &= 31;
18868 else if (*amountp != (*amountp & 31))
18870 if (code == ASHIFT)
18871 mnem = "lsr";
18872 *amountp = 32;
18875 /* Shifts of 0 are no-ops. */
18876 if (*amountp == 0)
18877 return NULL;
18879 return mnem;
18882 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18883 because /bin/as is horribly restrictive. The judgement about
18884 whether or not each character is 'printable' (and can be output as
18885 is) or not (and must be printed with an octal escape) must be made
18886 with reference to the *host* character set -- the situation is
18887 similar to that discussed in the comments above pp_c_char in
18888 c-pretty-print.c. */
18890 #define MAX_ASCII_LEN 51
18892 void
18893 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18895 int i;
18896 int len_so_far = 0;
18898 fputs ("\t.ascii\t\"", stream);
18900 for (i = 0; i < len; i++)
18902 int c = p[i];
18904 if (len_so_far >= MAX_ASCII_LEN)
18906 fputs ("\"\n\t.ascii\t\"", stream);
18907 len_so_far = 0;
18910 if (ISPRINT (c))
18912 if (c == '\\' || c == '\"')
18914 putc ('\\', stream);
18915 len_so_far++;
18917 putc (c, stream);
18918 len_so_far++;
18920 else
18922 fprintf (stream, "\\%03o", c);
18923 len_so_far += 4;
18927 fputs ("\"\n", stream);
18930 /* Whether a register is callee saved or not. This is necessary because high
18931 registers are marked as caller saved when optimizing for size on Thumb-1
18932 targets despite being callee saved, in order to avoid using them. */
18933 #define callee_saved_reg_p(reg) \
18934 (!call_used_regs[reg] \
18935 || (TARGET_THUMB1 && optimize_size \
18936 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18938 /* Compute the register save mask for registers 0 through 12
18939 inclusive. This code is used by arm_compute_save_reg_mask. */
18941 static unsigned long
18942 arm_compute_save_reg0_reg12_mask (void)
18944 unsigned long func_type = arm_current_func_type ();
18945 unsigned long save_reg_mask = 0;
18946 unsigned int reg;
18948 if (IS_INTERRUPT (func_type))
18950 unsigned int max_reg;
18951 /* Interrupt functions must not corrupt any registers,
18952 even call clobbered ones. If this is a leaf function
18953 we can just examine the registers used by the RTL, but
18954 otherwise we have to assume that whatever function is
18955 called might clobber anything, and so we have to save
18956 all the call-clobbered registers as well. */
18957 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18958 /* FIQ handlers have registers r8 - r12 banked, so
18959 we only need to check r0 - r7. Normal ISRs only
18960 bank r14 and r15, so we must check up to r12.
18961 r13 is the stack pointer which is always preserved,
18962 so we do not need to consider it here. */
18963 max_reg = 7;
18964 else
18965 max_reg = 12;
18967 for (reg = 0; reg <= max_reg; reg++)
18968 if (df_regs_ever_live_p (reg)
18969 || (! crtl->is_leaf && call_used_regs[reg]))
18970 save_reg_mask |= (1 << reg);
18972 /* Also save the pic base register if necessary. */
18973 if (flag_pic
18974 && !TARGET_SINGLE_PIC_BASE
18975 && arm_pic_register != INVALID_REGNUM
18976 && crtl->uses_pic_offset_table)
18977 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18979 else if (IS_VOLATILE(func_type))
18981 /* For noreturn functions we historically omitted register saves
18982 altogether. However, this really messes up debugging. As a
18983 compromise, save just the frame pointers. Combined with the link
18984 register saved elsewhere this should be sufficient to get
18985 a backtrace. */
18986 if (frame_pointer_needed)
18987 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18988 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18989 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18990 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18991 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18993 else
18995 /* In the normal case we only need to save those registers
18996 which are call saved and which are used by this function. */
18997 for (reg = 0; reg <= 11; reg++)
18998 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18999 save_reg_mask |= (1 << reg);
19001 /* Handle the frame pointer as a special case. */
19002 if (frame_pointer_needed)
19003 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19005 /* If we aren't loading the PIC register,
19006 don't stack it even though it may be live. */
19007 if (flag_pic
19008 && !TARGET_SINGLE_PIC_BASE
19009 && arm_pic_register != INVALID_REGNUM
19010 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19011 || crtl->uses_pic_offset_table))
19012 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19014 /* The prologue will copy SP into R0, so save it. */
19015 if (IS_STACKALIGN (func_type))
19016 save_reg_mask |= 1;
19019 /* Save registers so the exception handler can modify them. */
19020 if (crtl->calls_eh_return)
19022 unsigned int i;
19024 for (i = 0; ; i++)
19026 reg = EH_RETURN_DATA_REGNO (i);
19027 if (reg == INVALID_REGNUM)
19028 break;
19029 save_reg_mask |= 1 << reg;
19033 return save_reg_mask;
19036 /* Return true if r3 is live at the start of the function. */
19038 static bool
19039 arm_r3_live_at_start_p (void)
19041 /* Just look at cfg info, which is still close enough to correct at this
19042 point. This gives false positives for broken functions that might use
19043 uninitialized data that happens to be allocated in r3, but who cares? */
19044 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19047 /* Compute the number of bytes used to store the static chain register on the
19048 stack, above the stack frame. We need to know this accurately to get the
19049 alignment of the rest of the stack frame correct. */
19051 static int
19052 arm_compute_static_chain_stack_bytes (void)
19054 /* See the defining assertion in arm_expand_prologue. */
19055 if (IS_NESTED (arm_current_func_type ())
19056 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19057 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19058 && !df_regs_ever_live_p (LR_REGNUM)))
19059 && arm_r3_live_at_start_p ()
19060 && crtl->args.pretend_args_size == 0)
19061 return 4;
19063 return 0;
19066 /* Compute a bit mask of which registers need to be
19067 saved on the stack for the current function.
19068 This is used by arm_get_frame_offsets, which may add extra registers. */
19070 static unsigned long
19071 arm_compute_save_reg_mask (void)
19073 unsigned int save_reg_mask = 0;
19074 unsigned long func_type = arm_current_func_type ();
19075 unsigned int reg;
19077 if (IS_NAKED (func_type))
19078 /* This should never really happen. */
19079 return 0;
19081 /* If we are creating a stack frame, then we must save the frame pointer,
19082 IP (which will hold the old stack pointer), LR and the PC. */
19083 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19084 save_reg_mask |=
19085 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19086 | (1 << IP_REGNUM)
19087 | (1 << LR_REGNUM)
19088 | (1 << PC_REGNUM);
19090 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19092 /* Decide if we need to save the link register.
19093 Interrupt routines have their own banked link register,
19094 so they never need to save it.
19095 Otherwise if we do not use the link register we do not need to save
19096 it. If we are pushing other registers onto the stack however, we
19097 can save an instruction in the epilogue by pushing the link register
19098 now and then popping it back into the PC. This incurs extra memory
19099 accesses though, so we only do it when optimizing for size, and only
19100 if we know that we will not need a fancy return sequence. */
19101 if (df_regs_ever_live_p (LR_REGNUM)
19102 || (save_reg_mask
19103 && optimize_size
19104 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19105 && !crtl->tail_call_emit
19106 && !crtl->calls_eh_return))
19107 save_reg_mask |= 1 << LR_REGNUM;
19109 if (cfun->machine->lr_save_eliminated)
19110 save_reg_mask &= ~ (1 << LR_REGNUM);
19112 if (TARGET_REALLY_IWMMXT
19113 && ((bit_count (save_reg_mask)
19114 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19115 arm_compute_static_chain_stack_bytes())
19116 ) % 2) != 0)
19118 /* The total number of registers that are going to be pushed
19119 onto the stack is odd. We need to ensure that the stack
19120 is 64-bit aligned before we start to save iWMMXt registers,
19121 and also before we start to create locals. (A local variable
19122 might be a double or long long which we will load/store using
19123 an iWMMXt instruction). Therefore we need to push another
19124 ARM register, so that the stack will be 64-bit aligned. We
19125 try to avoid using the arg registers (r0 - r3) as they might be
19126 used to pass values in a tail call. */
19127 for (reg = 4; reg <= 12; reg++)
19128 if ((save_reg_mask & (1 << reg)) == 0)
19129 break;
19131 if (reg <= 12)
19132 save_reg_mask |= (1 << reg);
19133 else
19135 cfun->machine->sibcall_blocked = 1;
19136 save_reg_mask |= (1 << 3);
19140 /* We may need to push an additional register for use in initializing the
19141 PIC base register. */
19142 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19143 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19145 reg = thumb_find_work_register (1 << 4);
19146 if (!call_used_regs[reg])
19147 save_reg_mask |= (1 << reg);
19150 return save_reg_mask;
19153 /* Compute a bit mask of which registers need to be
19154 saved on the stack for the current function. */
19155 static unsigned long
19156 thumb1_compute_save_reg_mask (void)
19158 unsigned long mask;
19159 unsigned reg;
19161 mask = 0;
19162 for (reg = 0; reg < 12; reg ++)
19163 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19164 mask |= 1 << reg;
19166 /* Handle the frame pointer as a special case. */
19167 if (frame_pointer_needed)
19168 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19170 if (flag_pic
19171 && !TARGET_SINGLE_PIC_BASE
19172 && arm_pic_register != INVALID_REGNUM
19173 && crtl->uses_pic_offset_table)
19174 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19176 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19177 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19178 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19180 /* LR will also be pushed if any lo regs are pushed. */
19181 if (mask & 0xff || thumb_force_lr_save ())
19182 mask |= (1 << LR_REGNUM);
19184 /* Make sure we have a low work register if we need one.
19185 We will need one if we are going to push a high register,
19186 but we are not currently intending to push a low register. */
19187 if ((mask & 0xff) == 0
19188 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19190 /* Use thumb_find_work_register to choose which register
19191 we will use. If the register is live then we will
19192 have to push it. Use LAST_LO_REGNUM as our fallback
19193 choice for the register to select. */
19194 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19195 /* Make sure the register returned by thumb_find_work_register is
19196 not part of the return value. */
19197 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19198 reg = LAST_LO_REGNUM;
19200 if (callee_saved_reg_p (reg))
19201 mask |= 1 << reg;
19204 /* The 504 below is 8 bytes less than 512 because there are two possible
19205 alignment words. We can't tell here if they will be present or not so we
19206 have to play it safe and assume that they are. */
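/* Illustrative arithmetic: a 4-byte interworking slot plus a 496-byte frame
   and 4 bytes of outgoing arguments totals exactly 504, so the test below
   fires and a low register is kept free for the stack decrement.  */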
19207 if ((CALLER_INTERWORKING_SLOT_SIZE +
19208 ROUND_UP_WORD (get_frame_size ()) +
19209 crtl->outgoing_args_size) >= 504)
19211 /* This is the same as the code in thumb1_expand_prologue() which
19212 determines which register to use for stack decrement. */
19213 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19214 if (mask & (1 << reg))
19215 break;
19217 if (reg > LAST_LO_REGNUM)
19219 /* Make sure we have a register available for stack decrement. */
19220 mask |= 1 << LAST_LO_REGNUM;
19224 return mask;
19228 /* Return the number of bytes required to save VFP registers. */
19229 static int
19230 arm_get_vfp_saved_size (void)
19232 unsigned int regno;
19233 int count;
19234 int saved;
19236 saved = 0;
19237 /* Space for saved VFP registers. */
19238 if (TARGET_HARD_FLOAT)
19240 count = 0;
19241 for (regno = FIRST_VFP_REGNUM;
19242 regno < LAST_VFP_REGNUM;
19243 regno += 2)
19245 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19246 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19248 if (count > 0)
19250 /* Workaround ARM10 VFPr1 bug. */
19251 if (count == 2 && !arm_arch6)
19252 count++;
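/* Illustrative: a contiguous run of exactly two live D registers on a
   pre-arm_arch6 core is therefore accounted as three, reserving 24 rather
   than 16 bytes for that run.  */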
19253 saved += count * 8;
19255 count = 0;
19257 else
19258 count++;
19260 if (count > 0)
19262 if (count == 2 && !arm_arch6)
19263 count++;
19264 saved += count * 8;
19267 return saved;
19271 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19272 everything bar the final return instruction. If SIMPLE_RETURN is true,
19273 then do not output the epilogue, because it has already been emitted in RTL. */
19274 const char *
19275 output_return_instruction (rtx operand, bool really_return, bool reverse,
19276 bool simple_return)
19278 char conditional[10];
19279 char instr[100];
19280 unsigned reg;
19281 unsigned long live_regs_mask;
19282 unsigned long func_type;
19283 arm_stack_offsets *offsets;
19285 func_type = arm_current_func_type ();
19287 if (IS_NAKED (func_type))
19288 return "";
19290 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19292 /* If this function was declared non-returning, and we have
19293 found a tail call, then we have to trust that the called
19294 function won't return. */
19295 if (really_return)
19297 rtx ops[2];
19299 /* Otherwise, trap an attempted return by aborting. */
19300 ops[0] = operand;
19301 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19302 : "abort");
19303 assemble_external_libcall (ops[1]);
19304 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19307 return "";
19310 gcc_assert (!cfun->calls_alloca || really_return);
19312 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19314 cfun->machine->return_used_this_function = 1;
19316 offsets = arm_get_frame_offsets ();
19317 live_regs_mask = offsets->saved_regs_mask;
19319 if (!simple_return && live_regs_mask)
19321 const char * return_reg;
19323 /* If we do not have any special requirements for function exit
19324 (e.g. interworking) then we can load the return address
19325 directly into the PC. Otherwise we must load it into LR. */
19326 if (really_return
19327 && !IS_CMSE_ENTRY (func_type)
19328 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19329 return_reg = reg_names[PC_REGNUM];
19330 else
19331 return_reg = reg_names[LR_REGNUM];
19333 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19335 /* There are three possible reasons for the IP register
19336 being saved. 1) a stack frame was created, in which case
19337 IP contains the old stack pointer, or 2) an ISR routine
19338 corrupted it, or 3) it was saved to align the stack on
19339 iWMMXt. In case 1, restore IP into SP, otherwise just
19340 restore IP. */
19341 if (frame_pointer_needed)
19343 live_regs_mask &= ~ (1 << IP_REGNUM);
19344 live_regs_mask |= (1 << SP_REGNUM);
19346 else
19347 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19350 /* On some ARM architectures it is faster to use LDR rather than
19351 LDM to load a single register. On other architectures, the
19352 cost is the same. In 26 bit mode, or for exception handlers,
19353 we have to use LDM to load the PC so that the CPSR is also
19354 restored. */
19355 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19356 if (live_regs_mask == (1U << reg))
19357 break;
19359 if (reg <= LAST_ARM_REGNUM
19360 && (reg != LR_REGNUM
19361 || ! really_return
19362 || ! IS_INTERRUPT (func_type)))
19364 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19365 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19367 else
19369 char *p;
19370 int first = 1;
19372 /* Generate the load multiple instruction to restore the
19373 registers. Note we can get here, even if
19374 frame_pointer_needed is true, but only if sp already
19375 points to the base of the saved core registers. */
19376 if (live_regs_mask & (1 << SP_REGNUM))
19378 unsigned HOST_WIDE_INT stack_adjust;
19380 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19381 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19383 if (stack_adjust && arm_arch5 && TARGET_ARM)
19384 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19385 else
19387 /* If we can't use ldmib (SA110 bug),
19388 then try to pop r3 instead. */
19389 if (stack_adjust)
19390 live_regs_mask |= 1 << 3;
19392 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19395 /* For interrupt returns we have to use an LDM rather than
19396 a POP so that we can use the exception return variant. */
19397 else if (IS_INTERRUPT (func_type))
19398 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19399 else
19400 sprintf (instr, "pop%s\t{", conditional);
19402 p = instr + strlen (instr);
19404 for (reg = 0; reg <= SP_REGNUM; reg++)
19405 if (live_regs_mask & (1 << reg))
19407 int l = strlen (reg_names[reg]);
19409 if (first)
19410 first = 0;
19411 else
19413 memcpy (p, ", ", 2);
19414 p += 2;
19417 memcpy (p, "%|", 2);
19418 memcpy (p + 2, reg_names[reg], l);
19419 p += l + 2;
19422 if (live_regs_mask & (1 << LR_REGNUM))
19424 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19425 /* If returning from an interrupt, restore the CPSR. */
19426 if (IS_INTERRUPT (func_type))
19427 strcat (p, "^");
19429 else
19430 strcpy (p, "}");
19433 output_asm_insn (instr, & operand);
19435 /* See if we need to generate an extra instruction to
19436 perform the actual function return. */
19437 if (really_return
19438 && func_type != ARM_FT_INTERWORKED
19439 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19441 /* The return has already been handled
19442 by loading the LR into the PC. */
19443 return "";
19447 if (really_return)
19449 switch ((int) ARM_FUNC_TYPE (func_type))
19451 case ARM_FT_ISR:
19452 case ARM_FT_FIQ:
19453 /* ??? This is wrong for unified assembly syntax. */
19454 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19455 break;
19457 case ARM_FT_INTERWORKED:
19458 gcc_assert (arm_arch5 || arm_arch4t);
19459 sprintf (instr, "bx%s\t%%|lr", conditional);
19460 break;
19462 case ARM_FT_EXCEPTION:
19463 /* ??? This is wrong for unified assembly syntax. */
19464 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19465 break;
19467 default:
19468 if (IS_CMSE_ENTRY (func_type))
19470 /* Check if we have to clear the 'GE bits', which are only used if
19471 parallel add and subtraction instructions are available. */
19472 if (TARGET_INT_SIMD)
19473 snprintf (instr, sizeof (instr),
19474 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19475 else
19476 snprintf (instr, sizeof (instr),
19477 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19479 output_asm_insn (instr, & operand);
19480 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19482 /* Clear the cumulative exception-status bits (0-4,7) and the
19483 condition code bits (28-31) of the FPSCR. We need to
19484 remember to clear the first scratch register used (IP) and
19485 save and restore the second (r4). */
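/* Illustrative note: the constants below build the mask 0x0fffff60 in r4
   (movw #65376 = 0xff60 for the low half, movt #4095 = 0x0fff for the high
   half); ANDing it into the FPSCR copy clears bits 0-4, 7 and 28-31 while
   leaving all other bits untouched.  */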
19486 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19487 output_asm_insn (instr, & operand);
19488 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19489 output_asm_insn (instr, & operand);
19490 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19491 output_asm_insn (instr, & operand);
19492 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19493 output_asm_insn (instr, & operand);
19494 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19495 output_asm_insn (instr, & operand);
19496 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19497 output_asm_insn (instr, & operand);
19498 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19499 output_asm_insn (instr, & operand);
19500 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19501 output_asm_insn (instr, & operand);
19503 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19505 /* Use bx if it's available. */
19506 else if (arm_arch5 || arm_arch4t)
19507 sprintf (instr, "bx%s\t%%|lr", conditional);
19508 else
19509 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19510 break;
19513 output_asm_insn (instr, & operand);
19516 return "";
19519 /* Output in FILE asm statements needed to declare the NAME of the function
19520 defined by its DECL node. */
19522 void
19523 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19525 size_t cmse_name_len;
19526 char *cmse_name = 0;
19527 char cmse_prefix[] = "__acle_se_";
19529 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19530 extra function label for each function with the 'cmse_nonsecure_entry'
19531 attribute. This extra function label should be prepended with
19532 '__acle_se_', telling the linker that it needs to create secure gateway
19533 veneers for this function. */
19534 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19535 DECL_ATTRIBUTES (decl)))
19537 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19538 cmse_name = XALLOCAVEC (char, cmse_name_len);
19539 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19540 targetm.asm_out.globalize_label (file, cmse_name);
19542 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19543 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19546 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19547 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19548 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19549 ASM_OUTPUT_LABEL (file, name);
19551 if (cmse_name)
19552 ASM_OUTPUT_LABEL (file, cmse_name);
19554 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19557 /* Write the function name into the code section, directly preceding
19558 the function prologue.
19560 Code will be output similar to this:
19562 .ascii "arm_poke_function_name", 0
19563 .align
19565 .word 0xff000000 + (t1 - t0)
19566 arm_poke_function_name
19567 mov ip, sp
19568 stmfd sp!, {fp, ip, lr, pc}
19569 sub fp, ip, #4
19571 When performing a stack backtrace, code can inspect the value
19572 of 'pc' stored at 'fp' + 0. If the trace function then looks
19573 at location pc - 12 and the top 8 bits are set, then we know
19574 that there is a function name embedded immediately preceding this
19575 location, and that its length is ((pc[-3]) & ~0xff000000).
19577 We assume that pc is declared as a pointer to an unsigned long.
19579 It is of no benefit to output the function name if we are assembling
19580 a leaf function. These function types will not contain a stack
19581 backtrace structure, therefore it is not possible to determine the
19582 function name. */
19583 void
19584 arm_poke_function_name (FILE *stream, const char *name)
19586 unsigned long alignlength;
19587 unsigned long length;
19588 rtx x;
19590 length = strlen (name) + 1;
19591 alignlength = ROUND_UP_WORD (length);
19593 ASM_OUTPUT_ASCII (stream, name, length);
19594 ASM_OUTPUT_ALIGN (stream, 2);
19595 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19596 assemble_aligned_integer (UNITS_PER_WORD, x);
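/* A hedged sketch, not part of the compiler: given the saved pc fetched
   from the frame as an `unsigned long *', a backtracer could recover the
   embedded name roughly as follows, assuming the layout described above.
   marker is the word at pc - 12 and len the padded name length:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) pc - 12 - len;
       }
 */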
19599 /* Place some comments into the assembler stream
19600 describing the current function. */
19601 static void
19602 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19604 unsigned long func_type;
19606 /* Sanity check. */
19607 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19609 func_type = arm_current_func_type ();
19611 switch ((int) ARM_FUNC_TYPE (func_type))
19613 default:
19614 case ARM_FT_NORMAL:
19615 break;
19616 case ARM_FT_INTERWORKED:
19617 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19618 break;
19619 case ARM_FT_ISR:
19620 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19621 break;
19622 case ARM_FT_FIQ:
19623 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19624 break;
19625 case ARM_FT_EXCEPTION:
19626 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19627 break;
19630 if (IS_NAKED (func_type))
19631 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19633 if (IS_VOLATILE (func_type))
19634 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19636 if (IS_NESTED (func_type))
19637 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19638 if (IS_STACKALIGN (func_type))
19639 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19640 if (IS_CMSE_ENTRY (func_type))
19641 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19643 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19644 crtl->args.size,
19645 crtl->args.pretend_args_size, frame_size);
19647 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19648 frame_pointer_needed,
19649 cfun->machine->uses_anonymous_args);
19651 if (cfun->machine->lr_save_eliminated)
19652 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19654 if (crtl->calls_eh_return)
19655 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19659 static void
19660 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19661 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19663 arm_stack_offsets *offsets;
19665 if (TARGET_THUMB1)
19667 int regno;
19669 /* Emit any call-via-reg trampolines that are needed for v4t support
19670 of call_reg and call_value_reg type insns. */
19671 for (regno = 0; regno < LR_REGNUM; regno++)
19673 rtx label = cfun->machine->call_via[regno];
19675 if (label != NULL)
19677 switch_to_section (function_section (current_function_decl));
19678 targetm.asm_out.internal_label (asm_out_file, "L",
19679 CODE_LABEL_NUMBER (label));
19680 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19684 /* ??? Probably not safe to set this here, since it assumes that a
19685 function will be emitted as assembly immediately after we generate
19686 RTL for it. This does not happen for inline functions. */
19687 cfun->machine->return_used_this_function = 0;
19689 else /* TARGET_32BIT */
19691 /* We need to take into account any stack-frame rounding. */
19692 offsets = arm_get_frame_offsets ();
19694 gcc_assert (!use_return_insn (FALSE, NULL)
19695 || (cfun->machine->return_used_this_function != 0)
19696 || offsets->saved_regs == offsets->outgoing_args
19697 || frame_pointer_needed);
19701 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19702 STR and STRD. If an even number of registers are being pushed, an
19703 STRD pattern is created for each register pair. If an
19704 odd number of registers are pushed, emit an initial STR followed by
19705 as many STRD instructions as are needed. This works best when the
19706 stack is initially 64-bit aligned (the normal case), since it
19707 ensures that each STRD is also 64-bit aligned. */
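/* For example (illustrative), pushing the five registers {r4, r5, r6, r7,
   lr} emits one str with writeback for r4 followed by strd r5, r6 and
   strd r7, lr, so each strd lands on a doubleword-aligned slot.  */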
19708 static void
19709 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19711 int num_regs = 0;
19712 int i;
19713 int regno;
19714 rtx par = NULL_RTX;
19715 rtx dwarf = NULL_RTX;
19716 rtx tmp;
19717 bool first = true;
19719 num_regs = bit_count (saved_regs_mask);
19721 /* Must be at least one register to save, and can't save SP or PC. */
19722 gcc_assert (num_regs > 0 && num_regs <= 14);
19723 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19724 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19726 /* Create sequence for DWARF info. All the frame-related data for
19727 debugging is held in this wrapper. */
19728 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19730 /* Describe the stack adjustment. */
19731 tmp = gen_rtx_SET (stack_pointer_rtx,
19732 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19733 RTX_FRAME_RELATED_P (tmp) = 1;
19734 XVECEXP (dwarf, 0, 0) = tmp;
19736 /* Find the first register. */
19737 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19740 i = 0;
19742 /* If there's an odd number of registers to push, start off by
19743 pushing a single register. This ensures that subsequent strd
19744 operations are dword aligned (assuming that SP was originally
19745 64-bit aligned). */
19746 if ((num_regs & 1) != 0)
19748 rtx reg, mem, insn;
19750 reg = gen_rtx_REG (SImode, regno);
19751 if (num_regs == 1)
19752 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19753 stack_pointer_rtx));
19754 else
19755 mem = gen_frame_mem (Pmode,
19756 gen_rtx_PRE_MODIFY
19757 (Pmode, stack_pointer_rtx,
19758 plus_constant (Pmode, stack_pointer_rtx,
19759 -4 * num_regs)));
19761 tmp = gen_rtx_SET (mem, reg);
19762 RTX_FRAME_RELATED_P (tmp) = 1;
19763 insn = emit_insn (tmp);
19764 RTX_FRAME_RELATED_P (insn) = 1;
19765 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19766 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19767 RTX_FRAME_RELATED_P (tmp) = 1;
19768 i++;
19769 regno++;
19770 XVECEXP (dwarf, 0, i) = tmp;
19771 first = false;
19774 while (i < num_regs)
19775 if (saved_regs_mask & (1 << regno))
19777 rtx reg1, reg2, mem1, mem2;
19778 rtx tmp0, tmp1, tmp2;
19779 int regno2;
19781 /* Find the register to pair with this one. */
19782 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19783 regno2++)
19786 reg1 = gen_rtx_REG (SImode, regno);
19787 reg2 = gen_rtx_REG (SImode, regno2);
19789 if (first)
19791 rtx insn;
19793 first = false;
19794 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19795 stack_pointer_rtx,
19796 -4 * num_regs));
19797 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19798 stack_pointer_rtx,
19799 -4 * (num_regs - 1)));
19800 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19801 plus_constant (Pmode, stack_pointer_rtx,
19802 -4 * (num_regs)));
19803 tmp1 = gen_rtx_SET (mem1, reg1);
19804 tmp2 = gen_rtx_SET (mem2, reg2);
19805 RTX_FRAME_RELATED_P (tmp0) = 1;
19806 RTX_FRAME_RELATED_P (tmp1) = 1;
19807 RTX_FRAME_RELATED_P (tmp2) = 1;
19808 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19809 XVECEXP (par, 0, 0) = tmp0;
19810 XVECEXP (par, 0, 1) = tmp1;
19811 XVECEXP (par, 0, 2) = tmp2;
19812 insn = emit_insn (par);
19813 RTX_FRAME_RELATED_P (insn) = 1;
19814 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19816 else
19818 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19819 stack_pointer_rtx,
19820 4 * i));
19821 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19822 stack_pointer_rtx,
19823 4 * (i + 1)));
19824 tmp1 = gen_rtx_SET (mem1, reg1);
19825 tmp2 = gen_rtx_SET (mem2, reg2);
19826 RTX_FRAME_RELATED_P (tmp1) = 1;
19827 RTX_FRAME_RELATED_P (tmp2) = 1;
19828 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19829 XVECEXP (par, 0, 0) = tmp1;
19830 XVECEXP (par, 0, 1) = tmp2;
19831 emit_insn (par);
19834 /* Create unwind information. This is an approximation. */
19835 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19836 plus_constant (Pmode,
19837 stack_pointer_rtx,
19838 4 * i)),
19839 reg1);
19840 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19841 plus_constant (Pmode,
19842 stack_pointer_rtx,
19843 4 * (i + 1))),
19844 reg2);
19846 RTX_FRAME_RELATED_P (tmp1) = 1;
19847 RTX_FRAME_RELATED_P (tmp2) = 1;
19848 XVECEXP (dwarf, 0, i + 1) = tmp1;
19849 XVECEXP (dwarf, 0, i + 2) = tmp2;
19850 i += 2;
19851 regno = regno2 + 1;
19853 else
19854 regno++;
19856 return;
19859 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19860 whenever possible, otherwise it emits single-word stores. The first store
19861 also allocates stack space for all saved registers, using writeback with
19862 post-addressing mode. All other stores use offset addressing. If no STRD
19863 can be emitted, this function emits a sequence of single-word stores,
19864 and not an STM as before, because single-word stores provide more freedom
19865 in scheduling and can be turned into an STM by peephole optimizations. */
19866 static void
19867 arm_emit_strd_push (unsigned long saved_regs_mask)
19869 int num_regs = 0;
19870 int i, j, dwarf_index = 0;
19871 int offset = 0;
19872 rtx dwarf = NULL_RTX;
19873 rtx insn = NULL_RTX;
19874 rtx tmp, mem;
19876 /* TODO: More efficient code can be emitted by changing the
19877 layout, e.g., first push all pairs that can use STRD to keep the
19878 stack aligned, and then push all other registers. */
19879 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19880 if (saved_regs_mask & (1 << i))
19881 num_regs++;
19883 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19884 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19885 gcc_assert (num_regs > 0);
19887 /* Create sequence for DWARF info. */
19888 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19890 /* For dwarf info, we generate explicit stack update. */
19891 tmp = gen_rtx_SET (stack_pointer_rtx,
19892 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19893 RTX_FRAME_RELATED_P (tmp) = 1;
19894 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19896 /* Save registers. */
19897 offset = - 4 * num_regs;
19898 j = 0;
19899 while (j <= LAST_ARM_REGNUM)
19900 if (saved_regs_mask & (1 << j))
19902 if ((j % 2 == 0)
19903 && (saved_regs_mask & (1 << (j + 1))))
19905 /* Current register and next register form a register pair for
19906 which STRD can be generated. */
19907 if (offset < 0)
19909 /* Allocate stack space for all saved registers. */
19910 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19911 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19912 mem = gen_frame_mem (DImode, tmp);
19913 offset = 0;
19915 else if (offset > 0)
19916 mem = gen_frame_mem (DImode,
19917 plus_constant (Pmode,
19918 stack_pointer_rtx,
19919 offset));
19920 else
19921 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19923 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19924 RTX_FRAME_RELATED_P (tmp) = 1;
19925 tmp = emit_insn (tmp);
19927 /* Record the first store insn. */
19928 if (dwarf_index == 1)
19929 insn = tmp;
19931 /* Generate dwarf info. */
19932 mem = gen_frame_mem (SImode,
19933 plus_constant (Pmode,
19934 stack_pointer_rtx,
19935 offset));
19936 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19937 RTX_FRAME_RELATED_P (tmp) = 1;
19938 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19940 mem = gen_frame_mem (SImode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 offset + 4));
19944 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19945 RTX_FRAME_RELATED_P (tmp) = 1;
19946 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19948 offset += 8;
19949 j += 2;
19951 else
19953 /* Emit a single word store. */
19954 if (offset < 0)
19956 /* Allocate stack space for all saved registers. */
19957 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19958 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19959 mem = gen_frame_mem (SImode, tmp);
19960 offset = 0;
19962 else if (offset > 0)
19963 mem = gen_frame_mem (SImode,
19964 plus_constant (Pmode,
19965 stack_pointer_rtx,
19966 offset));
19967 else
19968 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19970 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19971 RTX_FRAME_RELATED_P (tmp) = 1;
19972 tmp = emit_insn (tmp);
19974 /* Record the first store insn. */
19975 if (dwarf_index == 1)
19976 insn = tmp;
19978 /* Generate dwarf info. */
19979 mem = gen_frame_mem (SImode,
19980 plus_constant(Pmode,
19981 stack_pointer_rtx,
19982 offset));
19983 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19984 RTX_FRAME_RELATED_P (tmp) = 1;
19985 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19987 offset += 4;
19988 j += 1;
19991 else
19992 j++;
19994 /* Attach dwarf info to the first insn we generate. */
19995 gcc_assert (insn != NULL_RTX);
19996 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19997 RTX_FRAME_RELATED_P (insn) = 1;
20000 /* Generate and emit an insn that we will recognize as a push_multi.
20001 Unfortunately, since this insn does not reflect very well the actual
20002 semantics of the operation, we need to annotate the insn for the benefit
20003 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20004 MASK for registers that should be annotated for DWARF2 frame unwind
20005 information. */
20006 static rtx
20007 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20009 int num_regs = 0;
20010 int num_dwarf_regs = 0;
20011 int i, j;
20012 rtx par;
20013 rtx dwarf;
20014 int dwarf_par_index;
20015 rtx tmp, reg;
20017 /* We don't record the PC in the dwarf frame information. */
20018 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20020 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20022 if (mask & (1 << i))
20023 num_regs++;
20024 if (dwarf_regs_mask & (1 << i))
20025 num_dwarf_regs++;
20028 gcc_assert (num_regs && num_regs <= 16);
20029 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20031 /* For the body of the insn we are going to generate an UNSPEC in
20032 parallel with several USEs. This allows the insn to be recognized
20033 by the push_multi pattern in the arm.md file.
20035 The body of the insn looks something like this:
20037 (parallel [
20038 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20039 (const_int:SI <num>)))
20040 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20041 (use (reg:SI XX))
20042 (use (reg:SI YY))
20046 For the frame note however, we try to be more explicit and actually
20047 show each register being stored into the stack frame, plus a (single)
20048 decrement of the stack pointer. We do it this way in order to be
20049 friendly to the stack unwinding code, which only wants to see a single
20050 stack decrement per instruction. The RTL we generate for the note looks
20051 something like this:
20053 (sequence [
20054 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20055 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20056 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20057 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20061 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20062 instead we'd have a parallel expression detailing all
20063 the stores to the various memory addresses so that debug
20064 information is more up-to-date. Remember however while writing
20065 this to take care of the constraints with the push instruction.
20067 Note also that this has to be taken care of for the VFP registers.
20069 For more see PR43399. */
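/* For instance (illustrative), pushing {r4, r5, lr} gives a PARALLEL whose
   first SET stores r4 through a pre-modify of sp by -12 wrapped in
   UNSPEC_PUSH_MULT, with USEs for r5 and lr, while the attached note
   records sp -= 12 and individual stores of r4, r5 and lr at offsets 0, 4
   and 8.  */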
20071 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20072 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20073 dwarf_par_index = 1;
20075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20077 if (mask & (1 << i))
20079 reg = gen_rtx_REG (SImode, i);
20081 XVECEXP (par, 0, 0)
20082 = gen_rtx_SET (gen_frame_mem
20083 (BLKmode,
20084 gen_rtx_PRE_MODIFY (Pmode,
20085 stack_pointer_rtx,
20086 plus_constant
20087 (Pmode, stack_pointer_rtx,
20088 -4 * num_regs))
20090 gen_rtx_UNSPEC (BLKmode,
20091 gen_rtvec (1, reg),
20092 UNSPEC_PUSH_MULT));
20094 if (dwarf_regs_mask & (1 << i))
20096 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20097 reg);
20098 RTX_FRAME_RELATED_P (tmp) = 1;
20099 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20102 break;
20106 for (j = 1, i++; j < num_regs; i++)
20108 if (mask & (1 << i))
20110 reg = gen_rtx_REG (SImode, i);
20112 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20114 if (dwarf_regs_mask & (1 << i))
20117 = gen_rtx_SET (gen_frame_mem
20118 (SImode,
20119 plus_constant (Pmode, stack_pointer_rtx,
20120 4 * j)),
20121 reg);
20122 RTX_FRAME_RELATED_P (tmp) = 1;
20123 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20126 j++;
20130 par = emit_insn (par);
20132 tmp = gen_rtx_SET (stack_pointer_rtx,
20133 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20134 RTX_FRAME_RELATED_P (tmp) = 1;
20135 XVECEXP (dwarf, 0, 0) = tmp;
20137 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20139 return par;
20142 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20143 SIZE is the offset to be adjusted.
20144 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20145 static void
20146 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20148 rtx dwarf;
20150 RTX_FRAME_RELATED_P (insn) = 1;
20151 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20152 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20155 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20156 SAVED_REGS_MASK shows which registers need to be restored.
20158 Unfortunately, since this insn does not reflect very well the actual
20159 semantics of the operation, we need to annotate the insn for the benefit
20160 of DWARF2 frame unwind information. */
20161 static void
20162 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20164 int num_regs = 0;
20165 int i, j;
20166 rtx par;
20167 rtx dwarf = NULL_RTX;
20168 rtx tmp, reg;
20169 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20170 int offset_adj;
20171 int emit_update;
20173 offset_adj = return_in_pc ? 1 : 0;
20174 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20175 if (saved_regs_mask & (1 << i))
20176 num_regs++;
20178 gcc_assert (num_regs && num_regs <= 16);
20180 /* If SP is in the reglist, then we don't emit an SP update insn. */
20181 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20183 /* The parallel needs to hold num_regs SETs
20184 and one SET for the stack update. */
20185 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20187 if (return_in_pc)
20188 XVECEXP (par, 0, 0) = ret_rtx;
20190 if (emit_update)
20192 /* Increment the stack pointer, based on there being
20193 num_regs 4-byte registers to restore. */
20194 tmp = gen_rtx_SET (stack_pointer_rtx,
20195 plus_constant (Pmode,
20196 stack_pointer_rtx,
20197 4 * num_regs));
20198 RTX_FRAME_RELATED_P (tmp) = 1;
20199 XVECEXP (par, 0, offset_adj) = tmp;
20202 /* Now restore every reg, which may include PC. */
20203 for (j = 0, i = 0; j < num_regs; i++)
20204 if (saved_regs_mask & (1 << i))
20206 reg = gen_rtx_REG (SImode, i);
20207 if ((num_regs == 1) && emit_update && !return_in_pc)
20209 /* Emit single load with writeback. */
20210 tmp = gen_frame_mem (SImode,
20211 gen_rtx_POST_INC (Pmode,
20212 stack_pointer_rtx));
20213 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20214 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20215 return;
20218 tmp = gen_rtx_SET (reg,
20219 gen_frame_mem
20220 (SImode,
20221 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20222 RTX_FRAME_RELATED_P (tmp) = 1;
20223 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20225 /* We need to maintain a sequence for DWARF info too. As dwarf info
20226 should not have PC, skip PC. */
20227 if (i != PC_REGNUM)
20228 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20230 j++;
20233 if (return_in_pc)
20234 par = emit_jump_insn (par);
20235 else
20236 par = emit_insn (par);
20238 REG_NOTES (par) = dwarf;
20239 if (!return_in_pc)
20240 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20241 stack_pointer_rtx, stack_pointer_rtx);
20244 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20245 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20247 Unfortunately, since this insn does not reflect very well the actual
20248 semantics of the operation, we need to annotate the insn for the benefit
20249 of DWARF2 frame unwind information. */
20250 static void
20251 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20253 int i, j;
20254 rtx par;
20255 rtx dwarf = NULL_RTX;
20256 rtx tmp, reg;
20258 gcc_assert (num_regs && num_regs <= 32);
20260 /* Workaround ARM10 VFPr1 bug. */
20261 if (num_regs == 2 && !arm_arch6)
20263 if (first_reg == 15)
20264 first_reg--;
20266 num_regs++;
20269 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20270 there could be up to 32 D-registers to restore.
20271 If there are more than 16 D-registers, make two recursive calls,
20272 each of which emits one pop_multi instruction. */
20273 if (num_regs > 16)
20275 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20276 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20277 return;
20280 /* The parallel needs to hold num_regs SETs
20281 and one SET for the stack update. */
20282 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20284 /* Increment the stack pointer, based on there being
20285 num_regs 8-byte registers to restore. */
20286 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20287 RTX_FRAME_RELATED_P (tmp) = 1;
20288 XVECEXP (par, 0, 0) = tmp;
20290 /* Now show every reg that will be restored, using a SET for each. */
20291 for (j = 0, i=first_reg; j < num_regs; i += 2)
20293 reg = gen_rtx_REG (DFmode, i);
20295 tmp = gen_rtx_SET (reg,
20296 gen_frame_mem
20297 (DFmode,
20298 plus_constant (Pmode, base_reg, 8 * j)));
20299 RTX_FRAME_RELATED_P (tmp) = 1;
20300 XVECEXP (par, 0, j + 1) = tmp;
20302 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20304 j++;
20307 par = emit_insn (par);
20308 REG_NOTES (par) = dwarf;
20310 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20311 if (REGNO (base_reg) == IP_REGNUM)
20313 RTX_FRAME_RELATED_P (par) = 1;
20314 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20316 else
20317 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20318 base_reg, base_reg);
20321 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an even
20322 number of registers is being popped, multiple LDRD patterns are created for
20323 all register pairs. If an odd number of registers is popped, the last register is
20324 loaded using an LDR pattern. */
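/* For example (illustrative), popping {r4, r5, pc}: r4 and r5 are restored
   with a single ldrd, the stack pointer is then adjusted, and pc is loaded
   last with an ldr that also performs the return.  */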
20325 static void
20326 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20328 int num_regs = 0;
20329 int i, j;
20330 rtx par = NULL_RTX;
20331 rtx dwarf = NULL_RTX;
20332 rtx tmp, reg, tmp1;
20333 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20335 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20336 if (saved_regs_mask & (1 << i))
20337 num_regs++;
20339 gcc_assert (num_regs && num_regs <= 16);
20341 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20342 to be popped. So, if num_regs is even, now it will become odd,
20343 and we can generate pop with PC. If num_regs is odd, it will be
20344 even now, and ldr with return can be generated for PC. */
20345 if (return_in_pc)
20346 num_regs--;
20348 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20350 /* Var j iterates over all the registers to gather all the registers in
20351 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
20352 A PARALLEL RTX of a register pair is created here, so that the pattern for
20353 LDRD can be matched. As PC is always the last register to be popped, and
20354 we have already decremented num_regs if PC is in the mask, we don't have
20355 to worry about PC in this loop. */
20356 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20357 if (saved_regs_mask & (1 << j))
20359 /* Create RTX for memory load. */
20360 reg = gen_rtx_REG (SImode, j);
20361 tmp = gen_rtx_SET (reg,
20362 gen_frame_mem (SImode,
20363 plus_constant (Pmode,
20364 stack_pointer_rtx, 4 * i)));
20365 RTX_FRAME_RELATED_P (tmp) = 1;
20367 if (i % 2 == 0)
20369 /* When saved-register index (i) is even, the RTX to be emitted is
20370 yet to be created. Hence create it first. The LDRD pattern we
20371 are generating is :
20372 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20373 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20374 where target registers need not be consecutive. */
20375 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20376 dwarf = NULL_RTX;
20379 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20380 added as the 0th element; if i is odd, it is added as the 1st element
20381 of the LDRD pattern shown above. */
20382 XVECEXP (par, 0, (i % 2)) = tmp;
20383 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20385 if ((i % 2) == 1)
20387 /* When saved-register index (i) is odd, RTXs for both the registers
20388 to be loaded have been generated in the LDRD pattern given above, and the
20389 pattern can be emitted now. */
20390 par = emit_insn (par);
20391 REG_NOTES (par) = dwarf;
20392 RTX_FRAME_RELATED_P (par) = 1;
20395 i++;
20398 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20399 number of registers is even AND return_in_pc is true, the last register is
20400 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20401 then LDR with post increment. */
20403 /* Increment the stack pointer, based on there being
20404 num_regs 4-byte registers to restore. */
20405 tmp = gen_rtx_SET (stack_pointer_rtx,
20406 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20407 RTX_FRAME_RELATED_P (tmp) = 1;
20408 tmp = emit_insn (tmp);
20409 if (!return_in_pc)
20411 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20412 stack_pointer_rtx, stack_pointer_rtx);
20415 dwarf = NULL_RTX;
20417 if (((num_regs % 2) == 1 && !return_in_pc)
20418 || ((num_regs % 2) == 0 && return_in_pc))
20420 /* Scan for the single register to be popped. Skip until the saved
20421 register is found. */
20422 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20424 /* Gen LDR with post increment here. */
20425 tmp1 = gen_rtx_MEM (SImode,
20426 gen_rtx_POST_INC (SImode,
20427 stack_pointer_rtx));
20428 set_mem_alias_set (tmp1, get_frame_alias_set ());
20430 reg = gen_rtx_REG (SImode, j);
20431 tmp = gen_rtx_SET (reg, tmp1);
20432 RTX_FRAME_RELATED_P (tmp) = 1;
20433 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20435 if (return_in_pc)
20437 /* If return_in_pc, j must be PC_REGNUM. */
20438 gcc_assert (j == PC_REGNUM);
20439 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20440 XVECEXP (par, 0, 0) = ret_rtx;
20441 XVECEXP (par, 0, 1) = tmp;
20442 par = emit_jump_insn (par);
20444 else
20446 par = emit_insn (tmp);
20447 REG_NOTES (par) = dwarf;
20448 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20449 stack_pointer_rtx, stack_pointer_rtx);
20453 else if ((num_regs % 2) == 1 && return_in_pc)
20455 /* There are 2 registers to be popped. So, generate the pattern
20456 pop_multiple_with_stack_update_and_return to pop in PC. */
20457 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20460 return;
20463 /* LDRD in ARM mode needs consecutive registers as operands. This function
20464 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20465 offset addressing and then generates one separate stack update. This provides
20466 more scheduling freedom, compared to writeback on every load. However,
20467 if the function returns using load into PC directly
20468 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20469 before the last load. TODO: Add a peephole optimization to recognize
20470 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20471 peephole optimization to merge the load at stack-offset zero
20472 with the stack update instruction using load with writeback
20473 in post-index addressing mode. */
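/* For example (illustrative), restoring {r4, r5, r6, r7} emits two ldrd
   instructions at stack offsets 0 and 8 followed by a single add of 16 to
   the stack pointer.  */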
20474 static void
20475 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20477 int j = 0;
20478 int offset = 0;
20479 rtx par = NULL_RTX;
20480 rtx dwarf = NULL_RTX;
20481 rtx tmp, mem;
20483 /* Restore saved registers. */
20484 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20485 j = 0;
20486 while (j <= LAST_ARM_REGNUM)
20487 if (saved_regs_mask & (1 << j))
20489 if ((j % 2) == 0
20490 && (saved_regs_mask & (1 << (j + 1)))
20491 && (j + 1) != PC_REGNUM)
20493 /* Current register and next register form a register pair for which
20494 LDRD can be generated. PC is always the last register popped, and
20495 we handle it separately. */
20496 if (offset > 0)
20497 mem = gen_frame_mem (DImode,
20498 plus_constant (Pmode,
20499 stack_pointer_rtx,
20500 offset));
20501 else
20502 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20504 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20505 tmp = emit_insn (tmp);
20506 RTX_FRAME_RELATED_P (tmp) = 1;
20508 /* Generate dwarf info. */
20510 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20511 gen_rtx_REG (SImode, j),
20512 NULL_RTX);
20513 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20514 gen_rtx_REG (SImode, j + 1),
20515 dwarf);
20517 REG_NOTES (tmp) = dwarf;
20519 offset += 8;
20520 j += 2;
20522 else if (j != PC_REGNUM)
20524 /* Emit a single word load. */
20525 if (offset > 0)
20526 mem = gen_frame_mem (SImode,
20527 plus_constant (Pmode,
20528 stack_pointer_rtx,
20529 offset));
20530 else
20531 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20533 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20534 tmp = emit_insn (tmp);
20535 RTX_FRAME_RELATED_P (tmp) = 1;
20537 /* Generate dwarf info. */
20538 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20539 gen_rtx_REG (SImode, j),
20540 NULL_RTX);
20542 offset += 4;
20543 j += 1;
20545 else /* j == PC_REGNUM */
20546 j++;
20548 else
20549 j++;
20551 /* Update the stack. */
20552 if (offset > 0)
20554 tmp = gen_rtx_SET (stack_pointer_rtx,
20555 plus_constant (Pmode,
20556 stack_pointer_rtx,
20557 offset));
20558 tmp = emit_insn (tmp);
20559 arm_add_cfa_adjust_cfa_note (tmp, offset,
20560 stack_pointer_rtx, stack_pointer_rtx);
20561 offset = 0;
20564 if (saved_regs_mask & (1 << PC_REGNUM))
20566 /* Only PC is to be popped. */
20567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20568 XVECEXP (par, 0, 0) = ret_rtx;
20569 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20570 gen_frame_mem (SImode,
20571 gen_rtx_POST_INC (SImode,
20572 stack_pointer_rtx)));
20573 RTX_FRAME_RELATED_P (tmp) = 1;
20574 XVECEXP (par, 0, 1) = tmp;
20575 par = emit_jump_insn (par);
20577 /* Generate dwarf info. */
20578 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20579 gen_rtx_REG (SImode, PC_REGNUM),
20580 NULL_RTX);
20581 REG_NOTES (par) = dwarf;
20582 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20583 stack_pointer_rtx, stack_pointer_rtx);
20587 /* Calculate the size of the return value that is passed in registers. */
20588 static unsigned
20589 arm_size_return_regs (void)
20591 machine_mode mode;
20593 if (crtl->return_rtx != 0)
20594 mode = GET_MODE (crtl->return_rtx);
20595 else
20596 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20598 return GET_MODE_SIZE (mode);
20601 /* Return true if the current function needs to save/restore LR. */
20602 static bool
20603 thumb_force_lr_save (void)
20605 return !cfun->machine->lr_save_eliminated
20606 && (!crtl->is_leaf
20607 || thumb_far_jump_used_p ()
20608 || df_regs_ever_live_p (LR_REGNUM));
20611 /* We do not know if r3 will be available, because
20612 there is an indirect tail call happening in this
20613 particular case. */
20614 static bool
20615 is_indirect_tailcall_p (rtx call)
20617 rtx pat = PATTERN (call);
20619 /* Indirect tail call. */
20620 pat = XVECEXP (pat, 0, 0);
20621 if (GET_CODE (pat) == SET)
20622 pat = SET_SRC (pat);
20624 pat = XEXP (XEXP (pat, 0), 0);
20625 return REG_P (pat);
20628 /* Return true if r3 is used by any of the tail call insns in the
20629 current function. */
20630 static bool
20631 any_sibcall_could_use_r3 (void)
20633 edge_iterator ei;
20634 edge e;
20636 if (!crtl->tail_call_emit)
20637 return false;
20638 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20639 if (e->flags & EDGE_SIBCALL)
20641 rtx_insn *call = BB_END (e->src);
20642 if (!CALL_P (call))
20643 call = prev_nonnote_nondebug_insn (call);
20644 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20645 if (find_regno_fusage (call, USE, 3)
20646 || is_indirect_tailcall_p (call))
20647 return true;
20649 return false;
20653 /* Compute the distance from register FROM to register TO.
20654 These can be the arg pointer (26), the soft frame pointer (25),
20655 the stack pointer (13) or the hard frame pointer (11).
20656 In thumb mode r7 is used as the soft frame pointer, if needed.
20657 Typical stack layout looks like this:
20659 old stack pointer -> | |
20660 ----
20661 | | \
20662 | | saved arguments for
20663 | | vararg functions
20664 | | /
20666 hard FP & arg pointer -> | | \
20667 | | stack
20668 | | frame
20669 | | /
20671 | | \
20672 | | call saved
20673 | | registers
20674 soft frame pointer -> | | /
20676 | | \
20677 | | local
20678 | | variables
20679 locals base pointer -> | | /
20681 | | \
20682 | | outgoing
20683 | | arguments
20684 current stack pointer -> | | /
20687 For a given function some or all of these stack components
20688 may not be needed, giving rise to the possibility of
20689 eliminating some of the registers.
20691 The values returned by this function must reflect the behavior
20692 of arm_expand_prologue() and arm_compute_save_reg_mask().
20694 The sign of the number returned reflects the direction of stack
20695 growth, so the values are positive for all eliminations except
20696 from the soft frame pointer to the hard frame pointer.
20698 SFP may point just inside the local variables block to ensure correct
20699 alignment. */
20702 /* Calculate stack offsets. These are used to calculate register elimination
20703 offsets and in prologue/epilogue code. Also calculates which registers
20704 should be saved. */
20706 static arm_stack_offsets *
20707 arm_get_frame_offsets (void)
20709 struct arm_stack_offsets *offsets;
20710 unsigned long func_type;
20711 int saved;
20712 int core_saved;
20713 HOST_WIDE_INT frame_size;
20714 int i;
20716 offsets = &cfun->machine->stack_offsets;
20718 if (reload_completed)
20719 return offsets;
20721 /* Initially this is the size of the local variables. It will be translated
20722 into an offset once we have determined the size of preceding data. */
20723 frame_size = ROUND_UP_WORD (get_frame_size ());
20725 /* Space for variadic functions. */
20726 offsets->saved_args = crtl->args.pretend_args_size;
20728 /* In Thumb mode this is incorrect, but never used. */
20729 offsets->frame
20730 = (offsets->saved_args
20731 + arm_compute_static_chain_stack_bytes ()
20732 + (frame_pointer_needed ? 4 : 0));
20734 if (TARGET_32BIT)
20736 unsigned int regno;
20738 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20739 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20740 saved = core_saved;
20742 /* We know that SP will be doubleword aligned on entry, and we must
20743 preserve that condition at any subroutine call. We also require the
20744 soft frame pointer to be doubleword aligned. */
20746 if (TARGET_REALLY_IWMMXT)
20748 /* Check for the call-saved iWMMXt registers. */
20749 for (regno = FIRST_IWMMXT_REGNUM;
20750 regno <= LAST_IWMMXT_REGNUM;
20751 regno++)
20752 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20753 saved += 8;
20756 func_type = arm_current_func_type ();
20757 /* Space for saved VFP registers. */
20758 if (! IS_VOLATILE (func_type)
20759 && TARGET_HARD_FLOAT)
20760 saved += arm_get_vfp_saved_size ();
20762 else /* TARGET_THUMB1 */
20764 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20765 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20766 saved = core_saved;
20767 if (TARGET_BACKTRACE)
20768 saved += 16;
20771 /* Saved registers include the stack frame. */
20772 offsets->saved_regs
20773 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20774 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20776 /* A leaf function does not need any stack alignment if it has nothing
20777 on the stack. */
20778 if (crtl->is_leaf && frame_size == 0
20779 /* However if it calls alloca(), we have a dynamically allocated
20780 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20781 && ! cfun->calls_alloca)
20783 offsets->outgoing_args = offsets->soft_frame;
20784 offsets->locals_base = offsets->soft_frame;
20785 return offsets;
20788 /* Ensure SFP has the correct alignment. */
20789 if (ARM_DOUBLEWORD_ALIGN
20790 && (offsets->soft_frame & 7))
20792 offsets->soft_frame += 4;
20793 /* Try to align stack by pushing an extra reg. Don't bother doing this
20794 when there is a stack frame as the alignment will be rolled into
20795 the normal stack adjustment. */
20796 if (frame_size + crtl->outgoing_args_size == 0)
20798 int reg = -1;
20800 /* Register r3 is caller-saved. Normally it does not need to be
20801 saved on entry by the prologue. However if we choose to save
20802 it for padding then we may confuse the compiler into thinking
20803 a prologue sequence is required when in fact it is not. This
20804 will occur when shrink-wrapping if r3 is used as a scratch
20805 register and there are no other callee-saved writes.
20807 This situation can be avoided when other callee-saved registers
20808 are available and r3 is not mandatory if we choose a callee-saved
20809 register for padding. */
20810 bool prefer_callee_reg_p = false;
20812 /* If it is safe to use r3, then do so. This sometimes
20813 generates better code on Thumb-2 by avoiding the need to
20814 use 32-bit push/pop instructions. */
20815 if (! any_sibcall_could_use_r3 ()
20816 && arm_size_return_regs () <= 12
20817 && (offsets->saved_regs_mask & (1 << 3)) == 0
20818 && (TARGET_THUMB2
20819 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20821 reg = 3;
20822 if (!TARGET_THUMB2)
20823 prefer_callee_reg_p = true;
20825 if (reg == -1
20826 || prefer_callee_reg_p)
20828 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20830 /* Avoid fixed registers; they may be changed at
20831 arbitrary times so it's unsafe to restore them
20832 during the epilogue. */
20833 if (!fixed_regs[i]
20834 && (offsets->saved_regs_mask & (1 << i)) == 0)
20836 reg = i;
20837 break;
20842 if (reg != -1)
20844 offsets->saved_regs += 4;
20845 offsets->saved_regs_mask |= (1 << reg);
20850 offsets->locals_base = offsets->soft_frame + frame_size;
20851 offsets->outgoing_args = (offsets->locals_base
20852 + crtl->outgoing_args_size);
20854 if (ARM_DOUBLEWORD_ALIGN)
20856 /* Ensure SP remains doubleword aligned. */
20857 if (offsets->outgoing_args & 7)
20858 offsets->outgoing_args += 4;
20859 gcc_assert (!(offsets->outgoing_args & 7));
20862 return offsets;
20866 /* Calculate the relative offsets for the different stack pointers. Positive
20867 offsets are in the direction of stack growth. */
20869 HOST_WIDE_INT
20870 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20872 arm_stack_offsets *offsets;
20874 offsets = arm_get_frame_offsets ();
20876 /* OK, now we have enough information to compute the distances.
20877 There must be an entry in these switch tables for each pair
20878 of registers in ELIMINABLE_REGS, even if some of the entries
20879 seem to be redundant or useless. */
20880 switch (from)
20882 case ARG_POINTER_REGNUM:
20883 switch (to)
20885 case THUMB_HARD_FRAME_POINTER_REGNUM:
20886 return 0;
20888 case FRAME_POINTER_REGNUM:
20889 /* This is the reverse of the soft frame pointer
20890 to hard frame pointer elimination below. */
20891 return offsets->soft_frame - offsets->saved_args;
20893 case ARM_HARD_FRAME_POINTER_REGNUM:
20894 /* This is only non-zero in the case where the static chain register
20895 is stored above the frame. */
20896 return offsets->frame - offsets->saved_args - 4;
20898 case STACK_POINTER_REGNUM:
20899 /* If nothing has been pushed on the stack at all
20900 then this will return -4. This *is* correct! */
20901 return offsets->outgoing_args - (offsets->saved_args + 4);
20903 default:
20904 gcc_unreachable ();
20906 gcc_unreachable ();
20908 case FRAME_POINTER_REGNUM:
20909 switch (to)
20911 case THUMB_HARD_FRAME_POINTER_REGNUM:
20912 return 0;
20914 case ARM_HARD_FRAME_POINTER_REGNUM:
20915 /* The hard frame pointer points to the top entry in the
20916 stack frame. The soft frame pointer to the bottom entry
20917 in the stack frame. If there is no stack frame at all,
20918 then they are identical. */
20920 return offsets->frame - offsets->soft_frame;
20922 case STACK_POINTER_REGNUM:
20923 return offsets->outgoing_args - offsets->soft_frame;
20925 default:
20926 gcc_unreachable ();
20928 gcc_unreachable ();
20930 default:
20931 /* You cannot eliminate from the stack pointer.
20932 In theory you could eliminate from the hard frame
20933 pointer to the stack pointer, but this will never
20934 happen, since if a stack frame is not needed the
20935 hard frame pointer will never be used. */
20936 gcc_unreachable ();
20940 /* Given FROM and TO register numbers, say whether this elimination is
20941 allowed. Frame pointer elimination is automatically handled.
20943 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20944 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20945 pointer, we must eliminate FRAME_POINTER_REGNUM into
20946 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20947 ARG_POINTER_REGNUM. */
20949 bool
20950 arm_can_eliminate (const int from, const int to)
20952 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20953 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20954 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20955 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20956 true);
20959 /* Emit RTL to save coprocessor registers on function entry. Returns the
20960 number of bytes pushed. */
20962 static int
20963 arm_save_coproc_regs(void)
20965 int saved_size = 0;
20966 unsigned reg;
20967 unsigned start_reg;
20968 rtx insn;
20970 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20971 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20973 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20974 insn = gen_rtx_MEM (V2SImode, insn);
20975 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20976 RTX_FRAME_RELATED_P (insn) = 1;
20977 saved_size += 8;
20980 if (TARGET_HARD_FLOAT)
20982 start_reg = FIRST_VFP_REGNUM;
20984 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20986 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20987 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20989 if (start_reg != reg)
20990 saved_size += vfp_emit_fstmd (start_reg,
20991 (reg - start_reg) / 2);
20992 start_reg = reg + 2;
20995 if (start_reg != reg)
20996 saved_size += vfp_emit_fstmd (start_reg,
20997 (reg - start_reg) / 2);
20999 return saved_size;
21003 /* Set the Thumb frame pointer from the stack pointer. */
21005 static void
21006 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21008 HOST_WIDE_INT amount;
21009 rtx insn, dwarf;
21011 amount = offsets->outgoing_args - offsets->locals_base;
21012 if (amount < 1024)
21013 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21014 stack_pointer_rtx, GEN_INT (amount)));
21015 else
21017 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21018 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21019 expects the first two operands to be the same. */
21020 if (TARGET_THUMB2)
21022 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21023 stack_pointer_rtx,
21024 hard_frame_pointer_rtx));
21026 else
21028 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21029 hard_frame_pointer_rtx,
21030 stack_pointer_rtx));
21032 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21033 plus_constant (Pmode, stack_pointer_rtx, amount));
21034 RTX_FRAME_RELATED_P (dwarf) = 1;
21035 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21038 RTX_FRAME_RELATED_P (insn) = 1;
21041 struct scratch_reg {
21042 rtx reg;
21043 bool saved;
21046 /* Return a short-lived scratch register for use as a 2nd scratch register on
21047 function entry after the registers are saved in the prologue. This register
21048 must be released by means of release_scratch_register_on_entry. IP is not
21049 considered since it is always used as the 1st scratch register if available.
21051 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21052 mask of live registers. */
21054 static void
21055 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21056 unsigned long live_regs)
21058 int regno = -1;
21060 sr->saved = false;
21062 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21063 regno = LR_REGNUM;
21064 else
21066 unsigned int i;
21068 for (i = 4; i < 11; i++)
21069 if (regno1 != i && (live_regs & (1 << i)) != 0)
21071 regno = i;
21072 break;
21075 if (regno < 0)
21077 /* If IP is used as the 1st scratch register for a nested function,
21078 then either r3 wasn't available or is used to preserve IP. */
21079 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21080 regno1 = 3;
21081 regno = (regno1 == 3 ? 2 : 3);
21082 sr->saved
21083 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21084 regno);
21088 sr->reg = gen_rtx_REG (SImode, regno);
21089 if (sr->saved)
21091 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21092 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21093 rtx x = gen_rtx_SET (stack_pointer_rtx,
21094 plus_constant (Pmode, stack_pointer_rtx, -4));
21095 RTX_FRAME_RELATED_P (insn) = 1;
21096 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21100 /* Release a scratch register obtained from the preceding function. */
21102 static void
21103 release_scratch_register_on_entry (struct scratch_reg *sr)
21105 if (sr->saved)
21107 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21108 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21109 rtx x = gen_rtx_SET (stack_pointer_rtx,
21110 plus_constant (Pmode, stack_pointer_rtx, 4));
21111 RTX_FRAME_RELATED_P (insn) = 1;
21112 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21116 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21118 #if PROBE_INTERVAL > 4096
21119 #error Cannot use indexed addressing mode for stack probing
21120 #endif
21122 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21123 inclusive. These are offsets from the current stack pointer. REGNO1
21124 is the index number of the 1st scratch register and LIVE_REGS is the
21125 mask of live registers. */
21127 static void
21128 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21129 unsigned int regno1, unsigned long live_regs)
21131 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21133 /* See if we have a constant small number of probes to generate. If so,
21134 that's the easy case. */
21135 if (size <= PROBE_INTERVAL)
21137 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21138 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21139 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21142 /* The run-time loop is made up of 10 insns in the generic case while the
21143 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21144 else if (size <= 5 * PROBE_INTERVAL)
21146 HOST_WIDE_INT i, rem;
21148 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21149 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21150 emit_stack_probe (reg1);
21152 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21153 it exceeds SIZE. If only two probes are needed, this will not
21154 generate any code. Then probe at FIRST + SIZE. */
21155 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21157 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21158 emit_stack_probe (reg1);
21161 rem = size - (i - PROBE_INTERVAL);
21162 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21164 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21165 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21167 else
21168 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
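 /* A worked example for this arm, assuming PROBE_INTERVAL is 4096: with
    FIRST == 0 and SIZE == 12288 the code above emits probes at SP - 4096,
    SP - 8192 and SP - 12288, so the final probe lands exactly at
    FIRST + SIZE (all values illustrative).  */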
21171 /* Otherwise, do the same as above, but in a loop. Note that we must be
21172 extra careful with variables wrapping around because we might be at
21173 the very top (or the very bottom) of the address space and we have
21174 to be able to handle this case properly; in particular, we use an
21175 equality test for the loop condition. */
21176 else
21178 HOST_WIDE_INT rounded_size;
21179 struct scratch_reg sr;
21181 get_scratch_register_on_entry (&sr, regno1, live_regs);
21183 emit_move_insn (reg1, GEN_INT (first));
21186 /* Step 1: round SIZE to the previous multiple of the interval. */
21188 rounded_size = size & -PROBE_INTERVAL;
21189 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21192 /* Step 2: compute initial and final value of the loop counter. */
21194 /* TEST_ADDR = SP + FIRST. */
21195 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21197 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21198 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21201 /* Step 3: the loop
21205 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21206 probe at TEST_ADDR
21208 while (TEST_ADDR != LAST_ADDR)
21210 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21211 until it is equal to ROUNDED_SIZE. */
21213 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21216 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21217 that SIZE is equal to ROUNDED_SIZE. */
21219 if (size != rounded_size)
21221 HOST_WIDE_INT rem = size - rounded_size;
21223 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21225 emit_set_insn (sr.reg,
21226 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21227 emit_stack_probe (plus_constant (Pmode, sr.reg,
21228 PROBE_INTERVAL - rem));
21230 else
21231 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21234 release_scratch_register_on_entry (&sr);
21237 /* Make sure nothing is scheduled before we are done. */
21238 emit_insn (gen_blockage ());
21241 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21242 absolute addresses. */
21244 const char *
21245 output_probe_stack_range (rtx reg1, rtx reg2)
21247 static int labelno = 0;
21248 char loop_lab[32];
21249 rtx xops[2];
21251 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21253 /* Loop. */
21254 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21256 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21257 xops[0] = reg1;
21258 xops[1] = GEN_INT (PROBE_INTERVAL);
21259 output_asm_insn ("sub\t%0, %0, %1", xops);
21261 /* Probe at TEST_ADDR. */
21262 output_asm_insn ("str\tr0, [%0, #0]", xops);
21264 /* Test if TEST_ADDR == LAST_ADDR. */
21265 xops[1] = reg2;
21266 output_asm_insn ("cmp\t%0, %1", xops);
21268 /* Branch. */
21269 fputs ("\tbne\t", asm_out_file);
21270 assemble_name_raw (asm_out_file, loop_lab);
21271 fputc ('\n', asm_out_file);
21273 return "";
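 /* With a 4096-byte PROBE_INTERVAL the loop emitted above therefore looks
    roughly like this (the label is generated, the register numbers are
    illustrative):

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0  */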
21276 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21277 function. */
21278 void
21279 arm_expand_prologue (void)
21281 rtx amount;
21282 rtx insn;
21283 rtx ip_rtx;
21284 unsigned long live_regs_mask;
21285 unsigned long func_type;
21286 int fp_offset = 0;
21287 int saved_pretend_args = 0;
21288 int saved_regs = 0;
21289 unsigned HOST_WIDE_INT args_to_push;
21290 HOST_WIDE_INT size;
21291 arm_stack_offsets *offsets;
21292 bool clobber_ip;
21294 func_type = arm_current_func_type ();
21296 /* Naked functions don't have prologues. */
21297 if (IS_NAKED (func_type))
21299 if (flag_stack_usage_info)
21300 current_function_static_stack_size = 0;
21301 return;
21304 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21305 args_to_push = crtl->args.pretend_args_size;
21307 /* Compute which registers we will have to save onto the stack. */
21308 offsets = arm_get_frame_offsets ();
21309 live_regs_mask = offsets->saved_regs_mask;
21311 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21313 if (IS_STACKALIGN (func_type))
21315 rtx r0, r1;
21317 /* Handle a word-aligned stack pointer. We generate the following:
21319 mov r0, sp
21320 bic r1, r0, #7
21321 mov sp, r1
21322 <save and restore r0 in normal prologue/epilogue>
21323 mov sp, r0
21324 bx lr
21326 The unwinder doesn't need to know about the stack realignment.
21327 Just tell it we saved SP in r0. */
21328 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21330 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21331 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21333 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21334 RTX_FRAME_RELATED_P (insn) = 1;
21335 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21337 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21339 /* ??? The CFA changes here, which may cause GDB to conclude that it
21340 has entered a different function. That said, the unwind info is
21341 correct, individually, before and after this instruction because
21342 we've described the save of SP, which will override the default
21343 handling of SP as restoring from the CFA. */
21344 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21347 /* The static chain register is the same as the IP register. If it is
21348 clobbered when creating the frame, we need to save and restore it. */
21349 clobber_ip = IS_NESTED (func_type)
21350 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21351 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21352 && !df_regs_ever_live_p (LR_REGNUM)
21353 && arm_r3_live_at_start_p ()));
21355 /* Find somewhere to store IP whilst the frame is being created.
21356 We try the following places in order:
21358 1. The last argument register r3 if it is available.
21359 2. A slot on the stack above the frame if there are no
21360 arguments to push onto the stack.
21361 3. Register r3 again, after pushing the argument registers
21362 onto the stack, if this is a varargs function.
21363 4. The last slot on the stack created for the arguments to
21364 push, if this isn't a varargs function.
21366 Note - we only need to tell the dwarf2 backend about the SP
21367 adjustment in the second variant; the static chain register
21368 doesn't need to be unwound, as it doesn't contain a value
21369 inherited from the caller. */
21370 if (clobber_ip)
21372 if (!arm_r3_live_at_start_p ())
21373 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21374 else if (args_to_push == 0)
21376 rtx addr, dwarf;
21378 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21379 saved_regs += 4;
21381 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21382 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21383 fp_offset = 4;
21385 /* Just tell the dwarf backend that we adjusted SP. */
21386 dwarf = gen_rtx_SET (stack_pointer_rtx,
21387 plus_constant (Pmode, stack_pointer_rtx,
21388 -fp_offset));
21389 RTX_FRAME_RELATED_P (insn) = 1;
21390 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21392 else
21394 /* Store the args on the stack. */
21395 if (cfun->machine->uses_anonymous_args)
21397 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21398 (0xf0 >> (args_to_push / 4)) & 0xf);
21399 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21400 saved_pretend_args = 1;
21402 else
21404 rtx addr, dwarf;
21406 if (args_to_push == 4)
21407 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21408 else
21409 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21410 plus_constant (Pmode,
21411 stack_pointer_rtx,
21412 -args_to_push));
21414 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21416 /* Just tell the dwarf backend that we adjusted SP. */
21417 dwarf = gen_rtx_SET (stack_pointer_rtx,
21418 plus_constant (Pmode, stack_pointer_rtx,
21419 -args_to_push));
21420 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21423 RTX_FRAME_RELATED_P (insn) = 1;
21424 fp_offset = args_to_push;
21425 args_to_push = 0;
21429 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21431 if (IS_INTERRUPT (func_type))
21433 /* Interrupt functions must not corrupt any registers.
21434 Creating a frame pointer, however, corrupts the IP
21435 register, so we must push it first. */
21436 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21438 /* Do not set RTX_FRAME_RELATED_P on this insn.
21439 The dwarf stack unwinding code only wants to see one
21440 stack decrement per function, and this is not it. If
21441 this instruction is labeled as being part of the frame
21442 creation sequence then dwarf2out_frame_debug_expr will
21443 die when it encounters the assignment of IP to FP
21444 later on, since the use of SP here establishes SP as
21445 the CFA register and not IP.
21447 Anyway this instruction is not really part of the stack
21448 frame creation although it is part of the prologue. */
21451 insn = emit_set_insn (ip_rtx,
21452 plus_constant (Pmode, stack_pointer_rtx,
21453 fp_offset));
21454 RTX_FRAME_RELATED_P (insn) = 1;
21457 if (args_to_push)
21459 /* Push the argument registers, or reserve space for them. */
21460 if (cfun->machine->uses_anonymous_args)
21461 insn = emit_multi_reg_push
21462 ((0xf0 >> (args_to_push / 4)) & 0xf,
21463 (0xf0 >> (args_to_push / 4)) & 0xf);
21464 else
21465 insn = emit_insn
21466 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21467 GEN_INT (- args_to_push)));
21468 RTX_FRAME_RELATED_P (insn) = 1;
21471 /* If this is an interrupt service routine, and the link register
21472 is going to be pushed, and we're not generating the extra
21473 push of IP (needed when a frame is needed and the frame layout is APCS),
21474 subtracting four from LR now will mean that the function return
21475 can be done with a single instruction. */
21476 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21477 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21478 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21479 && TARGET_ARM)
21481 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21483 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21486 if (live_regs_mask)
21488 unsigned long dwarf_regs_mask = live_regs_mask;
21490 saved_regs += bit_count (live_regs_mask) * 4;
21491 if (optimize_size && !frame_pointer_needed
21492 && saved_regs == offsets->saved_regs - offsets->saved_args)
21494 /* If no coprocessor registers are being pushed and we don't have
21495 to worry about a frame pointer then push extra registers to
21496 create the stack frame. This is done in a way that does not
21497 alter the frame layout, so is independent of the epilogue. */
21498 int n;
21499 int frame;
21500 n = 0;
21501 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21502 n++;
21503 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21504 if (frame && n * 4 >= frame)
21506 n = frame / 4;
21507 live_regs_mask |= (1 << n) - 1;
21508 saved_regs += frame;
21512 if (TARGET_LDRD
21513 && current_tune->prefer_ldrd_strd
21514 && !optimize_function_for_size_p (cfun))
21516 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21517 if (TARGET_THUMB2)
21518 thumb2_emit_strd_push (live_regs_mask);
21519 else if (TARGET_ARM
21520 && !TARGET_APCS_FRAME
21521 && !IS_INTERRUPT (func_type))
21522 arm_emit_strd_push (live_regs_mask);
21523 else
21525 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21526 RTX_FRAME_RELATED_P (insn) = 1;
21529 else
21531 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21532 RTX_FRAME_RELATED_P (insn) = 1;
21536 if (! IS_VOLATILE (func_type))
21537 saved_regs += arm_save_coproc_regs ();
21539 if (frame_pointer_needed && TARGET_ARM)
21541 /* Create the new frame pointer. */
21542 if (TARGET_APCS_FRAME)
21544 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21545 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21546 RTX_FRAME_RELATED_P (insn) = 1;
21548 else
21550 insn = GEN_INT (saved_regs - (4 + fp_offset));
21551 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21552 stack_pointer_rtx, insn));
21553 RTX_FRAME_RELATED_P (insn) = 1;
21557 size = offsets->outgoing_args - offsets->saved_args;
21558 if (flag_stack_usage_info)
21559 current_function_static_stack_size = size;
21561 /* If this isn't an interrupt service routine and we have a frame, then do
21562 stack checking. We use IP as the first scratch register, except for the
21563 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21564 if (!IS_INTERRUPT (func_type)
21565 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21567 unsigned int regno;
21569 if (!IS_NESTED (func_type) || clobber_ip)
21570 regno = IP_REGNUM;
21571 else if (df_regs_ever_live_p (LR_REGNUM))
21572 regno = LR_REGNUM;
21573 else
21574 regno = 3;
21576 if (crtl->is_leaf && !cfun->calls_alloca)
21578 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21579 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21580 size - STACK_CHECK_PROTECT,
21581 regno, live_regs_mask);
21583 else if (size > 0)
21584 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21585 regno, live_regs_mask);
21588 /* Recover the static chain register. */
21589 if (clobber_ip)
21591 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21592 insn = gen_rtx_REG (SImode, 3);
21593 else
21595 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21596 insn = gen_frame_mem (SImode, insn);
21598 emit_set_insn (ip_rtx, insn);
21599 emit_insn (gen_force_register_use (ip_rtx));
21602 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21604 /* This add can produce multiple insns for a large constant, so we
21605 need to get tricky. */
21606 rtx_insn *last = get_last_insn ();
21608 amount = GEN_INT (offsets->saved_args + saved_regs
21609 - offsets->outgoing_args);
21611 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21612 amount));
21615 last = last ? NEXT_INSN (last) : get_insns ();
21616 RTX_FRAME_RELATED_P (last) = 1;
21618 while (last != insn);
21620 /* If the frame pointer is needed, emit a special barrier that
21621 will prevent the scheduler from moving stores to the frame
21622 before the stack adjustment. */
21623 if (frame_pointer_needed)
21624 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21625 hard_frame_pointer_rtx));
21629 if (frame_pointer_needed && TARGET_THUMB2)
21630 thumb_set_frame_pointer (offsets);
21632 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21634 unsigned long mask;
21636 mask = live_regs_mask;
21637 mask &= THUMB2_WORK_REGS;
21638 if (!IS_NESTED (func_type))
21639 mask |= (1 << IP_REGNUM);
21640 arm_load_pic_register (mask);
21643 /* If we are profiling, make sure no instructions are scheduled before
21644 the call to mcount. Similarly if the user has requested no
21645 scheduling in the prologue. Similarly if we want non-call exceptions
21646 using the EABI unwinder, to prevent faulting instructions from being
21647 swapped with a stack adjustment. */
21648 if (crtl->profile || !TARGET_SCHED_PROLOG
21649 || (arm_except_unwind_info (&global_options) == UI_TARGET
21650 && cfun->can_throw_non_call_exceptions))
21651 emit_insn (gen_blockage ());
21653 /* If the link register is being kept alive, with the return address in it,
21654 then make sure that it does not get reused by the ce2 pass. */
21655 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21656 cfun->machine->lr_save_eliminated = 1;
21659 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21660 static void
21661 arm_print_condition (FILE *stream)
21663 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21665 /* Branch conversion is not implemented for Thumb-2. */
21666 if (TARGET_THUMB)
21668 output_operand_lossage ("predicated Thumb instruction");
21669 return;
21671 if (current_insn_predicate != NULL)
21673 output_operand_lossage
21674 ("predicated instruction in conditional sequence");
21675 return;
21678 fputs (arm_condition_codes[arm_current_cc], stream);
21680 else if (current_insn_predicate)
21682 enum arm_cond_code code;
21684 if (TARGET_THUMB1)
21686 output_operand_lossage ("predicated Thumb instruction");
21687 return;
21690 code = get_arm_condition_code (current_insn_predicate);
21691 fputs (arm_condition_codes[code], stream);
21696 /* Globally reserved letters: acln
21697 Punctuation letters currently used: @_|?().!#
21698 Lower case letters currently used: bcdefhimpqtvwxyz
21699 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21700 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21702 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21704 If CODE is 'd', then the X is a condition operand and the instruction
21705 should only be executed if the condition is true.
21706 If CODE is 'D', then the X is a condition operand and the instruction
21707 should only be executed if the condition is false: however, if the mode
21708 of the comparison is CCFPEmode, then always execute the instruction -- we
21709 do this because in these circumstances !GE does not necessarily imply LT;
21710 in these cases the instruction pattern will take care to make sure that
21711 an instruction containing %d will follow, thereby undoing the effects of
21712 doing this instruction unconditionally.
21713 If CODE is 'N' then X is a floating point operand that must be negated
21714 before output.
21715 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21716 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
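 /* Two worked examples of the codes above (operands illustrative):
    %B applied to (const_int 10) prints -11, the bitwise inverse, while
    %b applied to (const_int 2) prints #1, the log2 of the value.  */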
21717 static void
21718 arm_print_operand (FILE *stream, rtx x, int code)
21720 switch (code)
21722 case '@':
21723 fputs (ASM_COMMENT_START, stream);
21724 return;
21726 case '_':
21727 fputs (user_label_prefix, stream);
21728 return;
21730 case '|':
21731 fputs (REGISTER_PREFIX, stream);
21732 return;
21734 case '?':
21735 arm_print_condition (stream);
21736 return;
21738 case '.':
21739 /* The current condition code for a condition code setting instruction.
21740 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21741 fputc('s', stream);
21742 arm_print_condition (stream);
21743 return;
21745 case '!':
21746 /* If the instruction is conditionally executed then print
21747 the current condition code, otherwise print 's'. */
21748 gcc_assert (TARGET_THUMB2);
21749 if (current_insn_predicate)
21750 arm_print_condition (stream);
21751 else
21752 fputc('s', stream);
21753 break;
21755 /* %# is a "break" sequence. It doesn't output anything, but is used to
21756 separate e.g. operand numbers from following text, if that text consists
21757 of further digits which we don't want to be part of the operand
21758 number. */
21759 case '#':
21760 return;
21762 case 'N':
21764 REAL_VALUE_TYPE r;
21765 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21766 fprintf (stream, "%s", fp_const_from_val (&r));
21768 return;
21770 /* An integer or symbol address without a preceding # sign. */
21771 case 'c':
21772 switch (GET_CODE (x))
21774 case CONST_INT:
21775 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21776 break;
21778 case SYMBOL_REF:
21779 output_addr_const (stream, x);
21780 break;
21782 case CONST:
21783 if (GET_CODE (XEXP (x, 0)) == PLUS
21784 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21786 output_addr_const (stream, x);
21787 break;
21789 /* Fall through. */
21791 default:
21792 output_operand_lossage ("Unsupported operand for code '%c'", code);
21794 return;
21796 /* An integer that we want to print in HEX. */
21797 case 'x':
21798 switch (GET_CODE (x))
21800 case CONST_INT:
21801 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21802 break;
21804 default:
21805 output_operand_lossage ("Unsupported operand for code '%c'", code);
21807 return;
21809 case 'B':
21810 if (CONST_INT_P (x))
21812 HOST_WIDE_INT val;
21813 val = ARM_SIGN_EXTEND (~INTVAL (x));
21814 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21816 else
21818 putc ('~', stream);
21819 output_addr_const (stream, x);
21821 return;
21823 case 'b':
21824 /* Print the log2 of a CONST_INT. */
21826 HOST_WIDE_INT val;
21828 if (!CONST_INT_P (x)
21829 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21830 output_operand_lossage ("Unsupported operand for code '%c'", code);
21831 else
21832 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21834 return;
21836 case 'L':
21837 /* The low 16 bits of an immediate constant. */
21838 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21839 return;
21841 case 'i':
21842 fprintf (stream, "%s", arithmetic_instr (x, 1));
21843 return;
21845 case 'I':
21846 fprintf (stream, "%s", arithmetic_instr (x, 0));
21847 return;
21849 case 'S':
21851 HOST_WIDE_INT val;
21852 const char *shift;
21854 shift = shift_op (x, &val);
21856 if (shift)
21858 fprintf (stream, ", %s ", shift);
21859 if (val == -1)
21860 arm_print_operand (stream, XEXP (x, 1), 0);
21861 else
21862 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21865 return;
21867 /* An explanation of the 'Q', 'R' and 'H' register operands:
21869 In a pair of registers containing a DI or DF value the 'Q'
21870 operand returns the register number of the register containing
21871 the least significant part of the value. The 'R' operand returns
21872 the register number of the register containing the most
21873 significant part of the value.
21875 The 'H' operand returns the higher of the two register numbers.
21876 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21877 same as the 'Q' operand, since the most significant part of the
21878 value is held in the lower number register. The reverse is true
21879 on systems where WORDS_BIG_ENDIAN is false.
21881 The purpose of these operands is to distinguish between cases
21882 where the endian-ness of the values is important (for example
21883 when they are added together), and cases where the endian-ness
21884 is irrelevant, but the order of register operations is important.
21885 For example when loading a value from memory into a register
21886 pair, the endian-ness does not matter. Provided that the value
21887 from the lower memory address is put into the lower numbered
21888 register, and the value from the higher address is put into the
21889 higher numbered register, the load will work regardless of whether
21890 the value being loaded is big-wordian or little-wordian. The
21891 order of the two register loads can matter however, if the address
21892 of the memory location is actually held in one of the registers
21893 being overwritten by the load.
21895 The 'Q' and 'R' constraints are also available for 64-bit
21896 constants. */
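 /* A concrete illustration (little-endian, so WORDS_BIG_ENDIAN is false;
    the register choice is hypothetical): for a DImode value held in
    r0/r1, %Q prints r0, %R prints r1 and %H also prints r1.  */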
21897 case 'Q':
21898 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21900 rtx part = gen_lowpart (SImode, x);
21901 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21902 return;
21905 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21907 output_operand_lossage ("invalid operand for code '%c'", code);
21908 return;
21911 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21912 return;
21914 case 'R':
21915 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21917 machine_mode mode = GET_MODE (x);
21918 rtx part;
21920 if (mode == VOIDmode)
21921 mode = DImode;
21922 part = gen_highpart_mode (SImode, mode, x);
21923 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21924 return;
21927 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21929 output_operand_lossage ("invalid operand for code '%c'", code);
21930 return;
21933 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21934 return;
21936 case 'H':
21937 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21939 output_operand_lossage ("invalid operand for code '%c'", code);
21940 return;
21943 asm_fprintf (stream, "%r", REGNO (x) + 1);
21944 return;
21946 case 'J':
21947 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21949 output_operand_lossage ("invalid operand for code '%c'", code);
21950 return;
21953 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21954 return;
21956 case 'K':
21957 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21959 output_operand_lossage ("invalid operand for code '%c'", code);
21960 return;
21963 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21964 return;
21966 case 'm':
21967 asm_fprintf (stream, "%r",
21968 REG_P (XEXP (x, 0))
21969 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21970 return;
21972 case 'M':
21973 asm_fprintf (stream, "{%r-%r}",
21974 REGNO (x),
21975 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21976 return;
21978 /* Like 'M', but writing doubleword vector registers, for use by Neon
21979 insns. */
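 /* E.g. an OImode operand (four doubleword registers) that starts at d2
    would be printed as {d2-d5} (registers illustrative).  */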
21980 case 'h':
21982 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21983 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21984 if (numregs == 1)
21985 asm_fprintf (stream, "{d%d}", regno);
21986 else
21987 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21989 return;
21991 case 'd':
21992 /* CONST_TRUE_RTX means always -- that's the default. */
21993 if (x == const_true_rtx)
21994 return;
21996 if (!COMPARISON_P (x))
21998 output_operand_lossage ("invalid operand for code '%c'", code);
21999 return;
22002 fputs (arm_condition_codes[get_arm_condition_code (x)],
22003 stream);
22004 return;
22006 case 'D':
22007 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22008 want to do that. */
22009 if (x == const_true_rtx)
22011 output_operand_lossage ("instruction never executed");
22012 return;
22014 if (!COMPARISON_P (x))
22016 output_operand_lossage ("invalid operand for code '%c'", code);
22017 return;
22020 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22021 (get_arm_condition_code (x))],
22022 stream);
22023 return;
22025 case 's':
22026 case 'V':
22027 case 'W':
22028 case 'X':
22029 case 'Y':
22030 case 'Z':
22031 /* Former Maverick support, removed after GCC-4.7. */
22032 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22033 return;
22035 case 'U':
22036 if (!REG_P (x)
22037 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22038 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22039 /* Bad value for wCG register number. */
22041 output_operand_lossage ("invalid operand for code '%c'", code);
22042 return;
22045 else
22046 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22047 return;
22049 /* Print an iWMMXt control register name. */
22050 case 'w':
22051 if (!CONST_INT_P (x)
22052 || INTVAL (x) < 0
22053 || INTVAL (x) >= 16)
22054 /* Bad value for wC register number. */
22056 output_operand_lossage ("invalid operand for code '%c'", code);
22057 return;
22060 else
22062 static const char * wc_reg_names [16] =
22064 "wCID", "wCon", "wCSSF", "wCASF",
22065 "wC4", "wC5", "wC6", "wC7",
22066 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22067 "wC12", "wC13", "wC14", "wC15"
22070 fputs (wc_reg_names [INTVAL (x)], stream);
22072 return;
22074 /* Print the high single-precision register of a VFP double-precision
22075 register. */
22076 case 'p':
22078 machine_mode mode = GET_MODE (x);
22079 int regno;
22081 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22083 output_operand_lossage ("invalid operand for code '%c'", code);
22084 return;
22087 regno = REGNO (x);
22088 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22090 output_operand_lossage ("invalid operand for code '%c'", code);
22091 return;
22094 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22096 return;
22098 /* Print a VFP/Neon double precision or quad precision register name. */
22099 case 'P':
22100 case 'q':
22102 machine_mode mode = GET_MODE (x);
22103 int is_quad = (code == 'q');
22104 int regno;
22106 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22108 output_operand_lossage ("invalid operand for code '%c'", code);
22109 return;
22112 if (!REG_P (x)
22113 || !IS_VFP_REGNUM (REGNO (x)))
22115 output_operand_lossage ("invalid operand for code '%c'", code);
22116 return;
22119 regno = REGNO (x);
22120 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22121 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22123 output_operand_lossage ("invalid operand for code '%c'", code);
22124 return;
22127 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22128 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22130 return;
22132 /* These two codes print the low/high doubleword register of a Neon quad
22133 register, respectively. For pair-structure types, can also print
22134 low/high quadword registers. */
22135 case 'e':
22136 case 'f':
22138 machine_mode mode = GET_MODE (x);
22139 int regno;
22141 if ((GET_MODE_SIZE (mode) != 16
22142 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22144 output_operand_lossage ("invalid operand for code '%c'", code);
22145 return;
22148 regno = REGNO (x);
22149 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22151 output_operand_lossage ("invalid operand for code '%c'", code);
22152 return;
22155 if (GET_MODE_SIZE (mode) == 16)
22156 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22157 + (code == 'f' ? 1 : 0));
22158 else
22159 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22160 + (code == 'f' ? 1 : 0));
22162 return;
22164 /* Print a VFPv3 floating-point constant, represented as an integer
22165 index. */
22166 case 'G':
22168 int index = vfp3_const_double_index (x);
22169 gcc_assert (index != -1);
22170 fprintf (stream, "%d", index);
22172 return;
22174 /* Print bits representing opcode features for Neon.
22176 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22177 and polynomials as unsigned.
22179 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22181 Bit 2 is 1 for rounding functions, 0 otherwise. */
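 /* For instance (illustrative): a bits value of 3 denotes a signed float,
    so %T and %F both print 'f'; if bit 2 were also set, %O would add the
    'r' rounding suffix.  */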
22183 /* Identify the type as 's', 'u', 'p' or 'f'. */
22184 case 'T':
22186 HOST_WIDE_INT bits = INTVAL (x);
22187 fputc ("uspf"[bits & 3], stream);
22189 return;
22191 /* Likewise, but signed and unsigned integers are both 'i'. */
22192 case 'F':
22194 HOST_WIDE_INT bits = INTVAL (x);
22195 fputc ("iipf"[bits & 3], stream);
22197 return;
22199 /* As for 'T', but emit 'u' instead of 'p'. */
22200 case 't':
22202 HOST_WIDE_INT bits = INTVAL (x);
22203 fputc ("usuf"[bits & 3], stream);
22205 return;
22207 /* Bit 2: rounding (vs none). */
22208 case 'O':
22210 HOST_WIDE_INT bits = INTVAL (x);
22211 fputs ((bits & 4) != 0 ? "r" : "", stream);
22213 return;
22215 /* Memory operand for vld1/vst1 instruction. */
22216 case 'A':
22218 rtx addr;
22219 bool postinc = FALSE;
22220 rtx postinc_reg = NULL;
22221 unsigned align, memsize, align_bits;
22223 gcc_assert (MEM_P (x));
22224 addr = XEXP (x, 0);
22225 if (GET_CODE (addr) == POST_INC)
22227 postinc = 1;
22228 addr = XEXP (addr, 0);
22230 if (GET_CODE (addr) == POST_MODIFY)
22232 postinc_reg = XEXP( XEXP (addr, 1), 1);
22233 addr = XEXP (addr, 0);
22235 asm_fprintf (stream, "[%r", REGNO (addr));
22237 /* We know the alignment of this access, so we can emit a hint in the
22238 instruction (for some alignments) as an aid to the memory subsystem
22239 of the target. */
22240 align = MEM_ALIGN (x) >> 3;
22241 memsize = MEM_SIZE (x);
22243 /* Only certain alignment specifiers are supported by the hardware. */
22244 if (memsize == 32 && (align % 32) == 0)
22245 align_bits = 256;
22246 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22247 align_bits = 128;
22248 else if (memsize >= 8 && (align % 8) == 0)
22249 align_bits = 64;
22250 else
22251 align_bits = 0;
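 /* E.g. a 16-byte access whose MEM is known to be 16-byte aligned gets a
    ":128" hint, so the operand is printed as something like "[r0:128]"
    (the register is illustrative).  */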
22253 if (align_bits != 0)
22254 asm_fprintf (stream, ":%d", align_bits);
22256 asm_fprintf (stream, "]");
22258 if (postinc)
22259 fputs("!", stream);
22260 if (postinc_reg)
22261 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22263 return;
22265 case 'C':
22267 rtx addr;
22269 gcc_assert (MEM_P (x));
22270 addr = XEXP (x, 0);
22271 gcc_assert (REG_P (addr));
22272 asm_fprintf (stream, "[%r]", REGNO (addr));
22274 return;
22276 /* Translate an S register number into a D register number and element index. */
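 /* For example, s1 is printed as d0[1] and s2 as d1[0].  */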
22277 case 'y':
22279 machine_mode mode = GET_MODE (x);
22280 int regno;
22282 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22284 output_operand_lossage ("invalid operand for code '%c'", code);
22285 return;
22288 regno = REGNO (x);
22289 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22291 output_operand_lossage ("invalid operand for code '%c'", code);
22292 return;
22295 regno = regno - FIRST_VFP_REGNUM;
22296 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22298 return;
22300 case 'v':
22301 gcc_assert (CONST_DOUBLE_P (x));
22302 int result;
22303 result = vfp3_const_double_for_fract_bits (x);
22304 if (result == 0)
22305 result = vfp3_const_double_for_bits (x);
22306 fprintf (stream, "#%d", result);
22307 return;
22309 /* Register specifier for vld1.16/vst1.16. Translate the S register
22310 number into a D register number and element index. */
22311 case 'z':
22313 machine_mode mode = GET_MODE (x);
22314 int regno;
22316 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22318 output_operand_lossage ("invalid operand for code '%c'", code);
22319 return;
22322 regno = REGNO (x);
22323 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22325 output_operand_lossage ("invalid operand for code '%c'", code);
22326 return;
22329 regno = regno - FIRST_VFP_REGNUM;
22330 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22332 return;
22334 default:
22335 if (x == 0)
22337 output_operand_lossage ("missing operand");
22338 return;
22341 switch (GET_CODE (x))
22343 case REG:
22344 asm_fprintf (stream, "%r", REGNO (x));
22345 break;
22347 case MEM:
22348 output_address (GET_MODE (x), XEXP (x, 0));
22349 break;
22351 case CONST_DOUBLE:
22353 char fpstr[20];
22354 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22355 sizeof (fpstr), 0, 1);
22356 fprintf (stream, "#%s", fpstr);
22358 break;
22360 default:
22361 gcc_assert (GET_CODE (x) != NEG);
22362 fputc ('#', stream);
22363 if (GET_CODE (x) == HIGH)
22365 fputs (":lower16:", stream);
22366 x = XEXP (x, 0);
22369 output_addr_const (stream, x);
22370 break;
22375 /* Target hook for printing a memory address. */
22376 static void
22377 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22379 if (TARGET_32BIT)
22381 int is_minus = GET_CODE (x) == MINUS;
22383 if (REG_P (x))
22384 asm_fprintf (stream, "[%r]", REGNO (x));
22385 else if (GET_CODE (x) == PLUS || is_minus)
22387 rtx base = XEXP (x, 0);
22388 rtx index = XEXP (x, 1);
22389 HOST_WIDE_INT offset = 0;
22390 if (!REG_P (base)
22391 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22393 /* Ensure that BASE is a register. */
22394 /* (one of them must be). */
22395 /* Also ensure the SP is not used as an index register. */
22396 std::swap (base, index);
22398 switch (GET_CODE (index))
22400 case CONST_INT:
22401 offset = INTVAL (index);
22402 if (is_minus)
22403 offset = -offset;
22404 asm_fprintf (stream, "[%r, #%wd]",
22405 REGNO (base), offset);
22406 break;
22408 case REG:
22409 asm_fprintf (stream, "[%r, %s%r]",
22410 REGNO (base), is_minus ? "-" : "",
22411 REGNO (index));
22412 break;
22414 case MULT:
22415 case ASHIFTRT:
22416 case LSHIFTRT:
22417 case ASHIFT:
22418 case ROTATERT:
22420 asm_fprintf (stream, "[%r, %s%r",
22421 REGNO (base), is_minus ? "-" : "",
22422 REGNO (XEXP (index, 0)));
22423 arm_print_operand (stream, index, 'S');
22424 fputs ("]", stream);
22425 break;
22428 default:
22429 gcc_unreachable ();
22432 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22433 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22435 gcc_assert (REG_P (XEXP (x, 0)));
22437 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22438 asm_fprintf (stream, "[%r, #%s%d]!",
22439 REGNO (XEXP (x, 0)),
22440 GET_CODE (x) == PRE_DEC ? "-" : "",
22441 GET_MODE_SIZE (mode));
22442 else
22443 asm_fprintf (stream, "[%r], #%s%d",
22444 REGNO (XEXP (x, 0)),
22445 GET_CODE (x) == POST_DEC ? "-" : "",
22446 GET_MODE_SIZE (mode));
22448 else if (GET_CODE (x) == PRE_MODIFY)
22450 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22451 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22452 asm_fprintf (stream, "#%wd]!",
22453 INTVAL (XEXP (XEXP (x, 1), 1)));
22454 else
22455 asm_fprintf (stream, "%r]!",
22456 REGNO (XEXP (XEXP (x, 1), 1)));
22458 else if (GET_CODE (x) == POST_MODIFY)
22460 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22461 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22462 asm_fprintf (stream, "#%wd",
22463 INTVAL (XEXP (XEXP (x, 1), 1)));
22464 else
22465 asm_fprintf (stream, "%r",
22466 REGNO (XEXP (XEXP (x, 1), 1)));
22468 else output_addr_const (stream, x);
22470 else
22472 if (REG_P (x))
22473 asm_fprintf (stream, "[%r]", REGNO (x));
22474 else if (GET_CODE (x) == POST_INC)
22475 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22476 else if (GET_CODE (x) == PLUS)
22478 gcc_assert (REG_P (XEXP (x, 0)));
22479 if (CONST_INT_P (XEXP (x, 1)))
22480 asm_fprintf (stream, "[%r, #%wd]",
22481 REGNO (XEXP (x, 0)),
22482 INTVAL (XEXP (x, 1)));
22483 else
22484 asm_fprintf (stream, "[%r, %r]",
22485 REGNO (XEXP (x, 0)),
22486 REGNO (XEXP (x, 1)));
22488 else
22489 output_addr_const (stream, x);
22493 /* Target hook for indicating whether a punctuation character for
22494 TARGET_PRINT_OPERAND is valid. */
22495 static bool
22496 arm_print_operand_punct_valid_p (unsigned char code)
22498 return (code == '@' || code == '|' || code == '.'
22499 || code == '(' || code == ')' || code == '#'
22500 || (TARGET_32BIT && (code == '?'))
22501 || (TARGET_THUMB2 && (code == '!'))
22502 || (TARGET_THUMB && (code == '_')));
22505 /* Target hook for assembling integer objects. The ARM version needs to
22506 handle word-sized values specially. */
22507 static bool
22508 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22510 machine_mode mode;
22512 if (size == UNITS_PER_WORD && aligned_p)
22514 fputs ("\t.word\t", asm_out_file);
22515 output_addr_const (asm_out_file, x);
22517 /* Mark symbols as position independent. We only do this in the
22518 .text segment, not in the .data segment. */
22519 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22520 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22522 /* See legitimize_pic_address for an explanation of the
22523 TARGET_VXWORKS_RTP check. */
22524 /* References to weak symbols cannot be resolved locally:
22525 they may be overridden by a non-weak definition at link
22526 time. */
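 /* For instance, with -fPIC a constant-pool reference to a symbol that may
    be overridden at link time is emitted roughly as ".word foo(GOT)",
    whereas a local, text-relative reference gets "(GOTOFF)" instead
    ("foo" is a hypothetical symbol).  */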
22527 if (!arm_pic_data_is_text_relative
22528 || (GET_CODE (x) == SYMBOL_REF
22529 && (!SYMBOL_REF_LOCAL_P (x)
22530 || (SYMBOL_REF_DECL (x)
22531 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22532 fputs ("(GOT)", asm_out_file);
22533 else
22534 fputs ("(GOTOFF)", asm_out_file);
22536 fputc ('\n', asm_out_file);
22537 return true;
22540 mode = GET_MODE (x);
22542 if (arm_vector_mode_supported_p (mode))
22544 int i, units;
22546 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22548 units = CONST_VECTOR_NUNITS (x);
22549 size = GET_MODE_UNIT_SIZE (mode);
22551 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22552 for (i = 0; i < units; i++)
22554 rtx elt = CONST_VECTOR_ELT (x, i);
22555 assemble_integer
22556 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22558 else
22559 for (i = 0; i < units; i++)
22561 rtx elt = CONST_VECTOR_ELT (x, i);
22562 assemble_real
22563 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22564 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22567 return true;
22570 return default_assemble_integer (x, size, aligned_p);
22573 static void
22574 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22576 section *s;
22578 if (!TARGET_AAPCS_BASED)
22580 (is_ctor ?
22581 default_named_section_asm_out_constructor
22582 : default_named_section_asm_out_destructor) (symbol, priority);
22583 return;
22586 /* Put these in the .init_array section, using a special relocation. */
22587 if (priority != DEFAULT_INIT_PRIORITY)
22589 char buf[18];
22590 sprintf (buf, "%s.%.5u",
22591 is_ctor ? ".init_array" : ".fini_array",
22592 priority);
22593 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22595 else if (is_ctor)
22596 s = ctors_section;
22597 else
22598 s = dtors_section;
22600 switch_to_section (s);
22601 assemble_align (POINTER_SIZE);
22602 fputs ("\t.word\t", asm_out_file);
22603 output_addr_const (asm_out_file, symbol);
22604 fputs ("(target1)\n", asm_out_file);
22607 /* Add a function to the list of static constructors. */
22609 static void
22610 arm_elf_asm_constructor (rtx symbol, int priority)
22612 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22615 /* Add a function to the list of static destructors. */
22617 static void
22618 arm_elf_asm_destructor (rtx symbol, int priority)
22620 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22623 /* A finite state machine takes care of noticing whether or not instructions
22624 can be conditionally executed, and thus decrease execution time and code
22625 size by deleting branch instructions. The fsm is controlled by
22626 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22628 /* The states of the fsm controlling condition codes are:
22629 0: normal, do nothing special
22630 1: make ASM_OUTPUT_OPCODE not output this instruction
22631 2: make ASM_OUTPUT_OPCODE not output this instruction
22632 3: make instructions conditional
22633 4: make instructions conditional
22635 State transitions (state->state by whom under condition):
22636 0 -> 1 final_prescan_insn if the `target' is a label
22637 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22638 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22639 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22640 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22641 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22642 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22643 (the target insn is arm_target_insn).
22645 If the jump clobbers the conditions then we use states 2 and 4.
22647 A similar thing can be done with conditional return insns.
22649 XXX In case the `target' is an unconditional branch, this conditionalising
22650 of the instructions always reduces code size, but not always execution
22651 time. But then, I want to reduce the code size to somewhere near what
22652 /bin/cc produces. */
22654 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22655 instructions. When a COND_EXEC instruction is seen the subsequent
22656 instructions are scanned so that multiple conditional instructions can be
22657 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22658 specify the length and true/false mask for the IT block. These will be
22659 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22661 /* Returns the index of the ARM condition code string in
22662 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22663 COMPARISON should be an rtx like `(eq (...) (...))'. */
22665 enum arm_cond_code
22666 maybe_get_arm_condition_code (rtx comparison)
22668 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22669 enum arm_cond_code code;
22670 enum rtx_code comp_code = GET_CODE (comparison);
22672 if (GET_MODE_CLASS (mode) != MODE_CC)
22673 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22674 XEXP (comparison, 1));
22676 switch (mode)
22678 case CC_DNEmode: code = ARM_NE; goto dominance;
22679 case CC_DEQmode: code = ARM_EQ; goto dominance;
22680 case CC_DGEmode: code = ARM_GE; goto dominance;
22681 case CC_DGTmode: code = ARM_GT; goto dominance;
22682 case CC_DLEmode: code = ARM_LE; goto dominance;
22683 case CC_DLTmode: code = ARM_LT; goto dominance;
22684 case CC_DGEUmode: code = ARM_CS; goto dominance;
22685 case CC_DGTUmode: code = ARM_HI; goto dominance;
22686 case CC_DLEUmode: code = ARM_LS; goto dominance;
22687 case CC_DLTUmode: code = ARM_CC;
22689 dominance:
22690 if (comp_code == EQ)
22691 return ARM_INVERSE_CONDITION_CODE (code);
22692 if (comp_code == NE)
22693 return code;
22694 return ARM_NV;
22696 case CC_NOOVmode:
22697 switch (comp_code)
22699 case NE: return ARM_NE;
22700 case EQ: return ARM_EQ;
22701 case GE: return ARM_PL;
22702 case LT: return ARM_MI;
22703 default: return ARM_NV;
22706 case CC_Zmode:
22707 switch (comp_code)
22709 case NE: return ARM_NE;
22710 case EQ: return ARM_EQ;
22711 default: return ARM_NV;
22714 case CC_Nmode:
22715 switch (comp_code)
22717 case NE: return ARM_MI;
22718 case EQ: return ARM_PL;
22719 default: return ARM_NV;
22722 case CCFPEmode:
22723 case CCFPmode:
22724 /* We can handle all cases except UNEQ and LTGT. */
22725 switch (comp_code)
22727 case GE: return ARM_GE;
22728 case GT: return ARM_GT;
22729 case LE: return ARM_LS;
22730 case LT: return ARM_MI;
22731 case NE: return ARM_NE;
22732 case EQ: return ARM_EQ;
22733 case ORDERED: return ARM_VC;
22734 case UNORDERED: return ARM_VS;
22735 case UNLT: return ARM_LT;
22736 case UNLE: return ARM_LE;
22737 case UNGT: return ARM_HI;
22738 case UNGE: return ARM_PL;
22739 /* UNEQ and LTGT do not have a representation. */
22740 case UNEQ: /* Fall through. */
22741 case LTGT: /* Fall through. */
22742 default: return ARM_NV;
22745 case CC_SWPmode:
22746 switch (comp_code)
22748 case NE: return ARM_NE;
22749 case EQ: return ARM_EQ;
22750 case GE: return ARM_LE;
22751 case GT: return ARM_LT;
22752 case LE: return ARM_GE;
22753 case LT: return ARM_GT;
22754 case GEU: return ARM_LS;
22755 case GTU: return ARM_CC;
22756 case LEU: return ARM_CS;
22757 case LTU: return ARM_HI;
22758 default: return ARM_NV;
22761 case CC_Cmode:
22762 switch (comp_code)
22764 case LTU: return ARM_CS;
22765 case GEU: return ARM_CC;
22766 case NE: return ARM_CS;
22767 case EQ: return ARM_CC;
22768 default: return ARM_NV;
22771 case CC_CZmode:
22772 switch (comp_code)
22774 case NE: return ARM_NE;
22775 case EQ: return ARM_EQ;
22776 case GEU: return ARM_CS;
22777 case GTU: return ARM_HI;
22778 case LEU: return ARM_LS;
22779 case LTU: return ARM_CC;
22780 default: return ARM_NV;
22783 case CC_NCVmode:
22784 switch (comp_code)
22786 case GE: return ARM_GE;
22787 case LT: return ARM_LT;
22788 case GEU: return ARM_CS;
22789 case LTU: return ARM_CC;
22790 default: return ARM_NV;
22793 case CC_Vmode:
22794 switch (comp_code)
22796 case NE: return ARM_VS;
22797 case EQ: return ARM_VC;
22798 default: return ARM_NV;
22801 case CCmode:
22802 switch (comp_code)
22804 case NE: return ARM_NE;
22805 case EQ: return ARM_EQ;
22806 case GE: return ARM_GE;
22807 case GT: return ARM_GT;
22808 case LE: return ARM_LE;
22809 case LT: return ARM_LT;
22810 case GEU: return ARM_CS;
22811 case GTU: return ARM_HI;
22812 case LEU: return ARM_LS;
22813 case LTU: return ARM_CC;
22814 default: return ARM_NV;
22817 default: gcc_unreachable ();
22821 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22822 static enum arm_cond_code
22823 get_arm_condition_code (rtx comparison)
22825 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22826 gcc_assert (code != ARM_NV);
22827 return code;
22830 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22831 instructions. */
22832 void
22833 thumb2_final_prescan_insn (rtx_insn *insn)
22835 rtx_insn *first_insn = insn;
22836 rtx body = PATTERN (insn);
22837 rtx predicate;
22838 enum arm_cond_code code;
22839 int n;
22840 int mask;
22841 int max;
22843 /* max_insns_skipped in the tune was already taken into account in the
22844 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22845 just emit the IT blocks as we can. It does not make sense to split
22846 the IT blocks. */
22847 max = MAX_INSN_PER_IT_BLOCK;
22849 /* Remove the previous insn from the count of insns to be output. */
22850 if (arm_condexec_count)
22851 arm_condexec_count--;
22853 /* Nothing to do if we are already inside a conditional block. */
22854 if (arm_condexec_count)
22855 return;
22857 if (GET_CODE (body) != COND_EXEC)
22858 return;
22860 /* Conditional jumps are implemented directly. */
22861 if (JUMP_P (insn))
22862 return;
22864 predicate = COND_EXEC_TEST (body);
22865 arm_current_cc = get_arm_condition_code (predicate);
22867 n = get_attr_ce_count (insn);
22868 arm_condexec_count = 1;
22869 arm_condexec_mask = (1 << n) - 1;
22870 arm_condexec_masklen = n;
22871 /* See if subsequent instructions can be combined into the same block. */
22872 for (;;)
22874 insn = next_nonnote_insn (insn);
22876 /* Jumping into the middle of an IT block is illegal, so a label or
22877 barrier terminates the block. */
22878 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22879 break;
22881 body = PATTERN (insn);
22882 /* USE and CLOBBER aren't really insns, so just skip them. */
22883 if (GET_CODE (body) == USE
22884 || GET_CODE (body) == CLOBBER)
22885 continue;
22887 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22888 if (GET_CODE (body) != COND_EXEC)
22889 break;
22890 /* Maximum number of conditionally executed instructions in a block. */
22891 n = get_attr_ce_count (insn);
22892 if (arm_condexec_masklen + n > max)
22893 break;
22895 predicate = COND_EXEC_TEST (body);
22896 code = get_arm_condition_code (predicate);
22897 mask = (1 << n) - 1;
22898 if (arm_current_cc == code)
22899 arm_condexec_mask |= (mask << arm_condexec_masklen);
22900 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22901 break;
22903 arm_condexec_count++;
22904 arm_condexec_masklen += n;
22906 /* A jump must be the last instruction in a conditional block. */
22907 if (JUMP_P (insn))
22908 break;
22910 /* Restore recog_data (getting the attributes of other insns can
22911 destroy this array, but final.c assumes that it remains intact
22912 across this call). */
22913 extract_constrain_insn_cached (first_insn);
22916 void
22917 arm_final_prescan_insn (rtx_insn *insn)
22919 /* BODY will hold the body of INSN. */
22920 rtx body = PATTERN (insn);
22922 /* This will be 1 if trying to repeat the trick, and things need to be
22923 reversed if it appears to fail. */
22924 int reverse = 0;
22926 /* If we start with a return insn, we only succeed if we find another one. */
22927 int seeking_return = 0;
22928 enum rtx_code return_code = UNKNOWN;
22930 /* START_INSN will hold the insn from where we start looking. This is the
22931 first insn after the following code_label if REVERSE is true. */
22932 rtx_insn *start_insn = insn;
22934 /* If in state 4, check if the target branch is reached, in order to
22935 change back to state 0. */
22936 if (arm_ccfsm_state == 4)
22938 if (insn == arm_target_insn)
22940 arm_target_insn = NULL;
22941 arm_ccfsm_state = 0;
22943 return;
22946 /* If in state 3, it is possible to repeat the trick, if this insn is an
22947 unconditional branch to a label, and immediately following this branch
22948 is the previous target label which is only used once, and the label this
22949 branch jumps to is not too far off. */
22950 if (arm_ccfsm_state == 3)
22952 if (simplejump_p (insn))
22954 start_insn = next_nonnote_insn (start_insn);
22955 if (BARRIER_P (start_insn))
22957 /* XXX Isn't this always a barrier? */
22958 start_insn = next_nonnote_insn (start_insn);
22960 if (LABEL_P (start_insn)
22961 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22962 && LABEL_NUSES (start_insn) == 1)
22963 reverse = TRUE;
22964 else
22965 return;
22967 else if (ANY_RETURN_P (body))
22969 start_insn = next_nonnote_insn (start_insn);
22970 if (BARRIER_P (start_insn))
22971 start_insn = next_nonnote_insn (start_insn);
22972 if (LABEL_P (start_insn)
22973 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22974 && LABEL_NUSES (start_insn) == 1)
22976 reverse = TRUE;
22977 seeking_return = 1;
22978 return_code = GET_CODE (body);
22980 else
22981 return;
22983 else
22984 return;
22987 gcc_assert (!arm_ccfsm_state || reverse);
22988 if (!JUMP_P (insn))
22989 return;
22991 /* This jump might be paralleled with a clobber of the condition codes;
22992 the jump should always come first. */
22993 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22994 body = XVECEXP (body, 0, 0);
22996 if (reverse
22997 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22998 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23000 int insns_skipped;
23001 int fail = FALSE, succeed = FALSE;
23002 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23003 int then_not_else = TRUE;
23004 rtx_insn *this_insn = start_insn;
23005 rtx label = 0;
23007 /* Register the insn jumped to. */
23008 if (reverse)
23010 if (!seeking_return)
23011 label = XEXP (SET_SRC (body), 0);
23013 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23014 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23015 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23017 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23018 then_not_else = FALSE;
23020 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23022 seeking_return = 1;
23023 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23025 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23027 seeking_return = 1;
23028 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23029 then_not_else = FALSE;
23031 else
23032 gcc_unreachable ();
23034 /* See how many insns this branch skips, and what kind of insns. If all
23035 insns are okay, and the label or unconditional branch to the same
23036 label is not too far away, succeed. */
23037 for (insns_skipped = 0;
23038 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23040 rtx scanbody;
23042 this_insn = next_nonnote_insn (this_insn);
23043 if (!this_insn)
23044 break;
23046 switch (GET_CODE (this_insn))
23048 case CODE_LABEL:
23049 /* Succeed if it is the target label, otherwise fail since
23050 control falls in from somewhere else. */
23051 if (this_insn == label)
23053 arm_ccfsm_state = 1;
23054 succeed = TRUE;
23056 else
23057 fail = TRUE;
23058 break;
23060 case BARRIER:
23061 /* Succeed if the following insn is the target label.
23062 Otherwise fail.
23063 If return insns are used then the last insn in a function
23064 will be a barrier. */
23065 this_insn = next_nonnote_insn (this_insn);
23066 if (this_insn && this_insn == label)
23068 arm_ccfsm_state = 1;
23069 succeed = TRUE;
23071 else
23072 fail = TRUE;
23073 break;
23075 case CALL_INSN:
23076 /* The AAPCS says that conditional calls should not be
23077 used since they make interworking inefficient (the
23078 linker can't transform BL<cond> into BLX). That's
23079 only a problem if the machine has BLX. */
23080 if (arm_arch5)
23082 fail = TRUE;
23083 break;
23086 /* Succeed if the following insn is the target label, or
23087 if the following two insns are a barrier and the
23088 target label. */
23089 this_insn = next_nonnote_insn (this_insn);
23090 if (this_insn && BARRIER_P (this_insn))
23091 this_insn = next_nonnote_insn (this_insn);
23093 if (this_insn && this_insn == label
23094 && insns_skipped < max_insns_skipped)
23096 arm_ccfsm_state = 1;
23097 succeed = TRUE;
23099 else
23100 fail = TRUE;
23101 break;
23103 case JUMP_INSN:
23104 /* If this is an unconditional branch to the same label, succeed.
23105 If it is to another label, do nothing. If it is conditional,
23106 fail. */
23107 /* XXX Probably, the tests for SET and the PC are
23108 unnecessary. */
23110 scanbody = PATTERN (this_insn);
23111 if (GET_CODE (scanbody) == SET
23112 && GET_CODE (SET_DEST (scanbody)) == PC)
23114 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23115 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23117 arm_ccfsm_state = 2;
23118 succeed = TRUE;
23120 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23121 fail = TRUE;
23123 /* Fail if a conditional return is undesirable (e.g. on a
23124 StrongARM), but still allow this if optimizing for size. */
23125 else if (GET_CODE (scanbody) == return_code
23126 && !use_return_insn (TRUE, NULL)
23127 && !optimize_size)
23128 fail = TRUE;
23129 else if (GET_CODE (scanbody) == return_code)
23131 arm_ccfsm_state = 2;
23132 succeed = TRUE;
23134 else if (GET_CODE (scanbody) == PARALLEL)
23136 switch (get_attr_conds (this_insn))
23138 case CONDS_NOCOND:
23139 break;
23140 default:
23141 fail = TRUE;
23142 break;
23145 else
23146 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23148 break;
23150 case INSN:
23151 /* Instructions using or affecting the condition codes make it
23152 fail. */
23153 scanbody = PATTERN (this_insn);
23154 if (!(GET_CODE (scanbody) == SET
23155 || GET_CODE (scanbody) == PARALLEL)
23156 || get_attr_conds (this_insn) != CONDS_NOCOND)
23157 fail = TRUE;
23158 break;
23160 default:
23161 break;
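/* If the branch can be converted, record how far conditional execution
   extends: either up to the target label (arm_target_label) or up to a
   particular insn (arm_target_insn).  */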
23164 if (succeed)
23166 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23167 arm_target_label = CODE_LABEL_NUMBER (label);
23168 else
23170 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23172 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23174 this_insn = next_nonnote_insn (this_insn);
23175 gcc_assert (!this_insn
23176 || (!BARRIER_P (this_insn)
23177 && !LABEL_P (this_insn)));
23179 if (!this_insn)
23181 /* Oh, dear! We ran off the end.  Give up. */
23182 extract_constrain_insn_cached (insn);
23183 arm_ccfsm_state = 0;
23184 arm_target_insn = NULL;
23185 return;
23187 arm_target_insn = this_insn;
23190 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23191 what it was. */
23192 if (!reverse)
23193 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23195 if (reverse || then_not_else)
23196 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23199 /* Restore recog_data (getting the attributes of other insns can
23200 destroy this array, but final.c assumes that it remains intact
23201 across this call). */
23202 extract_constrain_insn_cached (insn);
23206 /* Output IT instructions. */
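/* Emit the IT/ITT/ITE... prefix derived from arm_condexec_mask (one 't' or
   'e' letter per conditional instruction) before the first instruction of
   each new conditional block, then clear the mask.  */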
23207 void
23208 thumb2_asm_output_opcode (FILE * stream)
23210 char buff[5];
23211 int n;
23213 if (arm_condexec_mask)
23215 for (n = 0; n < arm_condexec_masklen; n++)
23216 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23217 buff[n] = 0;
23218 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23219 arm_condition_codes[arm_current_cc]);
23220 arm_condexec_mask = 0;
23224 /* Returns true if REGNO is a valid register
23225 for holding a quantity of type MODE. */
23226 int
23227 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23229 if (GET_MODE_CLASS (mode) == MODE_CC)
23230 return (regno == CC_REGNUM
23231 || (TARGET_HARD_FLOAT
23232 && regno == VFPCC_REGNUM));
23234 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23235 return false;
23237 if (TARGET_THUMB1)
23238 /* For the Thumb we only allow values bigger than SImode in
23239 registers 0 - 6, so that there is always a second low
23240 register available to hold the upper part of the value.
23241 We probably ought to ensure that the register is the
23242 start of an even numbered register pair. */
23243 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23245 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23247 if (mode == SFmode || mode == SImode)
23248 return VFP_REGNO_OK_FOR_SINGLE (regno);
23250 if (mode == DFmode)
23251 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23253 if (mode == HFmode)
23254 return VFP_REGNO_OK_FOR_SINGLE (regno);
23256 /* VFP registers can hold HImode values. */
23257 if (mode == HImode)
23258 return VFP_REGNO_OK_FOR_SINGLE (regno);
23260 if (TARGET_NEON)
23261 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23262 || (VALID_NEON_QREG_MODE (mode)
23263 && NEON_REGNO_OK_FOR_QUAD (regno))
23264 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23265 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23266 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23267 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23268 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23270 return FALSE;
23273 if (TARGET_REALLY_IWMMXT)
23275 if (IS_IWMMXT_GR_REGNUM (regno))
23276 return mode == SImode;
23278 if (IS_IWMMXT_REGNUM (regno))
23279 return VALID_IWMMXT_REG_MODE (mode);
23282 /* We allow almost any value to be stored in the general registers.
23283 Restrict doubleword quantities to even register pairs in ARM state
23284 so that we can use ldrd. Do not allow very large Neon structure
23285 opaque modes in general registers; they would use too many. */
23286 if (regno <= LAST_ARM_REGNUM)
23288 if (ARM_NUM_REGS (mode) > 4)
23289 return FALSE;
23291 if (TARGET_THUMB2)
23292 return TRUE;
23294 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23297 if (regno == FRAME_POINTER_REGNUM
23298 || regno == ARG_POINTER_REGNUM)
23299 /* We only allow integers in the fake hard registers. */
23300 return GET_MODE_CLASS (mode) == MODE_INT;
23302 return FALSE;
23305 /* Implement MODES_TIEABLE_P. */
23307 bool
23308 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23310 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23311 return true;
23313 /* We specifically want to allow elements of "structure" modes to
23314 be tieable to the structure. This more general condition allows
23315 other rarer situations too. */
23316 if (TARGET_NEON
23317 && (VALID_NEON_DREG_MODE (mode1)
23318 || VALID_NEON_QREG_MODE (mode1)
23319 || VALID_NEON_STRUCT_MODE (mode1))
23320 && (VALID_NEON_DREG_MODE (mode2)
23321 || VALID_NEON_QREG_MODE (mode2)
23322 || VALID_NEON_STRUCT_MODE (mode2)))
23323 return true;
23325 return false;
23328 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23329 not used in arm mode. */
23331 enum reg_class
23332 arm_regno_class (int regno)
23334 if (regno == PC_REGNUM)
23335 return NO_REGS;
23337 if (TARGET_THUMB1)
23339 if (regno == STACK_POINTER_REGNUM)
23340 return STACK_REG;
23341 if (regno == CC_REGNUM)
23342 return CC_REG;
23343 if (regno < 8)
23344 return LO_REGS;
23345 return HI_REGS;
23348 if (TARGET_THUMB2 && regno < 8)
23349 return LO_REGS;
23351 if ( regno <= LAST_ARM_REGNUM
23352 || regno == FRAME_POINTER_REGNUM
23353 || regno == ARG_POINTER_REGNUM)
23354 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23356 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23357 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23359 if (IS_VFP_REGNUM (regno))
23361 if (regno <= D7_VFP_REGNUM)
23362 return VFP_D0_D7_REGS;
23363 else if (regno <= LAST_LO_VFP_REGNUM)
23364 return VFP_LO_REGS;
23365 else
23366 return VFP_HI_REGS;
23369 if (IS_IWMMXT_REGNUM (regno))
23370 return IWMMXT_REGS;
23372 if (IS_IWMMXT_GR_REGNUM (regno))
23373 return IWMMXT_GR_REGS;
23375 return NO_REGS;
23378 /* Handle a special case when computing the offset
23379 of an argument from the frame pointer. */
23380 int
23381 arm_debugger_arg_offset (int value, rtx addr)
23383 rtx_insn *insn;
23385 /* We are only interested if dbxout_parms() failed to compute the offset. */
23386 if (value != 0)
23387 return 0;
23389 /* We can only cope with the case where the address is held in a register. */
23390 if (!REG_P (addr))
23391 return 0;
23393 /* If we are using the frame pointer to point at the argument, then
23394 an offset of 0 is correct. */
23395 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23396 return 0;
23398 /* If we are using the stack pointer to point at the
23399 argument, then an offset of 0 is correct. */
23400 /* ??? Check this is consistent with thumb2 frame layout. */
23401 if ((TARGET_THUMB || !frame_pointer_needed)
23402 && REGNO (addr) == SP_REGNUM)
23403 return 0;
23405 /* Oh dear. The argument is pointed to by a register rather
23406 than being held in a register, or being stored at a known
23407 offset from the frame pointer. Since GDB only understands
23408 those two kinds of argument we must translate the address
23409 held in the register into an offset from the frame pointer.
23410 We do this by searching through the insns for the function
23411 looking to see where this register gets its value. If the
23412 register is initialized from the frame pointer plus an offset
23413 then we are in luck and we can continue, otherwise we give up.
23415 This code is exercised by producing debugging information
23416 for a function with arguments like this:
23418 double func (double a, double b, int c, double d) {return d;}
23420 Without this code the stab for parameter 'd' will be set to
23421 an offset of 0 from the frame pointer, rather than 8. */
23423 /* The if() statement says:
23425 If the insn is a normal instruction
23426 and if the insn is setting the value in a register
23427 and if the register being set is the register holding the address of the argument
23428 and if the address is computed by an addition
23429 that involves adding to a register
23430 which is the frame pointer
23431 a constant integer
23433 then... */
23435 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23437 if ( NONJUMP_INSN_P (insn)
23438 && GET_CODE (PATTERN (insn)) == SET
23439 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23440 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23441 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23442 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23443 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23446 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23448 break;
23452 if (value == 0)
23454 debug_rtx (addr);
23455 warning (0, "unable to compute real location of stacked parameter");
23456 value = 8; /* XXX magic hack */
23459 return value;
23462 /* Implement TARGET_PROMOTED_TYPE. */
23464 static tree
23465 arm_promoted_type (const_tree t)
23467 if (SCALAR_FLOAT_TYPE_P (t)
23468 && TYPE_PRECISION (t) == 16
23469 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23470 return float_type_node;
23471 return NULL_TREE;
23474 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23475 This simply adds HFmode as a supported mode; even though we don't
23476 implement arithmetic on this type directly, it's supported by
23477 optabs conversions, much the way the double-word arithmetic is
23478 special-cased in the default hook. */
23480 static bool
23481 arm_scalar_mode_supported_p (machine_mode mode)
23483 if (mode == HFmode)
23484 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23485 else if (ALL_FIXED_POINT_MODE_P (mode))
23486 return true;
23487 else
23488 return default_scalar_mode_supported_p (mode);
23491 /* Set the value of FLT_EVAL_METHOD.
23492 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23494 0: evaluate all operations and constants, whose semantic type has at
23495 most the range and precision of type float, to the range and
23496 precision of float; evaluate all other operations and constants to
23497 the range and precision of the semantic type;
23499 N, where _FloatN is a supported interchange floating type
23500 evaluate all operations and constants, whose semantic type has at
23501 most the range and precision of _FloatN type, to the range and
23502 precision of the _FloatN type; evaluate all other operations and
23503 constants to the range and precision of the semantic type;
23505 If we have the ARMv8.2-A extensions then we support _Float16 in native
23506 precision, so we should set this to 16. Otherwise, we support the type,
23507 but want to evaluate expressions in float precision, so set this to
23508 0. */
23510 static enum flt_eval_method
23511 arm_excess_precision (enum excess_precision_type type)
23513 switch (type)
23515 case EXCESS_PRECISION_TYPE_FAST:
23516 case EXCESS_PRECISION_TYPE_STANDARD:
23517 /* We can calculate either in 16-bit range and precision or
23518 32-bit range and precision. Make that decision based on whether
23519 we have native support for the ARMv8.2-A 16-bit floating-point
23520 instructions or not. */
23521 return (TARGET_VFP_FP16INST
23522 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23523 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23524 case EXCESS_PRECISION_TYPE_IMPLICIT:
23525 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23526 default:
23527 gcc_unreachable ();
23529 return FLT_EVAL_METHOD_UNPREDICTABLE;
23533 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23534 _Float16 if we are using anything other than ieee format for 16-bit
23535 floating point. Otherwise, punt to the default implementation. */
23536 static machine_mode
23537 arm_floatn_mode (int n, bool extended)
23539 if (!extended && n == 16)
23540 return arm_fp16_format == ARM_FP16_FORMAT_IEEE ? HFmode : VOIDmode;
23542 return default_floatn_mode (n, extended);
23546 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23547 not to early-clobber SRC registers in the process.
23549 We assume that the operands described by SRC and DEST represent a
23550 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23551 number of components into which the copy has been decomposed. */
23552 void
23553 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23555 unsigned int i;
23557 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23558 || REGNO (operands[0]) < REGNO (operands[1]))
23560 for (i = 0; i < count; i++)
23562 operands[2 * i] = dest[i];
23563 operands[2 * i + 1] = src[i];
23566 else
23568 for (i = 0; i < count; i++)
23570 operands[2 * i] = dest[count - i - 1];
23571 operands[2 * i + 1] = src[count - i - 1];
23576 /* Split operands into moves from op[1] + op[2] into op[0]. */
23578 void
23579 neon_split_vcombine (rtx operands[3])
23581 unsigned int dest = REGNO (operands[0]);
23582 unsigned int src1 = REGNO (operands[1]);
23583 unsigned int src2 = REGNO (operands[2]);
23584 machine_mode halfmode = GET_MODE (operands[1]);
23585 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23586 rtx destlo, desthi;
23588 if (src1 == dest && src2 == dest + halfregs)
23590 /* No-op move. Can't split to nothing; emit something. */
23591 emit_note (NOTE_INSN_DELETED);
23592 return;
23595 /* Preserve register attributes for variable tracking. */
23596 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23597 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23598 GET_MODE_SIZE (halfmode));
23600 /* Special case of reversed high/low parts. Use VSWP. */
23601 if (src2 == dest && src1 == dest + halfregs)
23603 rtx x = gen_rtx_SET (destlo, operands[1]);
23604 rtx y = gen_rtx_SET (desthi, operands[2]);
23605 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23606 return;
23609 if (!reg_overlap_mentioned_p (operands[2], destlo))
23611 /* Try to avoid unnecessary moves if part of the result
23612 is in the right place already. */
23613 if (src1 != dest)
23614 emit_move_insn (destlo, operands[1]);
23615 if (src2 != dest + halfregs)
23616 emit_move_insn (desthi, operands[2]);
23618 else
23620 if (src2 != dest + halfregs)
23621 emit_move_insn (desthi, operands[2]);
23622 if (src1 != dest)
23623 emit_move_insn (destlo, operands[1]);
23627 /* Return the number (counting from 0) of
23628 the least significant set bit in MASK. */
23630 inline static int
23631 number_of_first_bit_set (unsigned mask)
23633 return ctz_hwi (mask);
23636 /* Like emit_multi_reg_push, but allowing for a different set of
23637 registers to be described as saved. MASK is the set of registers
23638 to be saved; REAL_REGS is the set of registers to be described as
23639 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23641 static rtx_insn *
23642 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23644 unsigned long regno;
23645 rtx par[10], tmp, reg;
23646 rtx_insn *insn;
23647 int i, j;
23649 /* Build the parallel of the registers actually being stored. */
23650 for (i = 0; mask; ++i, mask &= mask - 1)
23652 regno = ctz_hwi (mask);
23653 reg = gen_rtx_REG (SImode, regno);
23655 if (i == 0)
23656 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23657 else
23658 tmp = gen_rtx_USE (VOIDmode, reg);
23660 par[i] = tmp;
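/* The first element of the PARALLEL combines the store of the first
   register with the pre-decrement of the stack pointer, so the whole
   PARALLEL represents a single store-multiple push instruction.  */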
23663 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23664 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23665 tmp = gen_frame_mem (BLKmode, tmp);
23666 tmp = gen_rtx_SET (tmp, par[0]);
23667 par[0] = tmp;
23669 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23670 insn = emit_insn (tmp);
23672 /* Always build the stack adjustment note for unwind info. */
23673 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23674 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23675 par[0] = tmp;
23677 /* Build the parallel of the registers recorded as saved for unwind. */
23678 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23680 regno = ctz_hwi (real_regs);
23681 reg = gen_rtx_REG (SImode, regno);
23683 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23684 tmp = gen_frame_mem (SImode, tmp);
23685 tmp = gen_rtx_SET (tmp, reg);
23686 RTX_FRAME_RELATED_P (tmp) = 1;
23687 par[j + 1] = tmp;
23690 if (j == 0)
23691 tmp = par[0];
23692 else
23694 RTX_FRAME_RELATED_P (par[0]) = 1;
23695 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23698 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23700 return insn;
23703 /* Emit code to pop registers from the stack. F is the
23704 assembly file. MASK is the registers to pop. */
23705 static void
23706 thumb_pop (FILE *f, unsigned long mask)
23708 int regno;
23709 int lo_mask = mask & 0xFF;
23710 int pushed_words = 0;
23712 gcc_assert (mask);
23714 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23716 /* Special case. Do not generate a POP PC statement here, do it in
23717 thumb_exit(). */
23718 thumb_exit (f, -1);
23719 return;
23722 fprintf (f, "\tpop\t{");
23724 /* Look at the low registers first. */
23725 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23727 if (lo_mask & 1)
23729 asm_fprintf (f, "%r", regno);
23731 if ((lo_mask & ~1) != 0)
23732 fprintf (f, ", ");
23734 pushed_words++;
23738 if (mask & (1 << PC_REGNUM))
23740 /* Catch popping the PC. */
23741 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23742 || IS_CMSE_ENTRY (arm_current_func_type ()))
23744 /* The PC is never popped directly; instead
23745 it is popped into r3 and then BX is used. */
23746 fprintf (f, "}\n");
23748 thumb_exit (f, -1);
23750 return;
23752 else
23754 if (mask & 0xFF)
23755 fprintf (f, ", ");
23757 asm_fprintf (f, "%r", PC_REGNUM);
23761 fprintf (f, "}\n");
23764 /* Generate code to return from a thumb function.
23765 If 'reg_containing_return_addr' is -1, then the return address is
23766 actually on the stack, at the stack pointer. */
23767 static void
23768 thumb_exit (FILE *f, int reg_containing_return_addr)
23770 unsigned regs_available_for_popping;
23771 unsigned regs_to_pop;
23772 int pops_needed;
23773 unsigned available;
23774 unsigned required;
23775 machine_mode mode;
23776 int size;
23777 int restore_a4 = FALSE;
23779 /* Compute the registers we need to pop. */
23780 regs_to_pop = 0;
23781 pops_needed = 0;
23783 if (reg_containing_return_addr == -1)
23785 regs_to_pop |= 1 << LR_REGNUM;
23786 ++pops_needed;
23789 if (TARGET_BACKTRACE)
23791 /* Restore the (ARM) frame pointer and stack pointer. */
23792 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23793 pops_needed += 2;
23796 /* If there is nothing to pop then just emit the BX instruction and
23797 return. */
23798 if (pops_needed == 0)
23800 if (crtl->calls_eh_return)
23801 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23803 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23805 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23806 reg_containing_return_addr);
23807 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23809 else
23810 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23811 return;
23813 /* Otherwise if we are not supporting interworking and we have not created
23814 a backtrace structure and the function was not entered in ARM mode then
23815 just pop the return address straight into the PC. */
23816 else if (!TARGET_INTERWORK
23817 && !TARGET_BACKTRACE
23818 && !is_called_in_ARM_mode (current_function_decl)
23819 && !crtl->calls_eh_return
23820 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23822 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23823 return;
23826 /* Find out how many of the (return) argument registers we can corrupt. */
23827 regs_available_for_popping = 0;
23829 /* If returning via __builtin_eh_return, the bottom three registers
23830 all contain information needed for the return. */
23831 if (crtl->calls_eh_return)
23832 size = 12;
23833 else
23835 /* We can deduce the registers used from the function's
23836 return value. This is more reliable than examining
23837 df_regs_ever_live_p () because that will be set if the register is
23838 ever used in the function, not just if the register is used
23839 to hold a return value. */
23841 if (crtl->return_rtx != 0)
23842 mode = GET_MODE (crtl->return_rtx);
23843 else
23844 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23846 size = GET_MODE_SIZE (mode);
23848 if (size == 0)
23850 /* In a void function we can use any argument register.
23851 In a function that returns a structure on the stack
23852 we can use the second and third argument registers. */
23853 if (mode == VOIDmode)
23854 regs_available_for_popping =
23855 (1 << ARG_REGISTER (1))
23856 | (1 << ARG_REGISTER (2))
23857 | (1 << ARG_REGISTER (3));
23858 else
23859 regs_available_for_popping =
23860 (1 << ARG_REGISTER (2))
23861 | (1 << ARG_REGISTER (3));
23863 else if (size <= 4)
23864 regs_available_for_popping =
23865 (1 << ARG_REGISTER (2))
23866 | (1 << ARG_REGISTER (3));
23867 else if (size <= 8)
23868 regs_available_for_popping =
23869 (1 << ARG_REGISTER (3));
23872 /* Match registers to be popped with registers into which we pop them. */
23873 for (available = regs_available_for_popping,
23874 required = regs_to_pop;
23875 required != 0 && available != 0;
23876 available &= ~(available & - available),
23877 required &= ~(required & - required))
23878 -- pops_needed;
23880 /* If we have any popping registers left over, remove them. */
23881 if (available > 0)
23882 regs_available_for_popping &= ~available;
23884 /* Otherwise if we need another popping register we can use
23885 the fourth argument register. */
23886 else if (pops_needed)
23888 /* If we have not found any free argument registers and
23889 reg a4 contains the return address, we must move it. */
23890 if (regs_available_for_popping == 0
23891 && reg_containing_return_addr == LAST_ARG_REGNUM)
23893 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23894 reg_containing_return_addr = LR_REGNUM;
23896 else if (size > 12)
23898 /* Register a4 is being used to hold part of the return value,
23899 but we have dire need of a free, low register. */
23900 restore_a4 = TRUE;
23902 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23905 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23907 /* The fourth argument register is available. */
23908 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23910 --pops_needed;
23914 /* Pop as many registers as we can. */
23915 thumb_pop (f, regs_available_for_popping);
23917 /* Process the registers we popped. */
23918 if (reg_containing_return_addr == -1)
23920 /* The return address was popped into the lowest numbered register. */
23921 regs_to_pop &= ~(1 << LR_REGNUM);
23923 reg_containing_return_addr =
23924 number_of_first_bit_set (regs_available_for_popping);
23926 /* Remove this register from the mask of available registers, so that
23927 the return address will not be corrupted by further pops. */
23928 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23931 /* If we popped other registers then handle them here. */
23932 if (regs_available_for_popping)
23934 int frame_pointer;
23936 /* Work out which register currently contains the frame pointer. */
23937 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23939 /* Move it into the correct place. */
23940 asm_fprintf (f, "\tmov\t%r, %r\n",
23941 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23943 /* (Temporarily) remove it from the mask of popped registers. */
23944 regs_available_for_popping &= ~(1 << frame_pointer);
23945 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23947 if (regs_available_for_popping)
23949 int stack_pointer;
23951 /* We popped the stack pointer as well,
23952 find the register that contains it. */
23953 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23955 /* Move it into the stack register. */
23956 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23958 /* At this point we have popped all necessary registers, so
23959 do not worry about restoring regs_available_for_popping
23960 to its correct value:
23962 assert (pops_needed == 0)
23963 assert (regs_available_for_popping == (1 << frame_pointer))
23964 assert (regs_to_pop == (1 << STACK_POINTER)) */
23966 else
23968 /* Since we have just moved the popped value into the frame
23969 pointer, the popping register is available for reuse, and
23970 we know that we still have the stack pointer left to pop. */
23971 regs_available_for_popping |= (1 << frame_pointer);
23975 /* If we still have registers left on the stack, but we no longer have
23976 any registers into which we can pop them, then we must move the return
23977 address into the link register and make available the register that
23978 contained it. */
23979 if (regs_available_for_popping == 0 && pops_needed > 0)
23981 regs_available_for_popping |= 1 << reg_containing_return_addr;
23983 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23984 reg_containing_return_addr);
23986 reg_containing_return_addr = LR_REGNUM;
23989 /* If we have registers left on the stack then pop some more.
23990 We know that at most we will want to pop FP and SP. */
23991 if (pops_needed > 0)
23993 int popped_into;
23994 int move_to;
23996 thumb_pop (f, regs_available_for_popping);
23998 /* We have popped either FP or SP.
23999 Move whichever one it is into the correct register. */
24000 popped_into = number_of_first_bit_set (regs_available_for_popping);
24001 move_to = number_of_first_bit_set (regs_to_pop);
24003 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24005 regs_to_pop &= ~(1 << move_to);
24007 --pops_needed;
24010 /* If we still have not popped everything then we must have only
24011 had one register available to us and we are now popping the SP. */
24012 if (pops_needed > 0)
24014 int popped_into;
24016 thumb_pop (f, regs_available_for_popping);
24018 popped_into = number_of_first_bit_set (regs_available_for_popping);
24020 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24022 assert (regs_to_pop == (1 << STACK_POINTER))
24023 assert (pops_needed == 1)
24027 /* If necessary restore the a4 register. */
24028 if (restore_a4)
24030 if (reg_containing_return_addr != LR_REGNUM)
24032 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24033 reg_containing_return_addr = LR_REGNUM;
24036 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24039 if (crtl->calls_eh_return)
24040 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24042 /* Return to caller. */
24043 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24045 /* This is for the cases where LR is not being used to contain the return
24046 address. It may therefore contain information that we might not want
24047 to leak, hence it must be cleared. The value in R0 will never be a
24048 secret at this point, so it is safe to use it, see the clearing code
24049 in 'cmse_nonsecure_entry_clear_before_return'. */
24050 if (reg_containing_return_addr != LR_REGNUM)
24051 asm_fprintf (f, "\tmov\tlr, r0\n");
24053 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24054 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24056 else
24057 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24060 /* Scan INSN just before assembler is output for it.
24061 For Thumb-1, we track the status of the condition codes; this
24062 information is used in the cbranchsi4_insn pattern. */
24063 void
24064 thumb1_final_prescan_insn (rtx_insn *insn)
24066 if (flag_print_asm_name)
24067 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24068 INSN_ADDRESSES (INSN_UID (insn)));
24069 /* Don't overwrite the previous setter when we get to a cbranch. */
24070 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24072 enum attr_conds conds;
24074 if (cfun->machine->thumb1_cc_insn)
24076 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24077 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24078 CC_STATUS_INIT;
24080 conds = get_attr_conds (insn);
24081 if (conds == CONDS_SET)
24083 rtx set = single_set (insn);
24084 cfun->machine->thumb1_cc_insn = insn;
24085 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24086 cfun->machine->thumb1_cc_op1 = const0_rtx;
24087 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24088 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24090 rtx src1 = XEXP (SET_SRC (set), 1);
24091 if (src1 == const0_rtx)
24092 cfun->machine->thumb1_cc_mode = CCmode;
24094 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24096 /* Record the src register operand instead of dest because
24097 cprop_hardreg pass propagates src. */
24098 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24101 else if (conds != CONDS_NOCOND)
24102 cfun->machine->thumb1_cc_insn = NULL_RTX;
24105 /* Check if unexpected far jump is used. */
24106 if (cfun->machine->lr_save_eliminated
24107 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24108 internal_error("Unexpected thumb1 far jump");
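/* Return nonzero if VAL is an 8-bit value shifted left by 0 to 24 bits,
   i.e. a constant that Thumb-1 code can build with a move followed by a
   shift.  */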
24111 int
24112 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24114 unsigned HOST_WIDE_INT mask = 0xff;
24115 int i;
24117 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24118 if (val == 0) /* XXX */
24119 return 0;
24121 for (i = 0; i < 25; i++)
24122 if ((val & (mask << i)) == val)
24123 return 1;
24125 return 0;
24128 /* Returns nonzero if the current function contains,
24129 or might contain a far jump. */
24130 static int
24131 thumb_far_jump_used_p (void)
24133 rtx_insn *insn;
24134 bool far_jump = false;
24135 unsigned int func_size = 0;
24137 /* If we have already decided that far jumps may be used,
24138 do not bother checking again, and always return true even if
24139 it turns out that they are not being used. Once we have made
24140 the decision that far jumps are present (and that hence the link
24141 register will be pushed onto the stack) we cannot go back on it. */
24142 if (cfun->machine->far_jump_used)
24143 return 1;
24145 /* If this function is not being called from the prologue/epilogue
24146 generation code then it must be being called from the
24147 INITIAL_ELIMINATION_OFFSET macro. */
24148 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24150 /* In this case we know that we are being asked about the elimination
24151 of the arg pointer register. If that register is not being used,
24152 then there are no arguments on the stack, and we do not have to
24153 worry that a far jump might force the prologue to push the link
24154 register, changing the stack offsets. In this case we can just
24155 return false, since the presence of far jumps in the function will
24156 not affect stack offsets.
24158 If the arg pointer is live (or if it was live, but has now been
24159 eliminated and so set to dead) then we do have to test to see if
24160 the function might contain a far jump. This test can lead to some
24161 false negatives, since before reload is completed, the length of
24162 branch instructions is not known, so gcc defaults to returning their
24163 longest length, which in turn sets the far jump attribute to true.
24165 A false negative will not result in bad code being generated, but it
24166 will result in a needless push and pop of the link register. We
24167 hope that this does not occur too often.
24169 If we need doubleword stack alignment this could affect the other
24170 elimination offsets so we can't risk getting it wrong. */
24171 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24172 cfun->machine->arg_pointer_live = 1;
24173 else if (!cfun->machine->arg_pointer_live)
24174 return 0;
24177 /* We should not change far_jump_used during or after reload, as there is
24178 no chance to change stack frame layout. */
24179 if (reload_in_progress || reload_completed)
24180 return 0;
24182 /* Check to see if the function contains a branch
24183 insn with the far jump attribute set. */
24184 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24186 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24188 far_jump = true;
24190 func_size += get_attr_length (insn);
24193 /* Attribute far_jump will always be true for thumb1 before
24194 shorten_branch pass. So checking the far_jump attribute before
24195 shorten_branch is not very useful.
24197 The following heuristic tries to estimate more accurately whether a far
24198 jump may finally be used. The heuristic is very conservative, as there is
24199 no chance to roll back the decision not to use a far jump.
24201 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24202 2-byte insn is associated with a 4 byte constant pool. Using
24203 function size 2048/3 as the threshold is conservative enough. */
24204 if (far_jump)
24206 if ((func_size * 3) >= 2048)
24208 /* Record the fact that we have decided that
24209 the function does use far jumps. */
24210 cfun->machine->far_jump_used = 1;
24211 return 1;
24215 return 0;
24218 /* Return nonzero if FUNC must be entered in ARM mode. */
24219 static bool
24220 is_called_in_ARM_mode (tree func)
24222 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24224 /* Ignore the problem about functions whose address is taken. */
24225 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24226 return true;
24228 #ifdef ARM_PE
24229 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24230 #else
24231 return false;
24232 #endif
24235 /* Given the stack offsets and register mask in OFFSETS, decide how
24236 many additional registers to push instead of subtracting a constant
24237 from SP. For epilogues the principle is the same except we use pop.
24238 FOR_PROLOGUE indicates which we're generating. */
24239 static int
24240 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24242 HOST_WIDE_INT amount;
24243 unsigned long live_regs_mask = offsets->saved_regs_mask;
24244 /* Extract a mask of the ones we can give to the Thumb's push/pop
24245 instruction. */
24246 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24247 /* Then count how many other high registers will need to be pushed. */
24248 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24249 int n_free, reg_base, size;
24251 if (!for_prologue && frame_pointer_needed)
24252 amount = offsets->locals_base - offsets->saved_regs;
24253 else
24254 amount = offsets->outgoing_args - offsets->saved_regs;
24256 /* If the stack frame size is 512 exactly, we can save one load
24257 instruction, which should make this a win even when optimizing
24258 for speed. */
24259 if (!optimize_size && amount != 512)
24260 return 0;
24262 /* Can't do this if there are high registers to push. */
24263 if (high_regs_pushed != 0)
24264 return 0;
24266 /* Shouldn't do it in the prologue if no registers would normally
24267 be pushed at all. In the epilogue, also allow it if we'll have
24268 a pop insn for the PC. */
24269 if (l_mask == 0
24270 && (for_prologue
24271 || TARGET_BACKTRACE
24272 || (live_regs_mask & 1 << LR_REGNUM) == 0
24273 || TARGET_INTERWORK
24274 || crtl->args.pretend_args_size != 0))
24275 return 0;
24277 /* Don't do this if thumb_expand_prologue wants to emit instructions
24278 between the push and the stack frame allocation. */
24279 if (for_prologue
24280 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24281 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24282 return 0;
24284 reg_base = 0;
24285 n_free = 0;
24286 if (!for_prologue)
24288 size = arm_size_return_regs ();
24289 reg_base = ARM_NUM_INTS (size);
24290 live_regs_mask >>= reg_base;
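/* Count how many low registers, starting at REG_BASE, are not live (and,
   for the epilogue, are call-clobbered) and are therefore free to receive
   extra pushed or popped values.  */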
24293 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24294 && (for_prologue || call_used_regs[reg_base + n_free]))
24296 live_regs_mask >>= 1;
24297 n_free++;
24300 if (n_free == 0)
24301 return 0;
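/* The stack adjustment is always a whole number of words.  */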
24302 gcc_assert (amount / 4 * 4 == amount);
24304 if (amount >= 512 && (amount - n_free * 4) < 512)
24305 return (amount - 508) / 4;
24306 if (amount <= n_free * 4)
24307 return amount / 4;
24308 return 0;
24311 /* The bits which aren't usefully expanded as rtl. */
24312 const char *
24313 thumb1_unexpanded_epilogue (void)
24315 arm_stack_offsets *offsets;
24316 int regno;
24317 unsigned long live_regs_mask = 0;
24318 int high_regs_pushed = 0;
24319 int extra_pop;
24320 int had_to_push_lr;
24321 int size;
24323 if (cfun->machine->return_used_this_function != 0)
24324 return "";
24326 if (IS_NAKED (arm_current_func_type ()))
24327 return "";
24329 offsets = arm_get_frame_offsets ();
24330 live_regs_mask = offsets->saved_regs_mask;
24331 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24333 /* We can deduce the registers used from the function's return value.
24334 This is more reliable than examining df_regs_ever_live_p () because that
24335 will be set if the register is ever used in the function, not just if
24336 the register is used to hold a return value. */
24337 size = arm_size_return_regs ();
24339 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24340 if (extra_pop > 0)
24342 unsigned long extra_mask = (1 << extra_pop) - 1;
24343 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24346 /* The prolog may have pushed some high registers to use as
24347 work registers. e.g. the testsuite file:
24348 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24349 compiles to produce:
24350 push {r4, r5, r6, r7, lr}
24351 mov r7, r9
24352 mov r6, r8
24353 push {r6, r7}
24354 as part of the prolog. We have to undo that pushing here. */
24356 if (high_regs_pushed)
24358 unsigned long mask = live_regs_mask & 0xff;
24359 int next_hi_reg;
24361 /* The available low registers depend on the size of the value we are
24362 returning. */
24363 if (size <= 12)
24364 mask |= 1 << 3;
24365 if (size <= 8)
24366 mask |= 1 << 2;
24368 if (mask == 0)
24369 /* Oh dear! We have no low registers into which we can pop
24370 high registers! */
24371 internal_error
24372 ("no low registers available for popping high registers");
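/* Find the first high register that needs to be restored.  */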
24374 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24375 if (live_regs_mask & (1 << next_hi_reg))
24376 break;
24378 while (high_regs_pushed)
24380 /* Find lo register(s) into which the high register(s) can
24381 be popped. */
24382 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24384 if (mask & (1 << regno))
24385 high_regs_pushed--;
24386 if (high_regs_pushed == 0)
24387 break;
24390 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24392 /* Pop the values into the low register(s). */
24393 thumb_pop (asm_out_file, mask);
24395 /* Move the value(s) into the high registers. */
24396 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24398 if (mask & (1 << regno))
24400 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24401 regno);
24403 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24404 if (live_regs_mask & (1 << next_hi_reg))
24405 break;
24409 live_regs_mask &= ~0x0f00;
24412 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24413 live_regs_mask &= 0xff;
24415 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24417 /* Pop the return address into the PC. */
24418 if (had_to_push_lr)
24419 live_regs_mask |= 1 << PC_REGNUM;
24421 /* Either no argument registers were pushed or a backtrace
24422 structure was created which includes an adjusted stack
24423 pointer, so just pop everything. */
24424 if (live_regs_mask)
24425 thumb_pop (asm_out_file, live_regs_mask);
24427 /* We have either just popped the return address into the
24428 PC or it was kept in LR for the entire function.
24429 Note that thumb_pop has already called thumb_exit if the
24430 PC was in the list. */
24431 if (!had_to_push_lr)
24432 thumb_exit (asm_out_file, LR_REGNUM);
24434 else
24436 /* Pop everything but the return address. */
24437 if (live_regs_mask)
24438 thumb_pop (asm_out_file, live_regs_mask);
24440 if (had_to_push_lr)
24442 if (size > 12)
24444 /* We have no free low regs, so save one. */
24445 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24446 LAST_ARG_REGNUM);
24449 /* Get the return address into a temporary register. */
24450 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24452 if (size > 12)
24454 /* Move the return address to lr. */
24455 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24456 LAST_ARG_REGNUM);
24457 /* Restore the low register. */
24458 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24459 IP_REGNUM);
24460 regno = LR_REGNUM;
24462 else
24463 regno = LAST_ARG_REGNUM;
24465 else
24466 regno = LR_REGNUM;
24468 /* Remove the argument registers that were pushed onto the stack. */
24469 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24470 SP_REGNUM, SP_REGNUM,
24471 crtl->args.pretend_args_size);
24473 thumb_exit (asm_out_file, regno);
24476 return "";
24479 /* Functions to save and restore machine-specific function data. */
24480 static struct machine_function *
24481 arm_init_machine_status (void)
24483 struct machine_function *machine;
24484 machine = ggc_cleared_alloc<machine_function> ();
24486 #if ARM_FT_UNKNOWN != 0
24487 machine->func_type = ARM_FT_UNKNOWN;
24488 #endif
24489 return machine;
24492 /* Return an RTX indicating where the return address to the
24493 calling function can be found. */
24494 rtx
24495 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24497 if (count != 0)
24498 return NULL_RTX;
24500 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24503 /* Do anything needed before RTL is emitted for each function. */
24504 void
24505 arm_init_expanders (void)
24507 /* Arrange to initialize and mark the machine per-function status. */
24508 init_machine_status = arm_init_machine_status;
24510 /* This is to stop the combine pass optimizing away the alignment
24511 adjustment of va_arg. */
24512 /* ??? It is claimed that this should not be necessary. */
24513 if (cfun)
24514 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24517 /* Check that FUNC is called with a different mode. */
24519 bool
24520 arm_change_mode_p (tree func)
24522 if (TREE_CODE (func) != FUNCTION_DECL)
24523 return false;
24525 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24527 if (!callee_tree)
24528 callee_tree = target_option_default_node;
24530 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24531 int flags = callee_opts->x_target_flags;
24533 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24536 /* Like arm_compute_initial_elimination_offset. Simpler because there
24537 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24538 to point at the base of the local variables after static stack
24539 space for a function has been allocated. */
24541 HOST_WIDE_INT
24542 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24544 arm_stack_offsets *offsets;
24546 offsets = arm_get_frame_offsets ();
24548 switch (from)
24550 case ARG_POINTER_REGNUM:
24551 switch (to)
24553 case STACK_POINTER_REGNUM:
24554 return offsets->outgoing_args - offsets->saved_args;
24556 case FRAME_POINTER_REGNUM:
24557 return offsets->soft_frame - offsets->saved_args;
24559 case ARM_HARD_FRAME_POINTER_REGNUM:
24560 return offsets->saved_regs - offsets->saved_args;
24562 case THUMB_HARD_FRAME_POINTER_REGNUM:
24563 return offsets->locals_base - offsets->saved_args;
24565 default:
24566 gcc_unreachable ();
24568 break;
24570 case FRAME_POINTER_REGNUM:
24571 switch (to)
24573 case STACK_POINTER_REGNUM:
24574 return offsets->outgoing_args - offsets->soft_frame;
24576 case ARM_HARD_FRAME_POINTER_REGNUM:
24577 return offsets->saved_regs - offsets->soft_frame;
24579 case THUMB_HARD_FRAME_POINTER_REGNUM:
24580 return offsets->locals_base - offsets->soft_frame;
24582 default:
24583 gcc_unreachable ();
24585 break;
24587 default:
24588 gcc_unreachable ();
24592 /* Generate the function's prologue. */
24594 void
24595 thumb1_expand_prologue (void)
24597 rtx_insn *insn;
24599 HOST_WIDE_INT amount;
24600 HOST_WIDE_INT size;
24601 arm_stack_offsets *offsets;
24602 unsigned long func_type;
24603 int regno;
24604 unsigned long live_regs_mask;
24605 unsigned long l_mask;
24606 unsigned high_regs_pushed = 0;
24607 bool lr_needs_saving;
24609 func_type = arm_current_func_type ();
24611 /* Naked functions don't have prologues. */
24612 if (IS_NAKED (func_type))
24614 if (flag_stack_usage_info)
24615 current_function_static_stack_size = 0;
24616 return;
24619 if (IS_INTERRUPT (func_type))
24621 error ("interrupt Service Routines cannot be coded in Thumb mode");
24622 return;
24625 if (is_called_in_ARM_mode (current_function_decl))
24626 emit_insn (gen_prologue_thumb1_interwork ());
24628 offsets = arm_get_frame_offsets ();
24629 live_regs_mask = offsets->saved_regs_mask;
24630 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24632 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24633 l_mask = live_regs_mask & 0x40ff;
24634 /* Then count how many other high registers will need to be pushed. */
24635 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24637 if (crtl->args.pretend_args_size)
24639 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24641 if (cfun->machine->uses_anonymous_args)
24643 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24644 unsigned long mask;
24646 mask = 1ul << (LAST_ARG_REGNUM + 1);
24647 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24649 insn = thumb1_emit_multi_reg_push (mask, 0);
24651 else
24653 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24654 stack_pointer_rtx, x));
24656 RTX_FRAME_RELATED_P (insn) = 1;
24659 if (TARGET_BACKTRACE)
24661 HOST_WIDE_INT offset = 0;
24662 unsigned work_register;
24663 rtx work_reg, x, arm_hfp_rtx;
24665 /* We have been asked to create a stack backtrace structure.
24666 The code looks like this:
24668 0 .align 2
24669 0 func:
24670 0 sub SP, #16 Reserve space for 4 registers.
24671 2 push {R7} Push low registers.
24672 4 add R7, SP, #20 Get the stack pointer before the push.
24673 6 str R7, [SP, #8] Store the stack pointer
24674 (before reserving the space).
24675 8 mov R7, PC Get hold of the start of this code + 12.
24676 10 str R7, [SP, #16] Store it.
24677 12 mov R7, FP Get hold of the current frame pointer.
24678 14 str R7, [SP, #4] Store it.
24679 16 mov R7, LR Get hold of the current return address.
24680 18 str R7, [SP, #12] Store it.
24681 20 add R7, SP, #16 Point at the start of the
24682 backtrace structure.
24683 22 mov FP, R7 Put this value into the frame pointer. */
24685 work_register = thumb_find_work_register (live_regs_mask);
24686 work_reg = gen_rtx_REG (SImode, work_register);
24687 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24689 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24690 stack_pointer_rtx, GEN_INT (-16)));
24691 RTX_FRAME_RELATED_P (insn) = 1;
24693 if (l_mask)
24695 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24696 RTX_FRAME_RELATED_P (insn) = 1;
24697 lr_needs_saving = false;
24699 offset = bit_count (l_mask) * UNITS_PER_WORD;
24702 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24703 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24705 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24706 x = gen_frame_mem (SImode, x);
24707 emit_move_insn (x, work_reg);
24709 /* Make sure that the instruction fetching the PC is in the right place
24710 to calculate "start of backtrace creation code + 12". */
24711 /* ??? The stores using the common WORK_REG ought to be enough to
24712 prevent the scheduler from doing anything weird. Failing that
24713 we could always move all of the following into an UNSPEC_VOLATILE. */
24714 if (l_mask)
24716 x = gen_rtx_REG (SImode, PC_REGNUM);
24717 emit_move_insn (work_reg, x);
24719 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24720 x = gen_frame_mem (SImode, x);
24721 emit_move_insn (x, work_reg);
24723 emit_move_insn (work_reg, arm_hfp_rtx);
24725 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24726 x = gen_frame_mem (SImode, x);
24727 emit_move_insn (x, work_reg);
24729 else
24731 emit_move_insn (work_reg, arm_hfp_rtx);
24733 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24734 x = gen_frame_mem (SImode, x);
24735 emit_move_insn (x, work_reg);
24737 x = gen_rtx_REG (SImode, PC_REGNUM);
24738 emit_move_insn (work_reg, x);
24740 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24741 x = gen_frame_mem (SImode, x);
24742 emit_move_insn (x, work_reg);
24745 x = gen_rtx_REG (SImode, LR_REGNUM);
24746 emit_move_insn (work_reg, x);
24748 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24749 x = gen_frame_mem (SImode, x);
24750 emit_move_insn (x, work_reg);
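/* Finally, point the ARM frame pointer at the start of the backtrace
   structure that has just been built.  */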
24752 x = GEN_INT (offset + 12);
24753 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24755 emit_move_insn (arm_hfp_rtx, work_reg);
24757 /* Optimization: If we are not pushing any low registers but we are going
24758 to push some high registers then delay our first push. This will just
24759 be a push of LR and we can combine it with the push of the first high
24760 register. */
24761 else if ((l_mask & 0xff) != 0
24762 || (high_regs_pushed == 0 && lr_needs_saving))
24764 unsigned long mask = l_mask;
24765 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24766 insn = thumb1_emit_multi_reg_push (mask, mask);
24767 RTX_FRAME_RELATED_P (insn) = 1;
24768 lr_needs_saving = false;
24771 if (high_regs_pushed)
24773 unsigned pushable_regs;
24774 unsigned next_hi_reg;
24775 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24776 : crtl->args.info.nregs;
24777 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24779 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24780 if (live_regs_mask & (1 << next_hi_reg))
24781 break;
24783 /* Here we need to mask out registers used for passing arguments,
24784 even if they can be pushed. This is to avoid using them to stash the high
24785 registers; such stashing could clobber the arguments. */
24786 pushable_regs = l_mask & (~arg_regs_mask);
24787 if (lr_needs_saving)
24788 pushable_regs &= ~(1 << LR_REGNUM);
24790 if (pushable_regs == 0)
24791 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24793 while (high_regs_pushed > 0)
24795 unsigned long real_regs_mask = 0;
24796 unsigned long push_mask = 0;
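/* Copy as many high registers as possible into free low registers so that
   they can be saved with a single Thumb-1 push instruction.  */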
24798 for (regno = LR_REGNUM; regno >= 0; regno --)
24800 if (pushable_regs & (1 << regno))
24802 emit_move_insn (gen_rtx_REG (SImode, regno),
24803 gen_rtx_REG (SImode, next_hi_reg));
24805 high_regs_pushed --;
24806 real_regs_mask |= (1 << next_hi_reg);
24807 push_mask |= (1 << regno);
24809 if (high_regs_pushed)
24811 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24812 next_hi_reg --)
24813 if (live_regs_mask & (1 << next_hi_reg))
24814 break;
24816 else
24817 break;
24821 /* If we had to find a work register and we have not yet
24822 saved the LR then add it to the list of regs to push. */
24823 if (lr_needs_saving)
24825 push_mask |= 1 << LR_REGNUM;
24826 real_regs_mask |= 1 << LR_REGNUM;
24827 lr_needs_saving = false;
24830 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24831 RTX_FRAME_RELATED_P (insn) = 1;
24835 /* Load the pic register before setting the frame pointer,
24836 so we can use r7 as a temporary work register. */
24837 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24838 arm_load_pic_register (live_regs_mask);
24840 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24841 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24842 stack_pointer_rtx);
24844 size = offsets->outgoing_args - offsets->saved_args;
24845 if (flag_stack_usage_info)
24846 current_function_static_stack_size = size;
24848 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24849 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24850 sorry ("-fstack-check=specific for Thumb-1");
24852 amount = offsets->outgoing_args - offsets->saved_regs;
24853 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24854 if (amount)
24856 if (amount < 512)
24858 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24859 GEN_INT (- amount)));
24860 RTX_FRAME_RELATED_P (insn) = 1;
24862 else
24864 rtx reg, dwarf;
24866 /* The stack decrement is too big for an immediate value in a single
24867 insn. In theory we could issue multiple subtracts, but after
24868 three of them it becomes more space efficient to place the full
24869 value in the constant pool and load into a register. (Also the
24870 ARM debugger really likes to see only one stack decrement per
24871 function). So instead we look for a scratch register into which
24872 we can load the decrement, and then we subtract this from the
24873 stack pointer. Unfortunately on the thumb the only available
24874 scratch registers are the argument registers, and we cannot use
24875 these as they may hold arguments to the function. Instead we
24876 attempt to locate a call preserved register which is used by this
24877 function. If we can find one, then we know that it will have
24878 been pushed at the start of the prologue and so we can corrupt
24879 it now. */
24880 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24881 if (live_regs_mask & (1 << regno))
24882 break;
24884 gcc_assert(regno <= LAST_LO_REGNUM);
24886 reg = gen_rtx_REG (SImode, regno);
24888 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24890 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24891 stack_pointer_rtx, reg));
24893 dwarf = gen_rtx_SET (stack_pointer_rtx,
24894 plus_constant (Pmode, stack_pointer_rtx,
24895 -amount));
24896 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24897 RTX_FRAME_RELATED_P (insn) = 1;
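/* Illustrative sketch of the large-decrement path above, assuming a
   hypothetical 1024-byte adjustment with r4 live (and therefore already
   saved) in this function:

	ldr	r4, .Lpool	@ load -1024, e.g. from the literal pool
	add	sp, sp, r4

   The REG_FRAME_RELATED_EXPR note records the net "sp = sp - 1024"
   so the unwinder still sees a simple constant adjustment.  */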
24901 if (frame_pointer_needed)
24902 thumb_set_frame_pointer (offsets);
24904 /* If we are profiling, make sure no instructions are scheduled before
24905 the call to mcount. Similarly if the user has requested no
24906 scheduling in the prologue. Similarly if we want non-call exceptions
24907 using the EABI unwinder, to prevent faulting instructions from being
24908 swapped with a stack adjustment. */
24909 if (crtl->profile || !TARGET_SCHED_PROLOG
24910 || (arm_except_unwind_info (&global_options) == UI_TARGET
24911 && cfun->can_throw_non_call_exceptions))
24912 emit_insn (gen_blockage ());
24914 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24915 if (live_regs_mask & 0xff)
24916 cfun->machine->lr_save_eliminated = 0;
24919 /* Clear caller saved registers not used to pass return values and leaked
24920 condition flags before exiting a cmse_nonsecure_entry function. */
24922 void
24923 cmse_nonsecure_entry_clear_before_return (void)
24925 uint64_t to_clear_mask[2];
24926 uint32_t padding_bits_to_clear = 0;
24927 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
24928 int regno, maxregno = IP_REGNUM;
24929 tree result_type;
24930 rtx result_rtl;
24932 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
24933 to_clear_mask[0] |= (1ULL << IP_REGNUM);
24935 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24936 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24937 to make sure the instructions used to clear them are present. */
24938 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
24940 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
24941 maxregno = LAST_VFP_REGNUM;
24943 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
24944 to_clear_mask[0] |= float_mask;
24946 float_mask = (1ULL << (maxregno - 63)) - 1;
24947 to_clear_mask[1] = float_mask;
24949 /* Make sure we don't clear the two scratch registers used to clear the
24950 relevant FPSCR bits in output_return_instruction. */
24951 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
24952 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
24953 emit_use (gen_rtx_REG (SImode, 4));
24954 to_clear_mask[0] &= ~(1ULL << 4);
24957 /* If the user has defined registers to be caller saved, these are no longer
24958 restored by the function before returning and must thus be cleared for
24959 security purposes. */
24960 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
24962 /* We do not touch registers that can be used to pass arguments as per
24963 the AAPCS, since these should never be made callee-saved by user
24964 options. */
24965 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
24966 continue;
24967 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
24968 continue;
24969 if (call_used_regs[regno])
24970 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
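/* Worked example of the indexing above: register numbers above 63 land
   in the second mask word, so a register numbered 70 sets bit
   70 % 64 == 6 of to_clear_mask[1], while r12 (IP_REGNUM, register 12)
   sets bit 12 of to_clear_mask[0].  */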
24973 /* Make sure we do not clear the registers used to return the result in. */
24974 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
24975 if (!VOID_TYPE_P (result_type))
24977 result_rtl = arm_function_value (result_type, current_function_decl, 0);
24979 /* No need to check that we return in registers, because we don't
24980 support returning on the stack yet. */
24981 to_clear_mask[0]
24982 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
24983 padding_bits_to_clear_ptr);
24986 if (padding_bits_to_clear != 0)
24988 rtx reg_rtx;
24989 /* Padding bits to clear is not 0, so we know we are dealing with
24990 returning a composite type, which only uses r0. Let's make sure that
24991 r1-r3 are cleared too; we will use r1 as a scratch register. */
24992 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
24994 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
24996 /* Fill the lower half of the negated padding_bits_to_clear. */
24997 emit_move_insn (reg_rtx,
24998 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25000 /* Also fill the top half of the negated padding_bits_to_clear. */
25001 if (((~padding_bits_to_clear) >> 16) > 0)
25002 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25003 GEN_INT (16),
25004 GEN_INT (16)),
25005 GEN_INT ((~padding_bits_to_clear) >> 16)));
25007 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25008 gen_rtx_REG (SImode, R0_REGNUM),
25009 reg_rtx));
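/* Worked example of the padding clearing above, for a hypothetical
   layout where the top byte of r0 is padding: padding_bits_to_clear is
   0xff000000, so r1 is loaded with ~0xff000000 == 0x00ffffff (lower
   half first, upper half via the 16-bit insert) and the final AND
   clears only the padding bits of r0.  */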
25012 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25014 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25015 continue;
25017 if (IS_VFP_REGNUM (regno))
25019 /* If regno is an even vfp register and its successor is also to
25020 be cleared, use vmov. */
25021 if (TARGET_VFP_DOUBLE
25022 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25023 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25025 emit_move_insn (gen_rtx_REG (DFmode, regno),
25026 CONST1_RTX (DFmode));
25027 emit_use (gen_rtx_REG (DFmode, regno));
25028 regno++;
25030 else
25032 emit_move_insn (gen_rtx_REG (SFmode, regno),
25033 CONST1_RTX (SFmode));
25034 emit_use (gen_rtx_REG (SFmode, regno));
25037 else
25039 if (TARGET_THUMB1)
25041 if (regno == R0_REGNUM)
25042 emit_move_insn (gen_rtx_REG (SImode, regno),
25043 const0_rtx);
25044 else
25045 /* R0 has either been cleared before (see the code above) or it
25046 holds a return value; either way it is not secret
25047 information. */
25048 emit_move_insn (gen_rtx_REG (SImode, regno),
25049 gen_rtx_REG (SImode, R0_REGNUM));
25050 emit_use (gen_rtx_REG (SImode, regno));
25052 else
25054 emit_move_insn (gen_rtx_REG (SImode, regno),
25055 gen_rtx_REG (SImode, LR_REGNUM));
25056 emit_use (gen_rtx_REG (SImode, regno));
25062 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25063 single POP instruction can be generated. LR should be replaced by PC. All
25064 the checks required are already done by USE_RETURN_INSN (). Hence,
25065 all we really need to check here is whether a single register or
25066 multiple registers are to be popped on return. */
25067 void
25068 thumb2_expand_return (bool simple_return)
25070 int i, num_regs;
25071 unsigned long saved_regs_mask;
25072 arm_stack_offsets *offsets;
25074 offsets = arm_get_frame_offsets ();
25075 saved_regs_mask = offsets->saved_regs_mask;
25077 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25078 if (saved_regs_mask & (1 << i))
25079 num_regs++;
25081 if (!simple_return && saved_regs_mask)
25083 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25084 functions, or adapt the code to handle it according to the ACLE. This path
25085 should not be reachable for cmse_nonsecure_entry functions, though we
25086 prefer to assert it for now to ensure that future code changes do not
25087 silently change this behavior. */
25088 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25089 if (num_regs == 1)
25091 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25092 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25093 rtx addr = gen_rtx_MEM (SImode,
25094 gen_rtx_POST_INC (SImode,
25095 stack_pointer_rtx));
25096 set_mem_alias_set (addr, get_frame_alias_set ());
25097 XVECEXP (par, 0, 0) = ret_rtx;
25098 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25099 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25100 emit_jump_insn (par);
25102 else
25104 saved_regs_mask &= ~ (1 << LR_REGNUM);
25105 saved_regs_mask |= (1 << PC_REGNUM);
25106 arm_emit_multi_reg_pop (saved_regs_mask);
25109 else
25111 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25112 cmse_nonsecure_entry_clear_before_return ();
25113 emit_jump_insn (simple_return_rtx);
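/* Illustrative output for the two non-simple-return cases above; the
   register sets are hypothetical.  If only LR was saved, the
   single-register path emits roughly

	ldr	pc, [sp], #4

   otherwise LR is replaced by PC in the mask and a single pop both
   restores and returns, e.g.

	pop	{r4, r5, pc}  */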
25117 void
25118 thumb1_expand_epilogue (void)
25120 HOST_WIDE_INT amount;
25121 arm_stack_offsets *offsets;
25122 int regno;
25124 /* Naked functions don't have prologues. */
25125 if (IS_NAKED (arm_current_func_type ()))
25126 return;
25128 offsets = arm_get_frame_offsets ();
25129 amount = offsets->outgoing_args - offsets->saved_regs;
25131 if (frame_pointer_needed)
25133 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25134 amount = offsets->locals_base - offsets->saved_regs;
25136 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25138 gcc_assert (amount >= 0);
25139 if (amount)
25141 emit_insn (gen_blockage ());
25143 if (amount < 512)
25144 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25145 GEN_INT (amount)));
25146 else
25148 /* r3 is always free in the epilogue. */
25149 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25151 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25152 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
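/* Illustrative sketch of the two cases above, with hypothetical frame
   sizes: a 64-byte frame is released with a single

	add	sp, sp, #64

   while a 1024-byte frame exceeds the immediate range, so the amount is
   loaded into r3 (e.g. from the literal pool) and added:

	ldr	r3, .Lpool
	add	sp, sp, r3  */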
25156 /* Emit a USE (stack_pointer_rtx), so that
25157 the stack adjustment will not be deleted. */
25158 emit_insn (gen_force_register_use (stack_pointer_rtx));
25160 if (crtl->profile || !TARGET_SCHED_PROLOG)
25161 emit_insn (gen_blockage ());
25163 /* Emit a clobber for each insn that will be restored in the epilogue,
25164 so that flow2 will get register lifetimes correct. */
25165 for (regno = 0; regno < 13; regno++)
25166 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25167 emit_clobber (gen_rtx_REG (SImode, regno));
25169 if (! df_regs_ever_live_p (LR_REGNUM))
25170 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25172 /* Clear all caller-saved regs that are not used to return. */
25173 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25174 cmse_nonsecure_entry_clear_before_return ();
25177 /* Epilogue code for APCS frame. */
25178 static void
25179 arm_expand_epilogue_apcs_frame (bool really_return)
25181 unsigned long func_type;
25182 unsigned long saved_regs_mask;
25183 int num_regs = 0;
25184 int i;
25185 int floats_from_frame = 0;
25186 arm_stack_offsets *offsets;
25188 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25189 func_type = arm_current_func_type ();
25191 /* Get frame offsets for ARM. */
25192 offsets = arm_get_frame_offsets ();
25193 saved_regs_mask = offsets->saved_regs_mask;
25195 /* Find the offset of the floating-point save area in the frame. */
25196 floats_from_frame
25197 = (offsets->saved_args
25198 + arm_compute_static_chain_stack_bytes ()
25199 - offsets->frame);
25201 /* Compute how many core registers are saved and how far away the floats are. */
25202 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25203 if (saved_regs_mask & (1 << i))
25205 num_regs++;
25206 floats_from_frame += 4;
25209 if (TARGET_HARD_FLOAT)
25211 int start_reg;
25212 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25214 /* The offset is from IP_REGNUM. */
25215 int saved_size = arm_get_vfp_saved_size ();
25216 if (saved_size > 0)
25218 rtx_insn *insn;
25219 floats_from_frame += saved_size;
25220 insn = emit_insn (gen_addsi3 (ip_rtx,
25221 hard_frame_pointer_rtx,
25222 GEN_INT (-floats_from_frame)));
25223 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25224 ip_rtx, hard_frame_pointer_rtx);
25227 /* Generate VFP register multi-pop. */
25228 start_reg = FIRST_VFP_REGNUM;
25230 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25231 /* Look for a case where a reg does not need restoring. */
25232 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25233 && (!df_regs_ever_live_p (i + 1)
25234 || call_used_regs[i + 1]))
25236 if (start_reg != i)
25237 arm_emit_vfp_multi_reg_pop (start_reg,
25238 (i - start_reg) / 2,
25239 gen_rtx_REG (SImode,
25240 IP_REGNUM));
25241 start_reg = i + 2;
25244 /* Restore the remaining regs that we have discovered (or possibly
25245 even all of them, if the conditional in the for loop never
25246 fired). */
25247 if (start_reg != i)
25248 arm_emit_vfp_multi_reg_pop (start_reg,
25249 (i - start_reg) / 2,
25250 gen_rtx_REG (SImode, IP_REGNUM));
25253 if (TARGET_IWMMXT)
25255 /* The frame pointer is guaranteed to be non-double-word aligned, as
25256 it is set to double-word-aligned old_stack_pointer - 4. */
25257 rtx_insn *insn;
25258 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25260 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25261 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25263 rtx addr = gen_frame_mem (V2SImode,
25264 plus_constant (Pmode, hard_frame_pointer_rtx,
25265 - lrm_count * 4));
25266 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25267 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25268 gen_rtx_REG (V2SImode, i),
25269 NULL_RTX);
25270 lrm_count += 2;
25274 /* saved_regs_mask should contain IP, which holds the old stack pointer
25275 at the time of activation creation. Since SP and IP are adjacent registers,
25276 we can restore the value directly into SP. */
25277 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25278 saved_regs_mask &= ~(1 << IP_REGNUM);
25279 saved_regs_mask |= (1 << SP_REGNUM);
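/* Illustrative sketch of the substitution above; the exact register list
   depends on the function.  For a frame saved with something like
   "stmfd sp!, {fp, ip, lr, pc}" where IP held the old stack pointer,
   rewriting the mask to name SP in place of IP (and, just below, PC in
   place of LR when a plain return is possible) lets a single

	ldmfd	sp!, {fp, sp, pc}

   restore the old frame pointer, reload the old stack pointer from the
   slot where IP was stored, and return.  */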
25281 /* There are two registers left in saved_regs_mask - LR and PC. We
25282 only need to restore LR (the return address), but to
25283 save time we can load it directly into PC, unless we need a
25284 special function exit sequence, or we are not really returning. */
25285 if (really_return
25286 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25287 && !crtl->calls_eh_return)
25288 /* Delete LR from the register mask, so that LR on
25289 the stack is loaded into the PC in the register mask. */
25290 saved_regs_mask &= ~(1 << LR_REGNUM);
25291 else
25292 saved_regs_mask &= ~(1 << PC_REGNUM);
25294 num_regs = bit_count (saved_regs_mask);
25295 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25297 rtx_insn *insn;
25298 emit_insn (gen_blockage ());
25299 /* Unwind the stack to just below the saved registers. */
25300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25301 hard_frame_pointer_rtx,
25302 GEN_INT (- 4 * num_regs)));
25304 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25305 stack_pointer_rtx, hard_frame_pointer_rtx);
25308 arm_emit_multi_reg_pop (saved_regs_mask);
25310 if (IS_INTERRUPT (func_type))
25312 /* Interrupt handlers will have pushed the
25313 IP onto the stack, so restore it now. */
25314 rtx_insn *insn;
25315 rtx addr = gen_rtx_MEM (SImode,
25316 gen_rtx_POST_INC (SImode,
25317 stack_pointer_rtx));
25318 set_mem_alias_set (addr, get_frame_alias_set ());
25319 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25320 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25321 gen_rtx_REG (SImode, IP_REGNUM),
25322 NULL_RTX);
25325 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25326 return;
25328 if (crtl->calls_eh_return)
25329 emit_insn (gen_addsi3 (stack_pointer_rtx,
25330 stack_pointer_rtx,
25331 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25333 if (IS_STACKALIGN (func_type))
25334 /* Restore the original stack pointer. Before prologue, the stack was
25335 realigned and the original stack pointer saved in r0. For details,
25336 see comment in arm_expand_prologue. */
25337 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25339 emit_jump_insn (simple_return_rtx);
25342 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
25343 function is not a sibcall. */
25344 void
25345 arm_expand_epilogue (bool really_return)
25347 unsigned long func_type;
25348 unsigned long saved_regs_mask;
25349 int num_regs = 0;
25350 int i;
25351 int amount;
25352 arm_stack_offsets *offsets;
25354 func_type = arm_current_func_type ();
25356 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25357 and let output_return_instruction take care of instruction emission, if any. */
25358 if (IS_NAKED (func_type)
25359 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25361 if (really_return)
25362 emit_jump_insn (simple_return_rtx);
25363 return;
25366 /* If we are throwing an exception, then we really must be doing a
25367 return, so we can't tail-call. */
25368 gcc_assert (!crtl->calls_eh_return || really_return);
25370 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25372 arm_expand_epilogue_apcs_frame (really_return);
25373 return;
25376 /* Get frame offsets for ARM. */
25377 offsets = arm_get_frame_offsets ();
25378 saved_regs_mask = offsets->saved_regs_mask;
25379 num_regs = bit_count (saved_regs_mask);
25381 if (frame_pointer_needed)
25383 rtx_insn *insn;
25384 /* Restore stack pointer if necessary. */
25385 if (TARGET_ARM)
25387 /* In ARM mode, frame pointer points to first saved register.
25388 Restore stack pointer to last saved register. */
25389 amount = offsets->frame - offsets->saved_regs;
25391 /* Force out any pending memory operations that reference stacked data
25392 before stack de-allocation occurs. */
25393 emit_insn (gen_blockage ());
25394 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25395 hard_frame_pointer_rtx,
25396 GEN_INT (amount)));
25397 arm_add_cfa_adjust_cfa_note (insn, amount,
25398 stack_pointer_rtx,
25399 hard_frame_pointer_rtx);
25401 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25402 deleted. */
25403 emit_insn (gen_force_register_use (stack_pointer_rtx));
25405 else
25407 /* In Thumb-2 mode, the frame pointer points to the last saved
25408 register. */
25409 amount = offsets->locals_base - offsets->saved_regs;
25410 if (amount)
25412 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25413 hard_frame_pointer_rtx,
25414 GEN_INT (amount)));
25415 arm_add_cfa_adjust_cfa_note (insn, amount,
25416 hard_frame_pointer_rtx,
25417 hard_frame_pointer_rtx);
25420 /* Force out any pending memory operations that reference stacked data
25421 before stack de-allocation occurs. */
25422 emit_insn (gen_blockage ());
25423 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25424 hard_frame_pointer_rtx));
25425 arm_add_cfa_adjust_cfa_note (insn, 0,
25426 stack_pointer_rtx,
25427 hard_frame_pointer_rtx);
25428 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25429 deleted. */
25430 emit_insn (gen_force_register_use (stack_pointer_rtx));
25433 else
25435 /* Pop off outgoing args and local frame to adjust stack pointer to
25436 last saved register. */
25437 amount = offsets->outgoing_args - offsets->saved_regs;
25438 if (amount)
25440 rtx_insn *tmp;
25441 /* Force out any pending memory operations that reference stacked data
25442 before stack de-allocation occurs. */
25443 emit_insn (gen_blockage ());
25444 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25445 stack_pointer_rtx,
25446 GEN_INT (amount)));
25447 arm_add_cfa_adjust_cfa_note (tmp, amount,
25448 stack_pointer_rtx, stack_pointer_rtx);
25449 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25450 not deleted. */
25451 emit_insn (gen_force_register_use (stack_pointer_rtx));
25455 if (TARGET_HARD_FLOAT)
25457 /* Generate VFP register multi-pop. */
25458 int end_reg = LAST_VFP_REGNUM + 1;
25460 /* Scan the registers in reverse order. We need to match
25461 any groupings made in the prologue and generate matching
25462 vldm operations. We need to match the groups because, unlike
25463 pop, vldm can only restore consecutive registers. */
25464 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25465 /* Look for a case where a reg does not need restoring. */
25466 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25467 && (!df_regs_ever_live_p (i + 1)
25468 || call_used_regs[i + 1]))
25470 /* Restore the regs discovered so far (from reg+2 to
25471 end_reg). */
25472 if (end_reg > i + 2)
25473 arm_emit_vfp_multi_reg_pop (i + 2,
25474 (end_reg - (i + 2)) / 2,
25475 stack_pointer_rtx);
25476 end_reg = i;
25479 /* Restore the remaining regs that we have discovered (or possibly
25480 even all of them, if the conditional in the for loop never
25481 fired). */
25482 if (end_reg > i + 2)
25483 arm_emit_vfp_multi_reg_pop (i + 2,
25484 (end_reg - (i + 2)) / 2,
25485 stack_pointer_rtx);
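/* Illustrative example of the group matching above, for a hypothetical
   function that saved d8-d11 and d13-d14 (d12 being dead): the reverse
   scan emits

	vldm	sp!, {d13-d14}
	vldm	sp!, {d8-d11}

   matching the two vpush groups from the prologue in reverse stack
   order.  */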
25488 if (TARGET_IWMMXT)
25489 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25490 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25492 rtx_insn *insn;
25493 rtx addr = gen_rtx_MEM (V2SImode,
25494 gen_rtx_POST_INC (SImode,
25495 stack_pointer_rtx));
25496 set_mem_alias_set (addr, get_frame_alias_set ());
25497 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25498 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25499 gen_rtx_REG (V2SImode, i),
25500 NULL_RTX);
25501 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25502 stack_pointer_rtx, stack_pointer_rtx);
25505 if (saved_regs_mask)
25507 rtx insn;
25508 bool return_in_pc = false;
25510 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25511 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25512 && !IS_CMSE_ENTRY (func_type)
25513 && !IS_STACKALIGN (func_type)
25514 && really_return
25515 && crtl->args.pretend_args_size == 0
25516 && saved_regs_mask & (1 << LR_REGNUM)
25517 && !crtl->calls_eh_return)
25519 saved_regs_mask &= ~(1 << LR_REGNUM);
25520 saved_regs_mask |= (1 << PC_REGNUM);
25521 return_in_pc = true;
25524 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25526 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25527 if (saved_regs_mask & (1 << i))
25529 rtx addr = gen_rtx_MEM (SImode,
25530 gen_rtx_POST_INC (SImode,
25531 stack_pointer_rtx));
25532 set_mem_alias_set (addr, get_frame_alias_set ());
25534 if (i == PC_REGNUM)
25536 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25537 XVECEXP (insn, 0, 0) = ret_rtx;
25538 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25539 addr);
25540 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25541 insn = emit_jump_insn (insn);
25543 else
25545 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25546 addr));
25547 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25548 gen_rtx_REG (SImode, i),
25549 NULL_RTX);
25550 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25551 stack_pointer_rtx,
25552 stack_pointer_rtx);
25556 else
25558 if (TARGET_LDRD
25559 && current_tune->prefer_ldrd_strd
25560 && !optimize_function_for_size_p (cfun))
25562 if (TARGET_THUMB2)
25563 thumb2_emit_ldrd_pop (saved_regs_mask);
25564 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25565 arm_emit_ldrd_pop (saved_regs_mask);
25566 else
25567 arm_emit_multi_reg_pop (saved_regs_mask);
25569 else
25570 arm_emit_multi_reg_pop (saved_regs_mask);
25573 if (return_in_pc)
25574 return;
25577 amount
25578 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25579 if (amount)
25581 int i, j;
25582 rtx dwarf = NULL_RTX;
25583 rtx_insn *tmp =
25584 emit_insn (gen_addsi3 (stack_pointer_rtx,
25585 stack_pointer_rtx,
25586 GEN_INT (amount)));
25588 RTX_FRAME_RELATED_P (tmp) = 1;
25590 if (cfun->machine->uses_anonymous_args)
25592 /* Restore pretend args. See arm_expand_prologue for how the pretend
25593 args are saved on the stack. */
25594 int num_regs = crtl->args.pretend_args_size / 4;
25595 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25596 for (j = 0, i = 0; j < num_regs; i++)
25597 if (saved_regs_mask & (1 << i))
25599 rtx reg = gen_rtx_REG (SImode, i);
25600 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25601 j++;
25603 REG_NOTES (tmp) = dwarf;
25605 arm_add_cfa_adjust_cfa_note (tmp, amount,
25606 stack_pointer_rtx, stack_pointer_rtx);
25609 /* Clear all caller-saved regs that are not used to return. */
25610 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25612 /* CMSE_ENTRY always returns. */
25613 gcc_assert (really_return);
25614 cmse_nonsecure_entry_clear_before_return ();
25617 if (!really_return)
25618 return;
25620 if (crtl->calls_eh_return)
25621 emit_insn (gen_addsi3 (stack_pointer_rtx,
25622 stack_pointer_rtx,
25623 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25625 if (IS_STACKALIGN (func_type))
25626 /* Restore the original stack pointer. Before prologue, the stack was
25627 realigned and the original stack pointer saved in r0. For details,
25628 see comment in arm_expand_prologue. */
25629 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25631 emit_jump_insn (simple_return_rtx);
25634 /* Implementation of insn prologue_thumb1_interwork. This is the first
25635 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25637 const char *
25638 thumb1_output_interwork (void)
25640 const char * name;
25641 FILE *f = asm_out_file;
25643 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25644 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25645 == SYMBOL_REF);
25646 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25648 /* Generate code sequence to switch us into Thumb mode. */
25649 /* The .code 32 directive has already been emitted by
25650 ASM_DECLARE_FUNCTION_NAME. */
25651 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25652 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25654 /* Generate a label, so that the debugger will notice the
25655 change in instruction sets. This label is also used by
25656 the assembler to bypass the ARM code when this function
25657 is called from a Thumb encoded function elsewhere in the
25658 same file. Hence the definition of STUB_NAME here must
25659 agree with the definition in gas/config/tc-arm.c. */
25661 #define STUB_NAME ".real_start_of"
25663 fprintf (f, "\t.code\t16\n");
25664 #ifdef ARM_PE
25665 if (arm_dllexport_name_p (name))
25666 name = arm_strip_name_encoding (name);
25667 #endif
25668 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25669 fprintf (f, "\t.thumb_func\n");
25670 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25672 return "";
25675 /* Handle the case of a double word load into a low register from
25676 a computed memory address. The computed address may involve a
25677 register which is overwritten by the load. */
25678 const char *
25679 thumb_load_double_from_address (rtx *operands)
25681 rtx addr;
25682 rtx base;
25683 rtx offset;
25684 rtx arg1;
25685 rtx arg2;
25687 gcc_assert (REG_P (operands[0]));
25688 gcc_assert (MEM_P (operands[1]));
25690 /* Get the memory address. */
25691 addr = XEXP (operands[1], 0);
25693 /* Work out how the memory address is computed. */
25694 switch (GET_CODE (addr))
25696 case REG:
25697 operands[2] = adjust_address (operands[1], SImode, 4);
25699 if (REGNO (operands[0]) == REGNO (addr))
25701 output_asm_insn ("ldr\t%H0, %2", operands);
25702 output_asm_insn ("ldr\t%0, %1", operands);
25704 else
25706 output_asm_insn ("ldr\t%0, %1", operands);
25707 output_asm_insn ("ldr\t%H0, %2", operands);
25709 break;
25711 case CONST:
25712 /* Compute <address> + 4 for the high order load. */
25713 operands[2] = adjust_address (operands[1], SImode, 4);
25715 output_asm_insn ("ldr\t%0, %1", operands);
25716 output_asm_insn ("ldr\t%H0, %2", operands);
25717 break;
25719 case PLUS:
25720 arg1 = XEXP (addr, 0);
25721 arg2 = XEXP (addr, 1);
25723 if (CONSTANT_P (arg1))
25724 base = arg2, offset = arg1;
25725 else
25726 base = arg1, offset = arg2;
25728 gcc_assert (REG_P (base));
25730 /* Catch the case of <address> = <reg> + <reg> */
25731 if (REG_P (offset))
25733 int reg_offset = REGNO (offset);
25734 int reg_base = REGNO (base);
25735 int reg_dest = REGNO (operands[0]);
25737 /* Add the base and offset registers together into the
25738 higher destination register. */
25739 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25740 reg_dest + 1, reg_base, reg_offset);
25742 /* Load the lower destination register from the address in
25743 the higher destination register. */
25744 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25745 reg_dest, reg_dest + 1);
25747 /* Load the higher destination register from its own address
25748 plus 4. */
25749 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25750 reg_dest + 1, reg_dest + 1);
25752 else
25754 /* Compute <address> + 4 for the high order load. */
25755 operands[2] = adjust_address (operands[1], SImode, 4);
25757 /* If the computed address is held in the low order register
25758 then load the high order register first, otherwise always
25759 load the low order register first. */
25760 if (REGNO (operands[0]) == REGNO (base))
25762 output_asm_insn ("ldr\t%H0, %2", operands);
25763 output_asm_insn ("ldr\t%0, %1", operands);
25765 else
25767 output_asm_insn ("ldr\t%0, %1", operands);
25768 output_asm_insn ("ldr\t%H0, %2", operands);
25771 break;
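/* Illustrative output for the register+register case above, with
   hypothetical registers: destination pair r0/r1, base r2, offset r3:

	add	r1, r2, r3
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   The address is formed in the high half of the destination so that it
   is only overwritten by the final load.  */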
25773 case LABEL_REF:
25774 /* With no registers to worry about we can just load the value
25775 directly. */
25776 operands[2] = adjust_address (operands[1], SImode, 4);
25778 output_asm_insn ("ldr\t%H0, %2", operands);
25779 output_asm_insn ("ldr\t%0, %1", operands);
25780 break;
25782 default:
25783 gcc_unreachable ();
25786 return "";
25789 const char *
25790 thumb_output_move_mem_multiple (int n, rtx *operands)
25792 switch (n)
25794 case 2:
25795 if (REGNO (operands[4]) > REGNO (operands[5]))
25796 std::swap (operands[4], operands[5]);
25798 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25799 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25800 break;
25802 case 3:
25803 if (REGNO (operands[4]) > REGNO (operands[5]))
25804 std::swap (operands[4], operands[5]);
25805 if (REGNO (operands[5]) > REGNO (operands[6]))
25806 std::swap (operands[5], operands[6]);
25807 if (REGNO (operands[4]) > REGNO (operands[5]))
25808 std::swap (operands[4], operands[5]);
25810 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25811 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25812 break;
25814 default:
25815 gcc_unreachable ();
25818 return "";
25821 /* Output a call-via instruction for thumb state. */
25822 const char *
25823 thumb_call_via_reg (rtx reg)
25825 int regno = REGNO (reg);
25826 rtx *labelp;
25828 gcc_assert (regno < LR_REGNUM);
25830 /* If we are in the normal text section we can use a single instance
25831 per compilation unit. If we are doing function sections, then we need
25832 an entry per section, since we can't rely on reachability. */
25833 if (in_section == text_section)
25835 thumb_call_reg_needed = 1;
25837 if (thumb_call_via_label[regno] == NULL)
25838 thumb_call_via_label[regno] = gen_label_rtx ();
25839 labelp = thumb_call_via_label + regno;
25841 else
25843 if (cfun->machine->call_via[regno] == NULL)
25844 cfun->machine->call_via[regno] = gen_label_rtx ();
25845 labelp = cfun->machine->call_via + regno;
25848 output_asm_insn ("bl\t%a0", labelp);
25849 return "";
25852 /* Routines for generating rtl. */
25853 void
25854 thumb_expand_movmemqi (rtx *operands)
25856 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25857 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25858 HOST_WIDE_INT len = INTVAL (operands[2]);
25859 HOST_WIDE_INT offset = 0;
25861 while (len >= 12)
25863 emit_insn (gen_movmem12b (out, in, out, in));
25864 len -= 12;
25867 if (len >= 8)
25869 emit_insn (gen_movmem8b (out, in, out, in));
25870 len -= 8;
25873 if (len >= 4)
25875 rtx reg = gen_reg_rtx (SImode);
25876 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25877 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25878 len -= 4;
25879 offset += 4;
25882 if (len >= 2)
25884 rtx reg = gen_reg_rtx (HImode);
25885 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25886 plus_constant (Pmode, in,
25887 offset))));
25888 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25889 offset)),
25890 reg));
25891 len -= 2;
25892 offset += 2;
25895 if (len)
25897 rtx reg = gen_reg_rtx (QImode);
25898 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25899 plus_constant (Pmode, in,
25900 offset))));
25901 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25902 offset)),
25903 reg));
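/* Worked example of the decomposition above: a hypothetical 19-byte copy
   is lowered to one 12-byte block move (movmem12b), then a 4-byte word
   copy through a scratch register, then a halfword and a byte copy at
   increasing offsets for the remaining 3 bytes.  */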
25907 void
25908 thumb_reload_out_hi (rtx *operands)
25910 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25913 /* Return the length of a function name prefix
25914 that starts with the character 'c'. */
25915 static int
25916 arm_get_strip_length (int c)
25918 switch (c)
25920 ARM_NAME_ENCODING_LENGTHS
25921 default: return 0;
25925 /* Return a pointer to a function's name with any
25926 and all prefix encodings stripped from it. */
25927 const char *
25928 arm_strip_name_encoding (const char *name)
25930 int skip;
25932 while ((skip = arm_get_strip_length (* name)))
25933 name += skip;
25935 return name;
25938 /* If there is a '*' anywhere in the name's prefix, then
25939 emit the stripped name verbatim, otherwise prepend an
25940 underscore if leading underscores are being used. */
25941 void
25942 arm_asm_output_labelref (FILE *stream, const char *name)
25944 int skip;
25945 int verbatim = 0;
25947 while ((skip = arm_get_strip_length (* name)))
25949 verbatim |= (*name == '*');
25950 name += skip;
25953 if (verbatim)
25954 fputs (name, stream);
25955 else
25956 asm_fprintf (stream, "%U%s", name);
25959 /* This function is used to emit an EABI tag and its associated value.
25960 We emit the numerical value of the tag in case the assembler does not
25961 support textual tags (e.g. gas prior to 2.20). If requested we include
25962 the tag name in a comment so that anyone reading the assembler output
25963 will know which tag is being set.
25965 This function is not static because arm-c.c needs it too. */
25967 void
25968 arm_emit_eabi_attribute (const char *name, int num, int val)
25970 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25971 if (flag_verbose_asm || flag_debug_asm)
25972 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25973 asm_fprintf (asm_out_file, "\n");
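/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
   produces

	.eabi_attribute 19, 1	@ Tag_ABI_FP_rounding

   where the trailing tag-name comment is only emitted under
   -fverbose-asm or -dA.  */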
25976 /* This function is used to print CPU tuning information as a comment
25977 in the assembler file. Pointers are not printed for now. */
25979 void
25980 arm_print_tune_info (void)
25982 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
25983 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
25984 current_tune->constant_limit);
25985 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25986 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
25987 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25988 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
25989 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25990 "prefetch.l1_cache_size:\t%d\n",
25991 current_tune->prefetch.l1_cache_size);
25992 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25993 "prefetch.l1_cache_line_size:\t%d\n",
25994 current_tune->prefetch.l1_cache_line_size);
25995 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25996 "prefer_constant_pool:\t%d\n",
25997 (int) current_tune->prefer_constant_pool);
25998 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
25999 "branch_cost:\t(s:speed, p:predictable)\n");
26000 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26001 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26002 current_tune->branch_cost (false, false));
26003 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26004 current_tune->branch_cost (false, true));
26005 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26006 current_tune->branch_cost (true, false));
26007 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26008 current_tune->branch_cost (true, true));
26009 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26010 "prefer_ldrd_strd:\t%d\n",
26011 (int) current_tune->prefer_ldrd_strd);
26012 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26013 "logical_op_non_short_circuit:\t[%d,%d]\n",
26014 (int) current_tune->logical_op_non_short_circuit_thumb,
26015 (int) current_tune->logical_op_non_short_circuit_arm);
26016 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26017 "prefer_neon_for_64bits:\t%d\n",
26018 (int) current_tune->prefer_neon_for_64bits);
26019 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26020 "disparage_flag_setting_t16_encodings:\t%d\n",
26021 (int) current_tune->disparage_flag_setting_t16_encodings);
26022 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26023 "string_ops_prefer_neon:\t%d\n",
26024 (int) current_tune->string_ops_prefer_neon);
26025 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26026 "max_insns_inline_memset:\t%d\n",
26027 current_tune->max_insns_inline_memset);
26028 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26029 current_tune->fusible_ops);
26030 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26031 (int) current_tune->sched_autopref);
26034 static void
26035 arm_file_start (void)
26037 int val;
26039 if (TARGET_BPABI)
26041 /* We don't have a specified CPU. Use the architecture to
26042 generate the tags.
26044 Note: it might be better to do this unconditionally, then the
26045 assembler would not need to know about all new CPU names as
26046 they are added. */
26047 if (!arm_active_target.core_name)
26049 /* armv7ve doesn't support any extensions. */
26050 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26052 /* Keep backward compatibility for assemblers
26053 which don't support armv7ve. */
26054 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26055 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26056 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26057 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26058 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26060 else
26062 const char* pos = strchr (arm_active_target.arch_name, '+');
26063 if (pos)
26065 char buf[32];
26066 gcc_assert (strlen (arm_active_target.arch_name)
26067 <= sizeof (buf) / sizeof (*pos));
26068 strncpy (buf, arm_active_target.arch_name,
26069 (pos - arm_active_target.arch_name) * sizeof (*pos));
26070 buf[pos - arm_active_target.arch_name] = '\0';
26071 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26072 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26074 else
26075 asm_fprintf (asm_out_file, "\t.arch %s\n",
26076 arm_active_target.arch_name);
26079 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26080 asm_fprintf (asm_out_file, "\t.arch %s\n",
26081 arm_active_target.core_name + 8);
26082 else
26084 const char* truncated_name
26085 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26086 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26089 if (print_tune_info)
26090 arm_print_tune_info ();
26092 if (! TARGET_SOFT_FLOAT)
26094 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26095 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26097 if (TARGET_HARD_FLOAT_ABI)
26098 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26101 /* Some of these attributes only apply when the corresponding features
26102 are used. However we don't have any easy way of figuring this out.
26103 Conservatively record the setting that would have been used. */
26105 if (flag_rounding_math)
26106 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26108 if (!flag_unsafe_math_optimizations)
26110 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26111 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26113 if (flag_signaling_nans)
26114 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26116 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26117 flag_finite_math_only ? 1 : 3);
26119 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26120 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26121 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26122 flag_short_enums ? 1 : 2);
26124 /* Tag_ABI_optimization_goals. */
26125 if (optimize_size)
26126 val = 4;
26127 else if (optimize >= 2)
26128 val = 2;
26129 else if (optimize)
26130 val = 1;
26131 else
26132 val = 6;
26133 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26135 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26136 unaligned_access);
26138 if (arm_fp16_format)
26139 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26140 (int) arm_fp16_format);
26142 if (arm_lang_output_object_attributes_hook)
26143 arm_lang_output_object_attributes_hook();
26146 default_file_start ();
26149 static void
26150 arm_file_end (void)
26152 int regno;
26154 if (NEED_INDICATE_EXEC_STACK)
26155 /* Add .note.GNU-stack. */
26156 file_end_indicate_exec_stack ();
26158 if (! thumb_call_reg_needed)
26159 return;
26161 switch_to_section (text_section);
26162 asm_fprintf (asm_out_file, "\t.code 16\n");
26163 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26165 for (regno = 0; regno < LR_REGNUM; regno++)
26167 rtx label = thumb_call_via_label[regno];
26169 if (label != 0)
26171 targetm.asm_out.internal_label (asm_out_file, "L",
26172 CODE_LABEL_NUMBER (label));
26173 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26178 #ifndef ARM_PE
26179 /* Symbols in the text segment can be accessed without indirecting via the
26180 constant pool; it may take an extra binary operation, but this is still
26181 faster than indirecting via memory. Don't do this when not optimizing,
26182 since we won't be calculating all of the offsets necessary to do this
26183 simplification. */
26185 static void
26186 arm_encode_section_info (tree decl, rtx rtl, int first)
26188 if (optimize > 0 && TREE_CONSTANT (decl))
26189 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26191 default_encode_section_info (decl, rtl, first);
26193 #endif /* !ARM_PE */
26195 static void
26196 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26198 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26199 && !strcmp (prefix, "L"))
26201 arm_ccfsm_state = 0;
26202 arm_target_insn = NULL;
26204 default_internal_label (stream, prefix, labelno);
26207 /* Output code to add DELTA to the first argument, and then jump
26208 to FUNCTION. Used for C++ multiple inheritance. */
26210 static void
26211 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26212 HOST_WIDE_INT, tree function)
26214 static int thunk_label = 0;
26215 char label[256];
26216 char labelpc[256];
26217 int mi_delta = delta;
26218 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26219 int shift = 0;
26220 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26221 ? 1 : 0);
26222 if (mi_delta < 0)
26223 mi_delta = - mi_delta;
26225 final_start_function (emit_barrier (), file, 1);
26227 if (TARGET_THUMB1)
26229 int labelno = thunk_label++;
26230 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26231 /* Thunks are entered in ARM mode when available. */
26232 if (TARGET_THUMB1_ONLY)
26234 /* push r3 so we can use it as a temporary. */
26235 /* TODO: Omit this save if r3 is not used. */
26236 fputs ("\tpush {r3}\n", file);
26237 fputs ("\tldr\tr3, ", file);
26239 else
26241 fputs ("\tldr\tr12, ", file);
26243 assemble_name (file, label);
26244 fputc ('\n', file);
26245 if (flag_pic)
26247 /* If we are generating PIC, the ldr instruction below loads
26248 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26249 the address of the add + 8, so we have:
26251 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26252 = target + 1.
26254 Note that we have "+ 1" because some versions of GNU ld
26255 don't set the low bit of the result for R_ARM_REL32
26256 relocations against thumb function symbols.
26257 On ARMv6M this is +4, not +8. */
26258 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26259 assemble_name (file, labelpc);
26260 fputs (":\n", file);
26261 if (TARGET_THUMB1_ONLY)
26263 /* This is 2 insns after the start of the thunk, so we know it
26264 is 4-byte aligned. */
26265 fputs ("\tadd\tr3, pc, r3\n", file);
26266 fputs ("\tmov r12, r3\n", file);
26268 else
26269 fputs ("\tadd\tr12, pc, r12\n", file);
26271 else if (TARGET_THUMB1_ONLY)
26272 fputs ("\tmov r12, r3\n", file);
26274 if (TARGET_THUMB1_ONLY)
26276 if (mi_delta > 255)
26278 fputs ("\tldr\tr3, ", file);
26279 assemble_name (file, label);
26280 fputs ("+4\n", file);
26281 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26282 mi_op, this_regno, this_regno);
26284 else if (mi_delta != 0)
26286 /* Thumb1 unified syntax requires s suffix in instruction name when
26287 one of the operands is immediate. */
26288 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26289 mi_op, this_regno, this_regno,
26290 mi_delta);
26293 else
26295 /* TODO: Use movw/movt for large constants when available. */
26296 while (mi_delta != 0)
26298 if ((mi_delta & (3 << shift)) == 0)
26299 shift += 2;
26300 else
26302 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26303 mi_op, this_regno, this_regno,
26304 mi_delta & (0xff << shift));
26305 mi_delta &= ~(0xff << shift);
26306 shift += 8;
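/* Worked example of the loop above: a hypothetical delta of 0x12345 is
   added to the "this" register in 8-bit chunks at even rotations, each
   a valid ARM immediate:

	add	r0, r0, #69	@ 0x45
	add	r0, r0, #8960	@ 0x2300
	add	r0, r0, #65536	@ 0x10000

   ("sub" instead of "add" for a negative delta, and r1 instead of r0
   when the function returns an aggregate).  */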
26310 if (TARGET_THUMB1)
26312 if (TARGET_THUMB1_ONLY)
26313 fputs ("\tpop\t{r3}\n", file);
26315 fprintf (file, "\tbx\tr12\n");
26316 ASM_OUTPUT_ALIGN (file, 2);
26317 assemble_name (file, label);
26318 fputs (":\n", file);
26319 if (flag_pic)
26321 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26322 rtx tem = XEXP (DECL_RTL (function), 0);
26323 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26324 pipeline offset is four rather than eight. Adjust the offset
26325 accordingly. */
26326 tem = plus_constant (GET_MODE (tem), tem,
26327 TARGET_THUMB1_ONLY ? -3 : -7);
26328 tem = gen_rtx_MINUS (GET_MODE (tem),
26329 tem,
26330 gen_rtx_SYMBOL_REF (Pmode,
26331 ggc_strdup (labelpc)));
26332 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26334 else
26335 /* Output ".word .LTHUNKn". */
26336 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26338 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26339 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26341 else
26343 fputs ("\tb\t", file);
26344 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26345 if (NEED_PLT_RELOC)
26346 fputs ("(PLT)", file);
26347 fputc ('\n', file);
26350 final_end_function ();
26353 /* MI thunk handling for TARGET_32BIT. */
26355 static void
26356 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26357 HOST_WIDE_INT vcall_offset, tree function)
26359 /* On ARM, this_regno is R0 or R1 depending on
26360 whether the function returns an aggregate or not. */
26362 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26363 function)
26364 ? R1_REGNUM : R0_REGNUM);
26366 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26367 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26368 reload_completed = 1;
26369 emit_note (NOTE_INSN_PROLOGUE_END);
26371 /* Add DELTA to THIS_RTX. */
26372 if (delta != 0)
26373 arm_split_constant (PLUS, Pmode, NULL_RTX,
26374 delta, this_rtx, this_rtx, false);
26376 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26377 if (vcall_offset != 0)
26379 /* Load *THIS_RTX. */
26380 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26381 /* Compute *THIS_RTX + VCALL_OFFSET. */
26382 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26383 false);
26384 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26385 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26386 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26389 /* Generate a tail call to the target function. */
26390 if (!TREE_USED (function))
26392 assemble_external (function);
26393 TREE_USED (function) = 1;
26395 rtx funexp = XEXP (DECL_RTL (function), 0);
26396 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26397 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26398 SIBLING_CALL_P (insn) = 1;
26400 insn = get_insns ();
26401 shorten_branches (insn);
26402 final_start_function (insn, file, 1);
26403 final (insn, file, 1);
26404 final_end_function ();
26406 /* Stop pretending this is a post-reload pass. */
26407 reload_completed = 0;
26410 /* Output code to add DELTA to the first argument, and then jump
26411 to FUNCTION. Used for C++ multiple inheritance. */
26413 static void
26414 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26415 HOST_WIDE_INT vcall_offset, tree function)
26417 if (TARGET_32BIT)
26418 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26419 else
26420 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26424 arm_emit_vector_const (FILE *file, rtx x)
26426 int i;
26427 const char * pattern;
26429 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26431 switch (GET_MODE (x))
26433 case V2SImode: pattern = "%08x"; break;
26434 case V4HImode: pattern = "%04x"; break;
26435 case V8QImode: pattern = "%02x"; break;
26436 default: gcc_unreachable ();
26439 fprintf (file, "0x");
26440 for (i = CONST_VECTOR_NUNITS (x); i--;)
26442 rtx element;
26444 element = CONST_VECTOR_ELT (x, i);
26445 fprintf (file, pattern, INTVAL (element));
26448 return 1;
26451 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26452 HFmode constant pool entries are actually loaded with ldr. */
26453 void
26454 arm_emit_fp16_const (rtx c)
26456 long bits;
26458 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26459 if (WORDS_BIG_ENDIAN)
26460 assemble_zeros (2);
26461 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26462 if (!WORDS_BIG_ENDIAN)
26463 assemble_zeros (2);
26466 const char *
26467 arm_output_load_gr (rtx *operands)
26469 rtx reg;
26470 rtx offset;
26471 rtx wcgr;
26472 rtx sum;
26474 if (!MEM_P (operands [1])
26475 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26476 || !REG_P (reg = XEXP (sum, 0))
26477 || !CONST_INT_P (offset = XEXP (sum, 1))
26478 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26479 return "wldrw%?\t%0, %1";
26481 /* Fix up an out-of-range load of a GR register. */
26482 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26483 wcgr = operands[0];
26484 operands[0] = reg;
26485 output_asm_insn ("ldr%?\t%0, %1", operands);
26487 operands[0] = wcgr;
26488 operands[1] = reg;
26489 output_asm_insn ("tmcr%?\t%0, %1", operands);
26490 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26492 return "";
26495 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26497 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26498 named arg and all anonymous args onto the stack.
26499 XXX I know the prologue shouldn't be pushing registers, but it is faster
26500 that way. */
26502 static void
26503 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26504 machine_mode mode,
26505 tree type,
26506 int *pretend_size,
26507 int second_time ATTRIBUTE_UNUSED)
26509 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26510 int nregs;
26512 cfun->machine->uses_anonymous_args = 1;
26513 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26515 nregs = pcum->aapcs_ncrn;
26516 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26517 nregs++;
26519 else
26520 nregs = pcum->nregs;
26522 if (nregs < NUM_ARG_REGS)
26523 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
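/* Worked example, with a hypothetical argument list: if the named
   arguments of a variadic function consume three core registers,
   nregs == 3, so *pretend_size == (4 - 3) * UNITS_PER_WORD == 4 and
   the prologue pushes the remaining argument register (r3) so that the
   anonymous arguments are contiguous with any stack arguments.  */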
26526 /* We can't rely on the caller doing the proper promotion when
26527 using APCS or ATPCS. */
26529 static bool
26530 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26532 return !TARGET_AAPCS_BASED;
26535 static machine_mode
26536 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26537 machine_mode mode,
26538 int *punsignedp ATTRIBUTE_UNUSED,
26539 const_tree fntype ATTRIBUTE_UNUSED,
26540 int for_return ATTRIBUTE_UNUSED)
26542 if (GET_MODE_CLASS (mode) == MODE_INT
26543 && GET_MODE_SIZE (mode) < 4)
26544 return SImode;
26546 return mode;
26549 /* AAPCS based ABIs use short enums by default. */
26551 static bool
26552 arm_default_short_enums (void)
26554 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26558 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26560 static bool
26561 arm_align_anon_bitfield (void)
26563 return TARGET_AAPCS_BASED;
26567 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26569 static tree
26570 arm_cxx_guard_type (void)
26572 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26576 /* The EABI says test the least significant bit of a guard variable. */
26578 static bool
26579 arm_cxx_guard_mask_bit (void)
26581 return TARGET_AAPCS_BASED;
26585 /* The EABI specifies that all array cookies are 8 bytes long. */
26587 static tree
26588 arm_get_cookie_size (tree type)
26590 tree size;
26592 if (!TARGET_AAPCS_BASED)
26593 return default_cxx_get_cookie_size (type);
26595 size = build_int_cst (sizetype, 8);
26596 return size;
26600 /* The EABI says that array cookies should also contain the element size. */
26602 static bool
26603 arm_cookie_has_size (void)
26605 return TARGET_AAPCS_BASED;
26609 /* The EABI says constructors and destructors should return a pointer to
26610 the object constructed/destroyed. */
26612 static bool
26613 arm_cxx_cdtor_returns_this (void)
26615 return TARGET_AAPCS_BASED;
26618 /* The EABI says that an inline function may never be the key
26619 method. */
26621 static bool
26622 arm_cxx_key_method_may_be_inline (void)
26624 return !TARGET_AAPCS_BASED;
26627 static void
26628 arm_cxx_determine_class_data_visibility (tree decl)
26630 if (!TARGET_AAPCS_BASED
26631 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26632 return;
26634 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26635 is exported. However, on systems without dynamic vague linkage,
26636 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26637 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26638 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26639 else
26640 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26641 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26644 static bool
26645 arm_cxx_class_data_always_comdat (void)
26647 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26648 vague linkage if the class has no key function. */
26649 return !TARGET_AAPCS_BASED;
26653 /* The EABI says __aeabi_atexit should be used to register static
26654 destructors. */
26656 static bool
26657 arm_cxx_use_aeabi_atexit (void)
26659 return TARGET_AAPCS_BASED;
26663 void
26664 arm_set_return_address (rtx source, rtx scratch)
26666 arm_stack_offsets *offsets;
26667 HOST_WIDE_INT delta;
26668 rtx addr;
26669 unsigned long saved_regs;
26671 offsets = arm_get_frame_offsets ();
26672 saved_regs = offsets->saved_regs_mask;
26674 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26675 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26676 else
26678 if (frame_pointer_needed)
26679 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26680 else
26682 /* LR will be the first saved register. */
26683 delta = offsets->outgoing_args - (offsets->frame + 4);
26686 if (delta >= 4096)
26688 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26689 GEN_INT (delta & ~4095)));
26690 addr = scratch;
26691 delta &= 4095;
26693 else
26694 addr = stack_pointer_rtx;
26696 addr = plus_constant (Pmode, addr, delta);
26698 /* The store needs to be marked as frame related in order to prevent
26699 DSE from deleting it as dead if it is based on fp. */
26700 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26701 RTX_FRAME_RELATED_P (insn) = 1;
26702 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26707 void
26708 thumb_set_return_address (rtx source, rtx scratch)
26710 arm_stack_offsets *offsets;
26711 HOST_WIDE_INT delta;
26712 HOST_WIDE_INT limit;
26713 int reg;
26714 rtx addr;
26715 unsigned long mask;
26717 emit_use (source);
26719 offsets = arm_get_frame_offsets ();
26720 mask = offsets->saved_regs_mask;
26721 if (mask & (1 << LR_REGNUM))
26723 limit = 1024;
26724 /* Find the saved regs. */
26725 if (frame_pointer_needed)
26727 delta = offsets->soft_frame - offsets->saved_args;
26728 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26729 if (TARGET_THUMB1)
26730 limit = 128;
26732 else
26734 delta = offsets->outgoing_args - offsets->saved_args;
26735 reg = SP_REGNUM;
26737 /* Allow for the stack frame. */
26738 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26739 delta -= 16;
26740 /* The link register is always the first saved register. */
26741 delta -= 4;
26743 /* Construct the address. */
26744 addr = gen_rtx_REG (SImode, reg);
26745 if (delta > limit)
26747 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26748 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26749 addr = scratch;
26751 else
26752 addr = plus_constant (Pmode, addr, delta);
26754 /* The store needs to be marked as frame related in order to prevent
26755 DSE from deleting it as dead if it is based on fp. */
26756 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26757 RTX_FRAME_RELATED_P (insn) = 1;
26758 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26760 else
26761 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26764 /* Implements target hook vector_mode_supported_p. */
26765 bool
26766 arm_vector_mode_supported_p (machine_mode mode)
26768 /* Neon also supports V2SImode, etc. listed in the clause below. */
26769 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26770 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26771 || mode == V2DImode || mode == V8HFmode))
26772 return true;
26774 if ((TARGET_NEON || TARGET_IWMMXT)
26775 && ((mode == V2SImode)
26776 || (mode == V4HImode)
26777 || (mode == V8QImode)))
26778 return true;
26780 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26781 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26782 || mode == V2HAmode))
26783 return true;
26785 return false;
26788 /* Implements target hook array_mode_supported_p. */
26790 static bool
26791 arm_array_mode_supported_p (machine_mode mode,
26792 unsigned HOST_WIDE_INT nelems)
26794 if (TARGET_NEON
26795 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26796 && (nelems >= 2 && nelems <= 4))
26797 return true;
26799 return false;
26802 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26803 registers when autovectorizing for Neon, at least until multiple vector
26804 widths are supported properly by the middle-end. */
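/* For example, SImode arithmetic is normally vectorized using V4SImode
   (a quadword Q register); with -mvectorize-with-neon-double it is
   vectorized using V2SImode (a doubleword D register) instead.  */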
26806 static machine_mode
26807 arm_preferred_simd_mode (machine_mode mode)
26809 if (TARGET_NEON)
26810 switch (mode)
26812 case SFmode:
26813 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26814 case SImode:
26815 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26816 case HImode:
26817 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26818 case QImode:
26819 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26820 case DImode:
26821 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26822 return V2DImode;
26823 break;
26825 default:;
26828 if (TARGET_REALLY_IWMMXT)
26829 switch (mode)
26831 case SImode:
26832 return V2SImode;
26833 case HImode:
26834 return V4HImode;
26835 case QImode:
26836 return V8QImode;
26838 default:;
26841 return word_mode;
26844 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26846 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26847 using r0-r4 for function arguments, r7 for the stack frame, and not have
26848 enough left over to do doubleword arithmetic. For Thumb-2 all the
26849 potentially problematic instructions accept high registers so this is not
26850 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26851 that require many low registers. */
26852 static bool
26853 arm_class_likely_spilled_p (reg_class_t rclass)
26855 if ((TARGET_THUMB1 && rclass == LO_REGS)
26856 || rclass == CC_REG)
26857 return true;
26859 return false;
26862 /* Implements target hook small_register_classes_for_mode_p. */
26863 bool
26864 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26866 return TARGET_THUMB1;
26869 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26870 ARM insns and therefore guarantee that the shift count is modulo 256.
26871 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26872 guarantee no particular behavior for out-of-range counts. */
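/* For example, an SImode shift whose count register holds 257 behaves as a
   shift by 1 (257 & 255), hence the mask of 255; DImode shifts give no such
   guarantee, hence the mask of 0.  */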
26874 static unsigned HOST_WIDE_INT
26875 arm_shift_truncation_mask (machine_mode mode)
26877 return mode == SImode ? 255 : 0;
26881 /* Map internal gcc register numbers to DWARF2 register numbers. */
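/* For instance, core registers r0-r15 keep their own numbers, s5 maps to
   64 + 5 = 69, and d16 (which has no single-precision alias) maps to
   256 + 16 = 272.  */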
26883 unsigned int
26884 arm_dbx_register_number (unsigned int regno)
26886 if (regno < 16)
26887 return regno;
26889 if (IS_VFP_REGNUM (regno))
26891 /* See comment in arm_dwarf_register_span. */
26892 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26893 return 64 + regno - FIRST_VFP_REGNUM;
26894 else
26895 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26898 if (IS_IWMMXT_GR_REGNUM (regno))
26899 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26901 if (IS_IWMMXT_REGNUM (regno))
26902 return 112 + regno - FIRST_IWMMXT_REGNUM;
26904 return DWARF_FRAME_REGISTERS;
26907 /* DWARF models VFPv3 registers as 32 64-bit registers.
26908 GCC models them as 64 32-bit registers, so we need to describe this to
26909 the DWARF generation code. Other registers can use the default. */
26910 static rtx
26911 arm_dwarf_register_span (rtx rtl)
26913 machine_mode mode;
26914 unsigned regno;
26915 rtx parts[16];
26916 int nregs;
26917 int i;
26919 regno = REGNO (rtl);
26920 if (!IS_VFP_REGNUM (regno))
26921 return NULL_RTX;
26923 /* XXX FIXME: The EABI defines two VFP register ranges:
26924 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26925 256-287: D0-D31
26926 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26927 corresponding D register. Until GDB supports this, we shall use the
26928 legacy encodings. We also use these encodings for D0-D15 for
26929 compatibility with older debuggers. */
26930 mode = GET_MODE (rtl);
26931 if (GET_MODE_SIZE (mode) < 8)
26932 return NULL_RTX;
26934 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26936 nregs = GET_MODE_SIZE (mode) / 4;
26937 for (i = 0; i < nregs; i += 2)
26938 if (TARGET_BIG_END)
26940 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26941 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26943 else
26945 parts[i] = gen_rtx_REG (SImode, regno + i);
26946 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26949 else
26951 nregs = GET_MODE_SIZE (mode) / 8;
26952 for (i = 0; i < nregs; i++)
26953 parts[i] = gen_rtx_REG (DImode, regno + i);
26956 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
26959 #if ARM_UNWIND_INFO
26960 /* Emit unwind directives for a store-multiple instruction or stack pointer
26961 push during alignment.
26962 These should only ever be generated by the function prologue code, so
26963 expect them to have a particular form.
26964 The store-multiple instruction sometimes pushes pc as the last register,
26965 although it should not be tracked in unwind information, or for -Os
26966 sometimes pushes some dummy registers before the first register that needs
26967 to be tracked in unwind information; such dummy registers are there just
26968 to avoid a separate stack adjustment, and will not be restored in the
26969 epilogue. */
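/* For example, a prologue "push {r4, r5, lr}" is annotated here as
   ".save {r4, r5, lr}", and any dummy registers pushed for -Os show up as an
   extra ".pad" directive rather than as saved registers.  */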
26971 static void
26972 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26974 int i;
26975 HOST_WIDE_INT offset;
26976 HOST_WIDE_INT nregs;
26977 int reg_size;
26978 unsigned reg;
26979 unsigned lastreg;
26980 unsigned padfirst = 0, padlast = 0;
26981 rtx e;
26983 e = XVECEXP (p, 0, 0);
26984 gcc_assert (GET_CODE (e) == SET);
26986 /* First insn will adjust the stack pointer. */
26987 gcc_assert (GET_CODE (e) == SET
26988 && REG_P (SET_DEST (e))
26989 && REGNO (SET_DEST (e)) == SP_REGNUM
26990 && GET_CODE (SET_SRC (e)) == PLUS);
26992 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26993 nregs = XVECLEN (p, 0) - 1;
26994 gcc_assert (nregs);
26996 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26997 if (reg < 16)
26999 /* For -Os dummy registers can be pushed at the beginning to
27000 avoid a separate stack pointer adjustment. */
27001 e = XVECEXP (p, 0, 1);
27002 e = XEXP (SET_DEST (e), 0);
27003 if (GET_CODE (e) == PLUS)
27004 padfirst = INTVAL (XEXP (e, 1));
27005 gcc_assert (padfirst == 0 || optimize_size);
27006 /* The function prologue may also push pc, but not annotate it as it is
27007 never restored. We turn this into a stack pointer adjustment. */
27008 e = XVECEXP (p, 0, nregs);
27009 e = XEXP (SET_DEST (e), 0);
27010 if (GET_CODE (e) == PLUS)
27011 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27012 else
27013 padlast = offset - 4;
27014 gcc_assert (padlast == 0 || padlast == 4);
27015 if (padlast == 4)
27016 fprintf (asm_out_file, "\t.pad #4\n");
27017 reg_size = 4;
27018 fprintf (asm_out_file, "\t.save {");
27020 else if (IS_VFP_REGNUM (reg))
27022 reg_size = 8;
27023 fprintf (asm_out_file, "\t.vsave {");
27025 else
27026 /* Unknown register type. */
27027 gcc_unreachable ();
27029 /* If the stack increment doesn't match the size of the saved registers,
27030 something has gone horribly wrong. */
27031 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27033 offset = padfirst;
27034 lastreg = 0;
27035 /* The remaining insns will describe the stores. */
27036 for (i = 1; i <= nregs; i++)
27038 /* Expect (set (mem <addr>) (reg)).
27039 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27040 e = XVECEXP (p, 0, i);
27041 gcc_assert (GET_CODE (e) == SET
27042 && MEM_P (SET_DEST (e))
27043 && REG_P (SET_SRC (e)));
27045 reg = REGNO (SET_SRC (e));
27046 gcc_assert (reg >= lastreg);
27048 if (i != 1)
27049 fprintf (asm_out_file, ", ");
27050 /* We can't use %r for VFP because we need to use the
27051 double-precision register names. */
27052 if (IS_VFP_REGNUM (reg))
27053 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27054 else
27055 asm_fprintf (asm_out_file, "%r", reg);
27057 if (flag_checking)
27059 /* Check that the addresses are consecutive. */
27060 e = XEXP (SET_DEST (e), 0);
27061 if (GET_CODE (e) == PLUS)
27062 gcc_assert (REG_P (XEXP (e, 0))
27063 && REGNO (XEXP (e, 0)) == SP_REGNUM
27064 && CONST_INT_P (XEXP (e, 1))
27065 && offset == INTVAL (XEXP (e, 1)));
27066 else
27067 gcc_assert (i == 1
27068 && REG_P (e)
27069 && REGNO (e) == SP_REGNUM);
27070 offset += reg_size;
27073 fprintf (asm_out_file, "}\n");
27074 if (padfirst)
27075 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27078 /* Emit unwind directives for a SET. */
27080 static void
27081 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27083 rtx e0;
27084 rtx e1;
27085 unsigned reg;
27087 e0 = XEXP (p, 0);
27088 e1 = XEXP (p, 1);
27089 switch (GET_CODE (e0))
27091 case MEM:
27092 /* Pushing a single register. */
27093 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27094 || !REG_P (XEXP (XEXP (e0, 0), 0))
27095 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27096 abort ();
27098 asm_fprintf (asm_out_file, "\t.save ");
27099 if (IS_VFP_REGNUM (REGNO (e1)))
27100 asm_fprintf(asm_out_file, "{d%d}\n",
27101 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27102 else
27103 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27104 break;
27106 case REG:
27107 if (REGNO (e0) == SP_REGNUM)
27109 /* A stack increment. */
27110 if (GET_CODE (e1) != PLUS
27111 || !REG_P (XEXP (e1, 0))
27112 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27113 || !CONST_INT_P (XEXP (e1, 1)))
27114 abort ();
27116 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27117 -INTVAL (XEXP (e1, 1)));
27119 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27121 HOST_WIDE_INT offset;
27123 if (GET_CODE (e1) == PLUS)
27125 if (!REG_P (XEXP (e1, 0))
27126 || !CONST_INT_P (XEXP (e1, 1)))
27127 abort ();
27128 reg = REGNO (XEXP (e1, 0));
27129 offset = INTVAL (XEXP (e1, 1));
27130 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27131 HARD_FRAME_POINTER_REGNUM, reg,
27132 offset);
27134 else if (REG_P (e1))
27136 reg = REGNO (e1);
27137 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27138 HARD_FRAME_POINTER_REGNUM, reg);
27140 else
27141 abort ();
27143 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27145 /* Move from sp to reg. */
27146 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27148 else if (GET_CODE (e1) == PLUS
27149 && REG_P (XEXP (e1, 0))
27150 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27151 && CONST_INT_P (XEXP (e1, 1)))
27153 /* Set reg to offset from sp. */
27154 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27155 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27157 else
27158 abort ();
27159 break;
27161 default:
27162 abort ();
27167 /* Emit unwind directives for the given insn. */
27169 static void
27170 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27172 rtx note, pat;
27173 bool handled_one = false;
27175 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27176 return;
27178 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27179 && (TREE_NOTHROW (current_function_decl)
27180 || crtl->all_throwers_are_sibcalls))
27181 return;
27183 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27184 return;
27186 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27188 switch (REG_NOTE_KIND (note))
27190 case REG_FRAME_RELATED_EXPR:
27191 pat = XEXP (note, 0);
27192 goto found;
27194 case REG_CFA_REGISTER:
27195 pat = XEXP (note, 0);
27196 if (pat == NULL)
27198 pat = PATTERN (insn);
27199 if (GET_CODE (pat) == PARALLEL)
27200 pat = XVECEXP (pat, 0, 0);
27203 /* Only emitted for IS_STACKALIGN re-alignment. */
27205 rtx dest, src;
27206 unsigned reg;
27208 src = SET_SRC (pat);
27209 dest = SET_DEST (pat);
27211 gcc_assert (src == stack_pointer_rtx);
27212 reg = REGNO (dest);
27213 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27214 reg + 0x90, reg);
27216 handled_one = true;
27217 break;
27219 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27220 to get correct dwarf information for shrink-wrapping. We should not
27221 emit unwind information for it because these notes are used either for
27222 pretend arguments or to adjust sp and restore registers from the
27223 stack. */
27224 case REG_CFA_DEF_CFA:
27225 case REG_CFA_ADJUST_CFA:
27226 case REG_CFA_RESTORE:
27227 return;
27229 case REG_CFA_EXPRESSION:
27230 case REG_CFA_OFFSET:
27231 /* ??? Only handling here what we actually emit. */
27232 gcc_unreachable ();
27234 default:
27235 break;
27238 if (handled_one)
27239 return;
27240 pat = PATTERN (insn);
27241 found:
27243 switch (GET_CODE (pat))
27245 case SET:
27246 arm_unwind_emit_set (asm_out_file, pat);
27247 break;
27249 case SEQUENCE:
27250 /* Store multiple. */
27251 arm_unwind_emit_sequence (asm_out_file, pat);
27252 break;
27254 default:
27255 abort();
27260 /* Output a reference from a function exception table to the type_info
27261 object X. The EABI specifies that the symbol should be relocated by
27262 an R_ARM_TARGET2 relocation. */
27264 static bool
27265 arm_output_ttype (rtx x)
27267 fputs ("\t.word\t", asm_out_file);
27268 output_addr_const (asm_out_file, x);
27269 /* Use special relocations for symbol references. */
27270 if (!CONST_INT_P (x))
27271 fputs ("(TARGET2)", asm_out_file);
27272 fputc ('\n', asm_out_file);
27274 return TRUE;
27277 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27279 static void
27280 arm_asm_emit_except_personality (rtx personality)
27282 fputs ("\t.personality\t", asm_out_file);
27283 output_addr_const (asm_out_file, personality);
27284 fputc ('\n', asm_out_file);
27286 #endif /* ARM_UNWIND_INFO */
27288 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27290 static void
27291 arm_asm_init_sections (void)
27293 #if ARM_UNWIND_INFO
27294 exception_section = get_unnamed_section (0, output_section_asm_op,
27295 "\t.handlerdata");
27296 #endif /* ARM_UNWIND_INFO */
27298 #ifdef OBJECT_FORMAT_ELF
27299 if (target_pure_code)
27300 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27301 #endif
27304 /* Output unwind directives for the start/end of a function. */
27306 void
27307 arm_output_fn_unwind (FILE * f, bool prologue)
27309 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27310 return;
27312 if (prologue)
27313 fputs ("\t.fnstart\n", f);
27314 else
27316 /* If this function will never be unwound, then mark it as such.
27317 The same condition is used in arm_unwind_emit to suppress
27318 the frame annotations. */
27319 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27320 && (TREE_NOTHROW (current_function_decl)
27321 || crtl->all_throwers_are_sibcalls))
27322 fputs("\t.cantunwind\n", f);
27324 fputs ("\t.fnend\n", f);
27328 static bool
27329 arm_emit_tls_decoration (FILE *fp, rtx x)
27331 enum tls_reloc reloc;
27332 rtx val;
27334 val = XVECEXP (x, 0, 0);
27335 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27337 output_addr_const (fp, val);
27339 switch (reloc)
27341 case TLS_GD32:
27342 fputs ("(tlsgd)", fp);
27343 break;
27344 case TLS_LDM32:
27345 fputs ("(tlsldm)", fp);
27346 break;
27347 case TLS_LDO32:
27348 fputs ("(tlsldo)", fp);
27349 break;
27350 case TLS_IE32:
27351 fputs ("(gottpoff)", fp);
27352 break;
27353 case TLS_LE32:
27354 fputs ("(tpoff)", fp);
27355 break;
27356 case TLS_DESCSEQ:
27357 fputs ("(tlsdesc)", fp);
27358 break;
27359 default:
27360 gcc_unreachable ();
27363 switch (reloc)
27365 case TLS_GD32:
27366 case TLS_LDM32:
27367 case TLS_IE32:
27368 case TLS_DESCSEQ:
27369 fputs (" + (. - ", fp);
27370 output_addr_const (fp, XVECEXP (x, 0, 2));
27371 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27372 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27373 output_addr_const (fp, XVECEXP (x, 0, 3));
27374 fputc (')', fp);
27375 break;
27376 default:
27377 break;
27380 return TRUE;
27383 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27385 static void
27386 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27388 gcc_assert (size == 4);
27389 fputs ("\t.word\t", file);
27390 output_addr_const (file, x);
27391 fputs ("(tlsldo)", file);
27394 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27396 static bool
27397 arm_output_addr_const_extra (FILE *fp, rtx x)
27399 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27400 return arm_emit_tls_decoration (fp, x);
27401 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27403 char label[256];
27404 int labelno = INTVAL (XVECEXP (x, 0, 0));
27406 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27407 assemble_name_raw (fp, label);
27409 return TRUE;
27411 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27413 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27414 if (GOT_PCREL)
27415 fputs ("+.", fp);
27416 fputs ("-(", fp);
27417 output_addr_const (fp, XVECEXP (x, 0, 0));
27418 fputc (')', fp);
27419 return TRUE;
27421 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27423 output_addr_const (fp, XVECEXP (x, 0, 0));
27424 if (GOT_PCREL)
27425 fputs ("+.", fp);
27426 fputs ("-(", fp);
27427 output_addr_const (fp, XVECEXP (x, 0, 1));
27428 fputc (')', fp);
27429 return TRUE;
27431 else if (GET_CODE (x) == CONST_VECTOR)
27432 return arm_emit_vector_const (fp, x);
27434 return FALSE;
27437 /* Output assembly for a shift instruction.
27438 SET_FLAGS determines how the instruction modifies the condition codes.
27439 0 - Do not set condition codes.
27440 1 - Set condition codes.
27441 2 - Use smallest instruction. */
27442 const char *
27443 arm_output_shift(rtx * operands, int set_flags)
27445 char pattern[100];
27446 static const char flag_chars[3] = {'?', '.', '!'};
27447 const char *shift;
27448 HOST_WIDE_INT val;
27449 char c;
27451 c = flag_chars[set_flags];
27452 shift = shift_op(operands[3], &val);
27453 if (shift)
27455 if (val != -1)
27456 operands[2] = GEN_INT(val);
27457 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27459 else
27460 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27462 output_asm_insn (pattern, operands);
27463 return "";
27466 /* Output assembly for a WMMX immediate shift instruction. */
27467 const char *
27468 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27470 int shift = INTVAL (operands[2]);
27471 char templ[50];
27472 machine_mode opmode = GET_MODE (operands[0]);
27474 gcc_assert (shift >= 0);
27476 /* Handle the case where the shift value is greater than 63 (for the D
27477 qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
27478 if (((opmode == V4HImode) && (shift > 15))
27479 || ((opmode == V2SImode) && (shift > 31))
27480 || ((opmode == DImode) && (shift > 63)))
27482 if (wror_or_wsra)
27484 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27485 output_asm_insn (templ, operands);
27486 if (opmode == DImode)
27488 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27489 output_asm_insn (templ, operands);
27492 else
27494 /* The destination register will contain all zeros. */
27495 sprintf (templ, "wzero\t%%0");
27496 output_asm_insn (templ, operands);
27498 return "";
27501 if ((opmode == DImode) && (shift > 32))
27503 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27504 output_asm_insn (templ, operands);
27505 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27506 output_asm_insn (templ, operands);
27508 else
27510 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27511 output_asm_insn (templ, operands);
27513 return "";
27516 /* Output assembly for a WMMX tinsr instruction. */
27517 const char *
27518 arm_output_iwmmxt_tinsr (rtx *operands)
27520 int mask = INTVAL (operands[3]);
27521 int i;
27522 char templ[50];
27523 int units = mode_nunits[GET_MODE (operands[0])];
27524 gcc_assert ((mask & (mask - 1)) == 0);
27525 for (i = 0; i < units; ++i)
27527 if ((mask & 0x01) == 1)
27529 break;
27531 mask >>= 1;
27533 gcc_assert (i < units);
27535 switch (GET_MODE (operands[0]))
27537 case V8QImode:
27538 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27539 break;
27540 case V4HImode:
27541 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27542 break;
27543 case V2SImode:
27544 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27545 break;
27546 default:
27547 gcc_unreachable ();
27548 break;
27550 output_asm_insn (templ, operands);
27552 return "";
27555 /* Output a Thumb-1 casesi dispatch sequence. */
27556 const char *
27557 thumb1_output_casesi (rtx *operands)
27559 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27561 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27563 switch (GET_MODE(diff_vec))
27565 case QImode:
27566 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27567 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27568 case HImode:
27569 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27570 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27571 case SImode:
27572 return "bl\t%___gnu_thumb1_case_si";
27573 default:
27574 gcc_unreachable ();
27578 /* Output a Thumb-2 casesi instruction. */
27579 const char *
27580 thumb2_output_casesi (rtx *operands)
27582 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27584 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27586 output_asm_insn ("cmp\t%0, %1", operands);
27587 output_asm_insn ("bhi\t%l3", operands);
27588 switch (GET_MODE(diff_vec))
27590 case QImode:
27591 return "tbb\t[%|pc, %0]";
27592 case HImode:
27593 return "tbh\t[%|pc, %0, lsl #1]";
27594 case SImode:
27595 if (flag_pic)
27597 output_asm_insn ("adr\t%4, %l2", operands);
27598 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27599 output_asm_insn ("add\t%4, %4, %5", operands);
27600 return "bx\t%4";
27602 else
27604 output_asm_insn ("adr\t%4, %l2", operands);
27605 return "ldr\t%|pc, [%4, %0, lsl #2]";
27607 default:
27608 gcc_unreachable ();
27612 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27613 per-core tuning structs. */
27614 static int
27615 arm_issue_rate (void)
27617 return current_tune->issue_rate;
27620 /* Return how many instructions the scheduler should look ahead to choose
27621 the best one. */
27622 static int
27623 arm_first_cycle_multipass_dfa_lookahead (void)
27625 int issue_rate = arm_issue_rate ();
27627 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27630 /* Enable modeling of L2 auto-prefetcher. */
27631 static int
27632 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27634 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27637 const char *
27638 arm_mangle_type (const_tree type)
27640 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27641 has to be mangled as if it is in the "std" namespace. */
27642 if (TARGET_AAPCS_BASED
27643 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27644 return "St9__va_list";
27646 /* Half-precision float. */
27647 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27648 return "Dh";
27650 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27651 builtin type. */
27652 if (TYPE_NAME (type) != NULL)
27653 return arm_mangle_builtin_type (type);
27655 /* Use the default mangling. */
27656 return NULL;
27659 /* Order of allocation of core registers for Thumb: this allocation is
27660 written over the corresponding initial entries of the array
27661 initialized with REG_ALLOC_ORDER. We allocate all low registers
27662 first. Saving and restoring a low register is usually cheaper than
27663 using a call-clobbered high register. */
27665 static const int thumb_core_reg_alloc_order[] =
27667 3, 2, 1, 0, 4, 5, 6, 7,
27668 12, 14, 8, 9, 10, 11
27671 /* Adjust register allocation order when compiling for Thumb. */
27673 void
27674 arm_order_regs_for_local_alloc (void)
27676 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27677 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27678 if (TARGET_THUMB)
27679 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27680 sizeof (thumb_core_reg_alloc_order));
27683 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27685 bool
27686 arm_frame_pointer_required (void)
27688 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27689 return true;
27691 /* If the function receives nonlocal gotos, it needs to save the frame
27692 pointer in the nonlocal_goto_save_area object. */
27693 if (cfun->has_nonlocal_label)
27694 return true;
27696 /* The frame pointer is required for non-leaf APCS frames. */
27697 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27698 return true;
27700 /* If we are probing the stack in the prologue, we will have a faulting
27701 instruction prior to the stack adjustment and this requires a frame
27702 pointer if we want to catch the exception using the EABI unwinder. */
27703 if (!IS_INTERRUPT (arm_current_func_type ())
27704 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27705 && arm_except_unwind_info (&global_options) == UI_TARGET
27706 && cfun->can_throw_non_call_exceptions)
27708 HOST_WIDE_INT size = get_frame_size ();
27710 /* That's irrelevant if there is no stack adjustment. */
27711 if (size <= 0)
27712 return false;
27714 /* That's relevant only if there is a stack probe. */
27715 if (crtl->is_leaf && !cfun->calls_alloca)
27717 /* We don't have the final size of the frame so adjust. */
27718 size += 32 * UNITS_PER_WORD;
27719 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27720 return true;
27722 else
27723 return true;
27726 return false;
27729 /* Only Thumb-1 does not support conditional execution, so return true if
27730 the target is not Thumb-1. */
27731 static bool
27732 arm_have_conditional_execution (void)
27734 return !TARGET_THUMB1;
27737 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
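/* For example, a 128-bit Neon vector type is given 8-byte (64-bit) alignment
   under AAPCS rather than its natural 16-byte alignment.  */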
27738 static HOST_WIDE_INT
27739 arm_vector_alignment (const_tree type)
27741 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27743 if (TARGET_AAPCS_BASED)
27744 align = MIN (align, 64);
27746 return align;
27749 static unsigned int
27750 arm_autovectorize_vector_sizes (void)
27752 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27755 static bool
27756 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27758 /* Vectors which aren't in packed structures will not be less aligned than
27759 the natural alignment of their element type, so this is safe. */
27760 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27761 return !is_packed;
27763 return default_builtin_vector_alignment_reachable (type, is_packed);
27766 static bool
27767 arm_builtin_support_vector_misalignment (machine_mode mode,
27768 const_tree type, int misalignment,
27769 bool is_packed)
27771 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27773 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27775 if (is_packed)
27776 return align == 1;
27778 /* If the misalignment is unknown, we should be able to handle the access
27779 so long as it is not to a member of a packed data structure. */
27780 if (misalignment == -1)
27781 return true;
27783 /* Return true if the misalignment is a multiple of the natural alignment
27784 of the vector's element type. This is probably always going to be
27785 true in practice, since we've already established that this isn't a
27786 packed access. */
27787 return ((misalignment % align) == 0);
27790 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27791 is_packed);
27794 static void
27795 arm_conditional_register_usage (void)
27797 int regno;
27799 if (TARGET_THUMB1 && optimize_size)
27801 /* When optimizing for size on Thumb-1, it's better not
27802 to use the HI regs, because of the overhead of
27803 stacking them. */
27804 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27805 fixed_regs[regno] = call_used_regs[regno] = 1;
27808 /* The link register can be clobbered by any branch insn,
27809 but we have no way to track that at present, so mark
27810 it as unavailable. */
27811 if (TARGET_THUMB1)
27812 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27814 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27816 /* VFPv3 registers are disabled when earlier VFP
27817 versions are selected due to the definition of
27818 LAST_VFP_REGNUM. */
27819 for (regno = FIRST_VFP_REGNUM;
27820 regno <= LAST_VFP_REGNUM; ++ regno)
27822 fixed_regs[regno] = 0;
27823 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27824 || regno >= FIRST_VFP_REGNUM + 32;
27828 if (TARGET_REALLY_IWMMXT)
27830 regno = FIRST_IWMMXT_GR_REGNUM;
27831 /* The 2002/10/09 revision of the XScale ABI has wCG0
27832 and wCG1 as call-preserved registers. The 2002/11/21
27833 revision changed this so that all wCG registers are
27834 scratch registers. */
27835 for (regno = FIRST_IWMMXT_GR_REGNUM;
27836 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27837 fixed_regs[regno] = 0;
27838 /* The XScale ABI has wR0 - wR9 as scratch registers,
27839 the rest as call-preserved registers. */
27840 for (regno = FIRST_IWMMXT_REGNUM;
27841 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27843 fixed_regs[regno] = 0;
27844 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27848 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27850 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27851 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27853 else if (TARGET_APCS_STACK)
27855 fixed_regs[10] = 1;
27856 call_used_regs[10] = 1;
27858 /* -mcaller-super-interworking reserves r11 for calls to
27859 _interwork_r11_call_via_rN(). Making the register global
27860 is an easy way of ensuring that it remains valid for all
27861 calls. */
27862 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27863 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27865 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27866 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27867 if (TARGET_CALLER_INTERWORKING)
27868 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27870 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27873 static reg_class_t
27874 arm_preferred_rename_class (reg_class_t rclass)
27876 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27877 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
27878 so that code size can be reduced. */
27879 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27880 return LO_REGS;
27881 else
27882 return NO_REGS;
27885 /* Compute the attribute "length" of insn "*push_multi".
27886 This function MUST therefore be kept in sync with that insn pattern. */
27888 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27890 int i, regno, hi_reg;
27891 int num_saves = XVECLEN (parallel_op, 0);
27893 /* ARM mode. */
27894 if (TARGET_ARM)
27895 return 4;
27896 /* Thumb1 mode. */
27897 if (TARGET_THUMB1)
27898 return 2;
27900 /* Thumb2 mode. */
27901 regno = REGNO (first_op);
27902 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
27903 list is 8-bit. Normally this means all registers in the list must be
27904 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
27905 encodings. The one exception is PUSH, where LR in HI_REGS can be used
27906 with a 16-bit encoding. */
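/* For example, "push {r0-r7, lr}" still has a 16-bit encoding, whereas
   "push {r4, r8}" needs the 32-bit encoding because r8 is a high register.  */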
27907 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27908 for (i = 1; i < num_saves && !hi_reg; i++)
27910 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27911 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27914 if (!hi_reg)
27915 return 2;
27916 return 4;
27919 /* Compute the attribute "length" of an insn. Currently, this function is used
27920 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27921 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27922 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
27923 true if OPERANDS contains an insn which explicitly updates the base register. */
27926 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27928 /* ARM mode. */
27929 if (TARGET_ARM)
27930 return 4;
27931 /* Thumb1 mode. */
27932 if (TARGET_THUMB1)
27933 return 2;
27935 rtx parallel_op = operands[0];
27937 /* Initialize to the number of elements in the PARALLEL. */
27937 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27939 /* Initialize the value to the base register number. */
27939 unsigned regno = REGNO (operands[1]);
27940 /* Skip the return and write back patterns.
27941 We only need the register pop pattern for later analysis. */
27942 unsigned first_indx = 0;
27943 first_indx += return_pc ? 1 : 0;
27944 first_indx += write_back_p ? 1 : 0;
27946 /* A pop operation can be done through LDM or POP. If the base register is SP
27947 and write back is used, then an LDM is an alias of POP. */
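/* For instance, "ldm sp!, {r4, r5, pc}" assembles to the same instruction
   as "pop {r4, r5, pc}".  */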
27948 bool pop_p = (regno == SP_REGNUM && write_back_p);
27949 bool ldm_p = !pop_p;
27951 /* Check base register for LDM. */
27952 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27953 return 4;
27955 /* Check each register in the list. */
27956 for (; indx >= first_indx; indx--)
27958 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27959 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27960 comment in arm_attr_length_push_multi. */
27961 if (REGNO_REG_CLASS (regno) == HI_REGS
27962 && (regno != PC_REGNUM || ldm_p))
27963 return 4;
27966 return 2;
27969 /* Compute the number of instructions emitted by output_move_double. */
27971 arm_count_output_move_double_insns (rtx *operands)
27973 int count;
27974 rtx ops[2];
27975 /* output_move_double may modify the operands array, so call it
27976 here on a copy of the array. */
27977 ops[0] = operands[0];
27978 ops[1] = operands[1];
27979 output_move_double (ops, false, &count);
27980 return count;
27984 vfp3_const_double_for_fract_bits (rtx operand)
27986 REAL_VALUE_TYPE r0;
27988 if (!CONST_DOUBLE_P (operand))
27989 return 0;
27991 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27992 if (exact_real_inverse (DFmode, &r0)
27993 && !REAL_VALUE_NEGATIVE (r0))
27995 if (exact_real_truncate (DFmode, &r0))
27997 HOST_WIDE_INT value = real_to_integer (&r0);
27998 value = value & 0xffffffff;
27999 if ((value != 0) && ( (value & (value - 1)) == 0))
28001 int ret = exact_log2 (value);
28002 gcc_assert (IN_RANGE (ret, 0, 31));
28003 return ret;
28007 return 0;
28010 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28011 log2 is in [1, 32], return that log2. Otherwise return -1.
28012 This is used in the patterns for vcvt.s32.f32 floating-point to
28013 fixed-point conversions. */
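/* For example, 4.0 yields 2, while 3.0 (not a power of two) and 1.0 (log2
   outside [1, 32]) yield -1.  */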
28016 vfp3_const_double_for_bits (rtx x)
28018 const REAL_VALUE_TYPE *r;
28020 if (!CONST_DOUBLE_P (x))
28021 return -1;
28023 r = CONST_DOUBLE_REAL_VALUE (x);
28025 if (REAL_VALUE_NEGATIVE (*r)
28026 || REAL_VALUE_ISNAN (*r)
28027 || REAL_VALUE_ISINF (*r)
28028 || !real_isinteger (r, SFmode))
28029 return -1;
28031 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28033 /* The exact_log2 above will have returned -1 if this is
28034 not an exact log2. */
28035 if (!IN_RANGE (hwint, 1, 32))
28036 return -1;
28038 return hwint;
28042 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28044 static void
28045 arm_pre_atomic_barrier (enum memmodel model)
28047 if (need_atomic_barrier_p (model, true))
28048 emit_insn (gen_memory_barrier ());
28051 static void
28052 arm_post_atomic_barrier (enum memmodel model)
28054 if (need_atomic_barrier_p (model, false))
28055 emit_insn (gen_memory_barrier ());
28058 /* Emit the load-exclusive and store-exclusive instructions.
28059 Use acquire and release versions if necessary. */
28061 static void
28062 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28064 rtx (*gen) (rtx, rtx);
28066 if (acq)
28068 switch (mode)
28070 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28071 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28072 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28073 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28074 default:
28075 gcc_unreachable ();
28078 else
28080 switch (mode)
28082 case QImode: gen = gen_arm_load_exclusiveqi; break;
28083 case HImode: gen = gen_arm_load_exclusivehi; break;
28084 case SImode: gen = gen_arm_load_exclusivesi; break;
28085 case DImode: gen = gen_arm_load_exclusivedi; break;
28086 default:
28087 gcc_unreachable ();
28091 emit_insn (gen (rval, mem));
28094 static void
28095 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28096 rtx mem, bool rel)
28098 rtx (*gen) (rtx, rtx, rtx);
28100 if (rel)
28102 switch (mode)
28104 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28105 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28106 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28107 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28108 default:
28109 gcc_unreachable ();
28112 else
28114 switch (mode)
28116 case QImode: gen = gen_arm_store_exclusiveqi; break;
28117 case HImode: gen = gen_arm_store_exclusivehi; break;
28118 case SImode: gen = gen_arm_store_exclusivesi; break;
28119 case DImode: gen = gen_arm_store_exclusivedi; break;
28120 default:
28121 gcc_unreachable ();
28125 emit_insn (gen (bval, rval, mem));
28128 /* Emit INSN as a jump instruction and mark it as unlikely to be taken. */
28130 static void
28131 emit_unlikely_jump (rtx insn)
28133 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28135 rtx_insn *jump = emit_jump_insn (insn);
28136 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
28139 /* Expand a compare and swap pattern. */
28141 void
28142 arm_expand_compare_and_swap (rtx operands[])
28144 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28145 machine_mode mode;
28146 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28148 bval = operands[0];
28149 rval = operands[1];
28150 mem = operands[2];
28151 oldval = operands[3];
28152 newval = operands[4];
28153 is_weak = operands[5];
28154 mod_s = operands[6];
28155 mod_f = operands[7];
28156 mode = GET_MODE (mem);
28158 /* Normally the succ memory model must be stronger than fail, but in the
28159 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28160 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28162 if (TARGET_HAVE_LDACQ
28163 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28164 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28165 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28167 switch (mode)
28169 case QImode:
28170 case HImode:
28171 /* For narrow modes, we're going to perform the comparison in SImode,
28172 so do the zero-extension now. */
28173 rval = gen_reg_rtx (SImode);
28174 oldval = convert_modes (SImode, mode, oldval, true);
28175 /* FALLTHRU */
28177 case SImode:
28178 /* Force the value into a register if needed. We waited until after
28179 the zero-extension above to do this properly. */
28180 if (!arm_add_operand (oldval, SImode))
28181 oldval = force_reg (SImode, oldval);
28182 break;
28184 case DImode:
28185 if (!cmpdi_operand (oldval, mode))
28186 oldval = force_reg (mode, oldval);
28187 break;
28189 default:
28190 gcc_unreachable ();
28193 switch (mode)
28195 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28196 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28197 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28198 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28199 default:
28200 gcc_unreachable ();
28203 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM);
28204 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28206 if (mode == QImode || mode == HImode)
28207 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28209 /* In all cases, we arrange for success to be signaled by Z set.
28210 This arrangement allows for the boolean result to be used directly
28211 in a subsequent branch, post optimization. For Thumb-1 targets, the
28212 boolean negation of the result is also stored in bval because the Thumb-1
28213 backend lacks dependency tracking for the CC flag, as flag-setting is not
28214 represented at the RTL level. */
28215 if (TARGET_THUMB1)
28216 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28217 else
28219 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28220 emit_insn (gen_rtx_SET (bval, x));
28224 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28225 another memory store between the load-exclusive and store-exclusive can
28226 reset the monitor from Exclusive to Open state. This means we must wait
28227 until after reload to split the pattern, lest we get a register spill in
28228 the middle of the atomic sequence. Success of the compare and swap is
28229 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28230 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28231 the atomic_compare_and_swapmode standard pattern in operand 0). */
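/* Roughly, for a strong SImode compare-and-swap on a 32-bit target this
   split emits a loop of the form

     loop:  ldrex   rval, [mem]
            cmp     rval, oldval
            bne     done
            strex   neg_bval, newval, [mem]
            cmp     neg_bval, #0
            bne     loop
     done:

   with barriers or acquire/release variants added as the memory model
   requires.  */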
28233 void
28234 arm_split_compare_and_swap (rtx operands[])
28236 rtx rval, mem, oldval, newval, neg_bval;
28237 machine_mode mode;
28238 enum memmodel mod_s, mod_f;
28239 bool is_weak;
28240 rtx_code_label *label1, *label2;
28241 rtx x, cond;
28243 rval = operands[1];
28244 mem = operands[2];
28245 oldval = operands[3];
28246 newval = operands[4];
28247 is_weak = (operands[5] != const0_rtx);
28248 mod_s = memmodel_from_int (INTVAL (operands[6]));
28249 mod_f = memmodel_from_int (INTVAL (operands[7]));
28250 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28251 mode = GET_MODE (mem);
28253 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28255 bool use_acquire = TARGET_HAVE_LDACQ
28256 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28257 || is_mm_release (mod_s));
28259 bool use_release = TARGET_HAVE_LDACQ
28260 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28261 || is_mm_acquire (mod_s));
28263 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28264 a full barrier is emitted after the store-release. */
28265 if (is_armv8_sync)
28266 use_acquire = false;
28268 /* Checks whether a barrier is needed and emits one accordingly. */
28269 if (!(use_acquire || use_release))
28270 arm_pre_atomic_barrier (mod_s);
28272 label1 = NULL;
28273 if (!is_weak)
28275 label1 = gen_label_rtx ();
28276 emit_label (label1);
28278 label2 = gen_label_rtx ();
28280 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28282 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28283 as required to communicate with arm_expand_compare_and_swap. */
28284 if (TARGET_32BIT)
28286 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28287 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28288 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28289 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28290 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28292 else
28294 emit_move_insn (neg_bval, const1_rtx);
28295 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28296 if (thumb1_cmpneg_operand (oldval, SImode))
28297 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28298 label2, cond));
28299 else
28300 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28303 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28305 /* Weak or strong, we want EQ to be true for success, so that we
28306 match the flags that we got from the compare above. */
28307 if (TARGET_32BIT)
28309 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28310 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28311 emit_insn (gen_rtx_SET (cond, x));
28314 if (!is_weak)
28316 /* Z is set to boolean value of !neg_bval, as required to communicate
28317 with arm_expand_compare_and_swap. */
28318 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28319 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28322 if (!is_mm_relaxed (mod_f))
28323 emit_label (label2);
28325 /* Checks whether a barrier is needed and emits one accordingly. */
28326 if (is_armv8_sync
28327 || !(use_acquire || use_release))
28328 arm_post_atomic_barrier (mod_s);
28330 if (is_mm_relaxed (mod_f))
28331 emit_label (label2);
28334 /* Split an atomic operation pattern. The operation is given by CODE and is one
28335 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a NAND
28336 operation). The operation is performed on the content at MEM and on VALUE
28337 following the memory model MODEL_RTX. The content at MEM before and after
28338 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28339 success of the operation is returned in COND. Using a scratch register or
28340 an operand register for these determines what result is returned for that
28341 pattern. */
28343 void
28344 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28345 rtx value, rtx model_rtx, rtx cond)
28347 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28348 machine_mode mode = GET_MODE (mem);
28349 machine_mode wmode = (mode == DImode ? DImode : SImode);
28350 rtx_code_label *label;
28351 bool all_low_regs, bind_old_new;
28352 rtx x;
28354 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28356 bool use_acquire = TARGET_HAVE_LDACQ
28357 && !(is_mm_relaxed (model) || is_mm_consume (model)
28358 || is_mm_release (model));
28360 bool use_release = TARGET_HAVE_LDACQ
28361 && !(is_mm_relaxed (model) || is_mm_consume (model)
28362 || is_mm_acquire (model));
28364 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28365 a full barrier is emitted after the store-release. */
28366 if (is_armv8_sync)
28367 use_acquire = false;
28369 /* Checks whether a barrier is needed and emits one accordingly. */
28370 if (!(use_acquire || use_release))
28371 arm_pre_atomic_barrier (model);
28373 label = gen_label_rtx ();
28374 emit_label (label);
28376 if (new_out)
28377 new_out = gen_lowpart (wmode, new_out);
28378 if (old_out)
28379 old_out = gen_lowpart (wmode, old_out);
28380 else
28381 old_out = new_out;
28382 value = simplify_gen_subreg (wmode, value, mode, 0);
28384 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28386 /* Does the operation require the destination and first operand to use the same
28387 register? This is decided by the register constraints of the relevant insn
28388 patterns in thumb1.md. */
28389 gcc_assert (!new_out || REG_P (new_out));
28390 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28391 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28392 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28393 bind_old_new =
28394 (TARGET_THUMB1
28395 && code != SET
28396 && code != MINUS
28397 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28399 /* We want to return the old value while putting the result of the operation
28400 in the same register as the old value, so copy the old value over to the
28401 destination register and use that register for the operation. */
28402 if (old_out && bind_old_new)
28404 emit_move_insn (new_out, old_out);
28405 old_out = new_out;
28408 switch (code)
28410 case SET:
28411 new_out = value;
28412 break;
28414 case NOT:
28415 x = gen_rtx_AND (wmode, old_out, value);
28416 emit_insn (gen_rtx_SET (new_out, x));
28417 x = gen_rtx_NOT (wmode, new_out);
28418 emit_insn (gen_rtx_SET (new_out, x));
28419 break;
28421 case MINUS:
28422 if (CONST_INT_P (value))
28424 value = GEN_INT (-INTVAL (value));
28425 code = PLUS;
28427 /* FALLTHRU */
28429 case PLUS:
28430 if (mode == DImode)
28432 /* DImode plus/minus need to clobber flags. */
28433 /* The adddi3 and subdi3 patterns are incorrectly written so that
28434 they require matching operands, even when we could easily support
28435 three operands. Thankfully, this can be fixed up post-splitting,
28436 as the individual add+adc patterns do accept three operands and
28437 post-reload cprop can make these moves go away. */
28438 emit_move_insn (new_out, old_out);
28439 if (code == PLUS)
28440 x = gen_adddi3 (new_out, new_out, value);
28441 else
28442 x = gen_subdi3 (new_out, new_out, value);
28443 emit_insn (x);
28444 break;
28446 /* FALLTHRU */
28448 default:
28449 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28450 emit_insn (gen_rtx_SET (new_out, x));
28451 break;
28454 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28455 use_release);
28457 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28458 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28460 /* Checks whether a barrier is needed and emits one accordingly. */
28461 if (is_armv8_sync
28462 || !(use_acquire || use_release))
28463 arm_post_atomic_barrier (model);
28466 #define MAX_VECT_LEN 16
28468 struct expand_vec_perm_d
28470 rtx target, op0, op1;
28471 unsigned char perm[MAX_VECT_LEN];
28472 machine_mode vmode;
28473 unsigned char nelt;
28474 bool one_vector_p;
28475 bool testing_p;
28478 /* Generate a variable permutation. */
28480 static void
28481 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28483 machine_mode vmode = GET_MODE (target);
28484 bool one_vector_p = rtx_equal_p (op0, op1);
28486 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28487 gcc_checking_assert (GET_MODE (op0) == vmode);
28488 gcc_checking_assert (GET_MODE (op1) == vmode);
28489 gcc_checking_assert (GET_MODE (sel) == vmode);
28490 gcc_checking_assert (TARGET_NEON);
28492 if (one_vector_p)
28494 if (vmode == V8QImode)
28495 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28496 else
28497 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28499 else
28501 rtx pair;
28503 if (vmode == V8QImode)
28505 pair = gen_reg_rtx (V16QImode);
28506 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28507 pair = gen_lowpart (TImode, pair);
28508 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28510 else
28512 pair = gen_reg_rtx (OImode);
28513 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28514 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28519 void
28520 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28522 machine_mode vmode = GET_MODE (target);
28523 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28524 bool one_vector_p = rtx_equal_p (op0, op1);
28525 rtx rmask[MAX_VECT_LEN], mask;
28527 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28528 numbering of elements for big-endian, we must reverse the order. */
28529 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28531 /* The VTBL instruction does not use a modulo index, so we must take care
28532 of that ourselves. */
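/* Out-of-range VTBL indexes select zero rather than wrapping, so the selector
   is masked down to the valid range first (e.g. with 7 for a single V8QI
   table, or 15 when two V8QI vectors are combined).  */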
28533 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28534 for (i = 0; i < nelt; ++i)
28535 rmask[i] = mask;
28536 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28537 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28539 arm_expand_vec_perm_1 (target, op0, op1, sel);
28542 /* Map lane ordering between architectural lane order and GCC lane order,
28543 taking the ABI into account. See comment above output_move_neon for details. */
28545 static int
28546 neon_endian_lane_map (machine_mode mode, int lane)
28548 if (BYTES_BIG_ENDIAN)
28550 int nelems = GET_MODE_NUNITS (mode);
28551 /* Reverse lane order. */
28552 lane = (nelems - 1 - lane);
28553 /* Reverse D register order, to match ABI. */
28554 if (GET_MODE_SIZE (mode) == 16)
28555 lane = lane ^ (nelems / 2);
28557 return lane;
28560 /* Some permutations index into pairs of vectors; this is a helper function
28561 to map indexes into those pairs of vectors. */
28563 static int
28564 neon_pair_endian_lane_map (machine_mode mode, int lane)
28566 int nelem = GET_MODE_NUNITS (mode);
28567 if (BYTES_BIG_ENDIAN)
28568 lane =
28569 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28570 return lane;
28573 /* Generate or test for an insn that supports a constant permutation. */
28575 /* Recognize patterns for the VUZP insns. */
28577 static bool
28578 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28580 unsigned int i, odd, mask, nelt = d->nelt;
28581 rtx out0, out1, in0, in1;
28582 rtx (*gen)(rtx, rtx, rtx, rtx);
28583 int first_elem;
28584 int swap_nelt;
28586 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28587 return false;
28589 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28590 big-endian pattern on 64-bit vectors, so we correct for that. */
28591 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28592 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28594 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28596 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28597 odd = 0;
28598 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28599 odd = 1;
28600 else
28601 return false;
28602 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28604 for (i = 0; i < nelt; i++)
28606 unsigned elt =
28607 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28608 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28609 return false;
28612 /* Success! */
28613 if (d->testing_p)
28614 return true;
28616 switch (d->vmode)
28618 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28619 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28620 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28621 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28622 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28623 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28624 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28625 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28626 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28627 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28628 default:
28629 gcc_unreachable ();
28632 in0 = d->op0;
28633 in1 = d->op1;
28634 if (swap_nelt != 0)
28635 std::swap (in0, in1);
28637 out0 = d->target;
28638 out1 = gen_reg_rtx (d->vmode);
28639 if (odd)
28640 std::swap (out0, out1);
28642 emit_insn (gen (out0, in0, in1, out1));
28643 return true;
28646 /* Recognize patterns for the VZIP insns. */
28648 static bool
28649 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28651 unsigned int i, high, mask, nelt = d->nelt;
28652 rtx out0, out1, in0, in1;
28653 rtx (*gen)(rtx, rtx, rtx, rtx);
28654 int first_elem;
28655 bool is_swapped;
28657 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28658 return false;
28660 is_swapped = BYTES_BIG_ENDIAN;
28662 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28664 high = nelt / 2;
28665 if (first_elem == neon_endian_lane_map (d->vmode, high))
28667 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28668 high = 0;
28669 else
28670 return false;
28671 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28673 for (i = 0; i < nelt / 2; i++)
28675 unsigned elt =
28676 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28677 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28678 != elt)
28679 return false;
28680 elt =
28681 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28682 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28683 != elt)
28684 return false;
28687 /* Success! */
28688 if (d->testing_p)
28689 return true;
28691 switch (d->vmode)
28693 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28694 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28695 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28696 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28697 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28698 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28699 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28700 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28701 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28702 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28703 default:
28704 gcc_unreachable ();
28707 in0 = d->op0;
28708 in1 = d->op1;
28709 if (is_swapped)
28710 std::swap (in0, in1);
28712 out0 = d->target;
28713 out1 = gen_reg_rtx (d->vmode);
28714 if (high)
28715 std::swap (out0, out1);
28717 emit_insn (gen (out0, in0, in1, out1));
28718 return true;
28721 /* Recognize patterns for the VREV insns. */
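 /* For example, the V16QImode selector {1, 0, 3, 2, 5, 4, 7, 6, 9, 8,
    11, 10, 13, 12, 15, 14} (diff == 1) swaps the bytes within each
    halfword and is matched as a VREV16; for QImode elements, diff == 3
    and diff == 7 correspond to VREV32 and VREV64 respectively.  */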
28723 static bool
28724 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28726 unsigned int i, j, diff, nelt = d->nelt;
28727 rtx (*gen)(rtx, rtx);
28729 if (!d->one_vector_p)
28730 return false;
28732 diff = d->perm[0];
28733 switch (diff)
28735 case 7:
28736 switch (d->vmode)
28738 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28739 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28740 default:
28741 return false;
28743 break;
28744 case 3:
28745 switch (d->vmode)
28747 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28748 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28749 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28750 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28751 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28752 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28753 default:
28754 return false;
28756 break;
28757 case 1:
28758 switch (d->vmode)
28760 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28761 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28762 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28763 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28764 case V4SImode: gen = gen_neon_vrev64v4si; break;
28765 case V2SImode: gen = gen_neon_vrev64v2si; break;
28766 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28767 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28768 default:
28769 return false;
28771 break;
28772 default:
28773 return false;
28776 for (i = 0; i < nelt ; i += diff + 1)
28777 for (j = 0; j <= diff; j += 1)
28779 /* This is guaranteed to be true as the value of diff
28780 is 7, 3 or 1 and we should have enough elements in the
28781 queue to generate this. Getting a vector mask with a
28782 value of diff other than these values implies that
28783 something is wrong by the time we get here. */
28784 gcc_assert (i + j < nelt);
28785 if (d->perm[i + j] != i + diff - j)
28786 return false;
28789 /* Success! */
28790 if (d->testing_p)
28791 return true;
28793 emit_insn (gen (d->target, d->op0));
28794 return true;
28797 /* Recognize patterns for the VTRN insns. */
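 /* For example, with two V8QImode operands the even "transpose" uses
    the selector {0, 8, 2, 10, 4, 12, 6, 14} and the odd one uses
    {1, 9, 3, 11, 5, 13, 7, 15} (little-endian lane numbering).  */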
28799 static bool
28800 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28802 unsigned int i, odd, mask, nelt = d->nelt;
28803 rtx out0, out1, in0, in1;
28804 rtx (*gen)(rtx, rtx, rtx, rtx);
28806 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28807 return false;
28809 /* Note that these are little-endian tests. Adjust for big-endian later. */
28810 if (d->perm[0] == 0)
28811 odd = 0;
28812 else if (d->perm[0] == 1)
28813 odd = 1;
28814 else
28815 return false;
28816 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28818 for (i = 0; i < nelt; i += 2)
28820 if (d->perm[i] != i + odd)
28821 return false;
28822 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28823 return false;
28826 /* Success! */
28827 if (d->testing_p)
28828 return true;
28830 switch (d->vmode)
28832 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28833 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28834 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28835 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28836 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28837 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28838 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28839 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28840 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28841 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28842 default:
28843 gcc_unreachable ();
28846 in0 = d->op0;
28847 in1 = d->op1;
28848 if (BYTES_BIG_ENDIAN)
28850 std::swap (in0, in1);
28851 odd = !odd;
28854 out0 = d->target;
28855 out1 = gen_reg_rtx (d->vmode);
28856 if (odd)
28857 std::swap (out0, out1);
28859 emit_insn (gen (out0, in0, in1, out1));
28860 return true;
28863 /* Recognize patterns for the VEXT insns. */
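 /* For example, the two-operand V8QImode selector {3, 4, 5, 6, 7, 8,
    9, 10} extracts a contiguous window starting at element 3 of the
    concatenated input and is matched as a VEXT with offset 3.  */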
28865 static bool
28866 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28868 unsigned int i, nelt = d->nelt;
28869 rtx (*gen) (rtx, rtx, rtx, rtx);
28870 rtx offset;
28872 unsigned int location;
28874 unsigned int next = d->perm[0] + 1;
28876 /* TODO: Handle GCC's numbering of elements for big-endian. */
28877 if (BYTES_BIG_ENDIAN)
28878 return false;
28880 /* Check if the extracted indexes are increasing by one. */
28881 for (i = 1; i < nelt; next++, i++)
28883 /* If we hit the most significant element of the 2nd vector in
28884 the previous iteration, no need to test further. */
28885 if (next == 2 * nelt)
28886 return false;
28888 /* If we are operating on only one vector, it could be a
28889 rotation. If there are only two elements of size < 64, let
28890 arm_evpc_neon_vrev catch it. */
28891 if (d->one_vector_p && (next == nelt))
28893 if ((nelt == 2) && (d->vmode != V2DImode))
28894 return false;
28895 else
28896 next = 0;
28899 if (d->perm[i] != next)
28900 return false;
28903 location = d->perm[0];
28905 switch (d->vmode)
28907 case V16QImode: gen = gen_neon_vextv16qi; break;
28908 case V8QImode: gen = gen_neon_vextv8qi; break;
28909 case V4HImode: gen = gen_neon_vextv4hi; break;
28910 case V8HImode: gen = gen_neon_vextv8hi; break;
28911 case V2SImode: gen = gen_neon_vextv2si; break;
28912 case V4SImode: gen = gen_neon_vextv4si; break;
28913 case V4HFmode: gen = gen_neon_vextv4hf; break;
28914 case V8HFmode: gen = gen_neon_vextv8hf; break;
28915 case V2SFmode: gen = gen_neon_vextv2sf; break;
28916 case V4SFmode: gen = gen_neon_vextv4sf; break;
28917 case V2DImode: gen = gen_neon_vextv2di; break;
28918 default:
28919 return false;
28922 /* Success! */
28923 if (d->testing_p)
28924 return true;
28926 offset = GEN_INT (location);
28927 emit_insn (gen (d->target, d->op0, d->op1, offset));
28928 return true;
28931 /* The NEON VTBL instruction is a fully variable permutation that's even
28932 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28933 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28934 can do slightly better by expanding this as a constant where we don't
28935 have to apply a mask. */
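 /* For example (little-endian only), a V8QImode selector such as
    {7, 6, 5, 4, 3, 2, 1, 0} is simply materialized as a constant
    vector in a register and used directly as the VTBL index operand,
    with no run-time masking of the indices.  */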
28937 static bool
28938 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28940 rtx rperm[MAX_VECT_LEN], sel;
28941 machine_mode vmode = d->vmode;
28942 unsigned int i, nelt = d->nelt;
28944 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28945 numbering of elements for big-endian, we must reverse the order. */
28946 if (BYTES_BIG_ENDIAN)
28947 return false;
28949 if (d->testing_p)
28950 return true;
28952 /* Generic code will try constant permutation twice: once with the
28953 original mode and again with the elements lowered to QImode.
28954 So wait and don't do the selector expansion ourselves. */
28955 if (vmode != V8QImode && vmode != V16QImode)
28956 return false;
28958 for (i = 0; i < nelt; ++i)
28959 rperm[i] = GEN_INT (d->perm[i]);
28960 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28961 sel = force_reg (vmode, sel);
28963 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28964 return true;
28967 static bool
28968 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28970 /* Check if the input mask matches vext before reordering the
28971 operands. */
28972 if (TARGET_NEON)
28973 if (arm_evpc_neon_vext (d))
28974 return true;
28976 /* The pattern matching functions above are written to look for a small
28977 number to begin the sequence (0, 1, N/2). If we begin with an index
28978 from the second operand, we can swap the operands. */
28979 if (d->perm[0] >= d->nelt)
28981 unsigned i, nelt = d->nelt;
28983 for (i = 0; i < nelt; ++i)
28984 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28986 std::swap (d->op0, d->op1);
28989 if (TARGET_NEON)
28991 if (arm_evpc_neon_vuzp (d))
28992 return true;
28993 if (arm_evpc_neon_vzip (d))
28994 return true;
28995 if (arm_evpc_neon_vrev (d))
28996 return true;
28997 if (arm_evpc_neon_vtrn (d))
28998 return true;
28999 return arm_evpc_neon_vtbl (d);
29001 return false;
29004 /* Expand a vec_perm_const pattern. */
29006 bool
29007 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29009 struct expand_vec_perm_d d;
29010 int i, nelt, which;
29012 d.target = target;
29013 d.op0 = op0;
29014 d.op1 = op1;
29016 d.vmode = GET_MODE (target);
29017 gcc_assert (VECTOR_MODE_P (d.vmode));
29018 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29019 d.testing_p = false;
29021 for (i = which = 0; i < nelt; ++i)
29023 rtx e = XVECEXP (sel, 0, i);
29024 int ei = INTVAL (e) & (2 * nelt - 1);
29025 which |= (ei < nelt ? 1 : 2);
29026 d.perm[i] = ei;
29029 switch (which)
29031 default:
29032 gcc_unreachable();
29034 case 3:
29035 d.one_vector_p = false;
29036 if (!rtx_equal_p (op0, op1))
29037 break;
29039 /* The elements of PERM do not suggest that only the first operand
29040 is used, but both operands are identical. Allow easier matching
29041 of the permutation by folding the permutation into the single
29042 input vector. */
29043 /* FALLTHRU */
29044 case 2:
29045 for (i = 0; i < nelt; ++i)
29046 d.perm[i] &= nelt - 1;
29047 d.op0 = op1;
29048 d.one_vector_p = true;
29049 break;
29051 case 1:
29052 d.op1 = op0;
29053 d.one_vector_p = true;
29054 break;
29057 return arm_expand_vec_perm_const_1 (&d);
29060 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29062 static bool
29063 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29064 const unsigned char *sel)
29066 struct expand_vec_perm_d d;
29067 unsigned int i, nelt, which;
29068 bool ret;
29070 d.vmode = vmode;
29071 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29072 d.testing_p = true;
29073 memcpy (d.perm, sel, nelt);
29075 /* Categorize the set of elements in the selector. */
29076 for (i = which = 0; i < nelt; ++i)
29078 unsigned char e = d.perm[i];
29079 gcc_assert (e < 2 * nelt);
29080 which |= (e < nelt ? 1 : 2);
29083 /* If all elements are from the second vector, fold them to the first. */
29084 if (which == 2)
29085 for (i = 0; i < nelt; ++i)
29086 d.perm[i] -= nelt;
29088 /* Check whether the mask can be applied to the vector type. */
29089 d.one_vector_p = (which != 3);
29091 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29092 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29093 if (!d.one_vector_p)
29094 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29096 start_sequence ();
29097 ret = arm_expand_vec_perm_const_1 (&d);
29098 end_sequence ();
29100 return ret;
29103 bool
29104 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29106 /* If we are soft float and either have ldrd or the mode fits in a
29107 single word, then all auto increment forms are ok. */
29108 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29109 return true;
29111 switch (code)
29113 /* Post increment and Pre Decrement are supported for all
29114 instruction forms except for vector forms. */
29115 case ARM_POST_INC:
29116 case ARM_PRE_DEC:
29117 if (VECTOR_MODE_P (mode))
29119 if (code != ARM_PRE_DEC)
29120 return true;
29121 else
29122 return false;
29125 return true;
29127 case ARM_POST_DEC:
29128 case ARM_PRE_INC:
29129 /* Without LDRD and mode size greater than
29130 word size, there is no point in auto-incrementing
29131 because ldm and stm will not have these forms. */
29132 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29133 return false;
29135 /* Vector and floating point modes do not support
29136 these auto increment forms. */
29137 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29138 return false;
29140 return true;
29142 default:
29143 return false;
29147 return false;
29150 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29151 on ARM, since we know that shifts by negative amounts are no-ops.
29152 Additionally, the default expansion code is not available or suitable
29153 for post-reload insn splits (this can occur when the register allocator
29154 chooses not to do a shift in NEON).
29156 This function is used in both initial expand and post-reload splits, and
29157 handles all kinds of 64-bit shifts.
29159 Input requirements:
29160 - It is safe for the input and output to be the same register, but
29161 early-clobber rules apply for the shift amount and scratch registers.
29162 - Shift by register requires both scratch registers. In all other cases
29163 the scratch registers may be NULL.
29164 - Ashiftrt by a register also clobbers the CC register. */
29165 void
29166 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29167 rtx amount, rtx scratch1, rtx scratch2)
29169 rtx out_high = gen_highpart (SImode, out);
29170 rtx out_low = gen_lowpart (SImode, out);
29171 rtx in_high = gen_highpart (SImode, in);
29172 rtx in_low = gen_lowpart (SImode, in);
29174 /* Terminology:
29175 in = the register pair containing the input value.
29176 out = the destination register pair.
29177 up = the high- or low-part of each pair.
29178 down = the opposite part to "up".
29179 In a shift, we can consider bits to shift from "up"-stream to
29180 "down"-stream, so in a left-shift "up" is the low-part and "down"
29181 is the high-part of each register pair. */
29183 rtx out_up = code == ASHIFT ? out_low : out_high;
29184 rtx out_down = code == ASHIFT ? out_high : out_low;
29185 rtx in_up = code == ASHIFT ? in_low : in_high;
29186 rtx in_down = code == ASHIFT ? in_high : in_low;
29188 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29189 gcc_assert (out
29190 && (REG_P (out) || GET_CODE (out) == SUBREG)
29191 && GET_MODE (out) == DImode);
29192 gcc_assert (in
29193 && (REG_P (in) || GET_CODE (in) == SUBREG)
29194 && GET_MODE (in) == DImode);
29195 gcc_assert (amount
29196 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29197 && GET_MODE (amount) == SImode)
29198 || CONST_INT_P (amount)));
29199 gcc_assert (scratch1 == NULL
29200 || (GET_CODE (scratch1) == SCRATCH)
29201 || (GET_MODE (scratch1) == SImode
29202 && REG_P (scratch1)));
29203 gcc_assert (scratch2 == NULL
29204 || (GET_CODE (scratch2) == SCRATCH)
29205 || (GET_MODE (scratch2) == SImode
29206 && REG_P (scratch2)));
29207 gcc_assert (!REG_P (out) || !REG_P (amount)
29208 || !HARD_REGISTER_P (out)
29209 || (REGNO (out) != REGNO (amount)
29210 && REGNO (out) + 1 != REGNO (amount)));
29212 /* Macros to make following code more readable. */
29213 #define SUB_32(DEST,SRC) \
29214 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29215 #define RSB_32(DEST,SRC) \
29216 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29217 #define SUB_S_32(DEST,SRC) \
29218 gen_addsi3_compare0 ((DEST), (SRC), \
29219 GEN_INT (-32))
29220 #define SET(DEST,SRC) \
29221 gen_rtx_SET ((DEST), (SRC))
29222 #define SHIFT(CODE,SRC,AMOUNT) \
29223 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29224 #define LSHIFT(CODE,SRC,AMOUNT) \
29225 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29226 SImode, (SRC), (AMOUNT))
29227 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29228 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29229 SImode, (SRC), (AMOUNT))
29230 #define ORR(A,B) \
29231 gen_rtx_IOR (SImode, (A), (B))
29232 #define BRANCH(COND,LABEL) \
29233 gen_arm_cond_branch ((LABEL), \
29234 gen_rtx_ ## COND (CCmode, cc_reg, \
29235 const0_rtx), \
29236 cc_reg)
29238 /* Shifts by register and shifts by constant are handled separately. */
29239 if (CONST_INT_P (amount))
29241 /* We have a shift-by-constant. */
29243 /* First, handle out-of-range shift amounts.
29244 In both cases we try to match the result an ARM instruction in a
29245 shift-by-register would give. This helps reduce execution
29246 differences between optimization levels, but it won't stop other
29247 parts of the compiler doing different things. This is "undefined
29248 behavior", in any case. */
29249 if (INTVAL (amount) <= 0)
29250 emit_insn (gen_movdi (out, in));
29251 else if (INTVAL (amount) >= 64)
29253 if (code == ASHIFTRT)
29255 rtx const31_rtx = GEN_INT (31);
29256 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29257 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29259 else
29260 emit_insn (gen_movdi (out, const0_rtx));
29263 /* Now handle valid shifts. */
29264 else if (INTVAL (amount) < 32)
29266 /* Shifts by a constant less than 32. */
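	  /* For example, a logical right shift by 10 is emitted as
	       out_low  = ((unsigned)in_low >> 10) | (in_high << 22);
	       out_high = (unsigned)in_high >> 10;  */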
29267 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29269 /* Clearing the out register in DImode first avoids lots
29270 of spilling and results in less stack usage.
29271 Later this redundant insn is completely removed.
29272 Do that only if "in" and "out" are different registers. */
29273 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29274 emit_insn (SET (out, const0_rtx));
29275 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29276 emit_insn (SET (out_down,
29277 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29278 out_down)));
29279 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29281 else
29283 /* Shifts by a constant greater than 31. */
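	  /* For example, a logical right shift by 40 is emitted as
	       out_low  = (unsigned)in_high >> 8;
	       out_high = 0;
	     while an arithmetic right shift by 40 instead sets
	       out_high = (signed)in_high >> 31;  */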
29284 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29286 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29287 emit_insn (SET (out, const0_rtx));
29288 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29289 if (code == ASHIFTRT)
29290 emit_insn (gen_ashrsi3 (out_up, in_up,
29291 GEN_INT (31)));
29292 else
29293 emit_insn (SET (out_up, const0_rtx));
29296 else
29298 /* We have a shift-by-register. */
29299 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29301 /* This alternative requires the scratch registers. */
29302 gcc_assert (scratch1 && REG_P (scratch1));
29303 gcc_assert (scratch2 && REG_P (scratch2));
29305 /* We will need the values "amount-32" and "32-amount" later.
29306 Swapping them around now allows the later code to be more general. */
29307 switch (code)
29309 case ASHIFT:
29310 emit_insn (SUB_32 (scratch1, amount));
29311 emit_insn (RSB_32 (scratch2, amount));
29312 break;
29313 case ASHIFTRT:
29314 emit_insn (RSB_32 (scratch1, amount));
29315 /* Also set CC = amount > 32. */
29316 emit_insn (SUB_S_32 (scratch2, amount));
29317 break;
29318 case LSHIFTRT:
29319 emit_insn (RSB_32 (scratch1, amount));
29320 emit_insn (SUB_32 (scratch2, amount));
29321 break;
29322 default:
29323 gcc_unreachable ();
29326 /* Emit code like this:
29328 arithmetic-left:
29329 out_down = in_down << amount;
29330 out_down = (in_up << (amount - 32)) | out_down;
29331 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29332 out_up = in_up << amount;
29334 arithmetic-right:
29335 out_down = in_down >> amount;
29336 out_down = (in_up << (32 - amount)) | out_down;
29337 if (amount >= 32)
29338 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29339 out_up = (signed)in_up >> amount;
29341 logical-right:
29342 out_down = in_down >> amount;
29343 out_down = (in_up << (32 - amount)) | out_down;
29344 if (amount >= 32)
29345 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29346 out_up = (unsigned)in_up >> amount;
29348 The ARM and Thumb2 variants are the same but implemented slightly
29349 differently. If this were only called during expand we could just
29350 use the Thumb2 case and let combine do the right thing, but this
29351 can also be called from post-reload splitters. */
29353 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29355 if (!TARGET_THUMB2)
29357 /* Emit code for ARM mode. */
29358 emit_insn (SET (out_down,
29359 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29360 if (code == ASHIFTRT)
29362 rtx_code_label *done_label = gen_label_rtx ();
29363 emit_jump_insn (BRANCH (LT, done_label));
29364 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29365 out_down)));
29366 emit_label (done_label);
29368 else
29369 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29370 out_down)));
29372 else
29374 /* Emit code for Thumb2 mode.
29375 Thumb2 can't do shift and or in one insn. */
29376 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29377 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29379 if (code == ASHIFTRT)
29381 rtx_code_label *done_label = gen_label_rtx ();
29382 emit_jump_insn (BRANCH (LT, done_label));
29383 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29384 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29385 emit_label (done_label);
29387 else
29389 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29390 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29394 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29397 #undef SUB_32
29398 #undef RSB_32
29399 #undef SUB_S_32
29400 #undef SET
29401 #undef SHIFT
29402 #undef LSHIFT
29403 #undef REV_LSHIFT
29404 #undef ORR
29405 #undef BRANCH
29408 /* Returns true if the pattern is a valid symbolic address, which is either a
29409 symbol_ref or (symbol_ref + addend).
29411 According to the ARM ELF ABI, the initial addend of REL-type relocations
29412 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29413 literal field of the instruction as a 16-bit signed value in the range
29414 -32768 <= A < 32768. */
29416 bool
29417 arm_valid_symbolic_address_p (rtx addr)
29419 rtx xop0, xop1 = NULL_RTX;
29420 rtx tmp = addr;
29422 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29423 return true;
29425 /* (const (plus: symbol_ref const_int)) */
29426 if (GET_CODE (addr) == CONST)
29427 tmp = XEXP (addr, 0);
29429 if (GET_CODE (tmp) == PLUS)
29431 xop0 = XEXP (tmp, 0);
29432 xop1 = XEXP (tmp, 1);
29434 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29435 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29438 return false;
29441 /* Returns true if this is a valid comparison operation and puts
29442 the operands into a form that is valid. */
29443 bool
29444 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29446 enum rtx_code code = GET_CODE (*comparison);
29447 int code_int;
29448 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29449 ? GET_MODE (*op2) : GET_MODE (*op1);
29451 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29453 if (code == UNEQ || code == LTGT)
29454 return false;
29456 code_int = (int)code;
29457 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29458 PUT_CODE (*comparison, (enum rtx_code)code_int);
29460 switch (mode)
29462 case SImode:
29463 if (!arm_add_operand (*op1, mode))
29464 *op1 = force_reg (mode, *op1);
29465 if (!arm_add_operand (*op2, mode))
29466 *op2 = force_reg (mode, *op2);
29467 return true;
29469 case DImode:
29470 if (!cmpdi_operand (*op1, mode))
29471 *op1 = force_reg (mode, *op1);
29472 if (!cmpdi_operand (*op2, mode))
29473 *op2 = force_reg (mode, *op2);
29474 return true;
29476 case HFmode:
29477 if (!TARGET_VFP_FP16INST)
29478 break;
29479 /* FP16 comparisons are done in SF mode. */
29480 mode = SFmode;
29481 *op1 = convert_to_mode (mode, *op1, 1);
29482 *op2 = convert_to_mode (mode, *op2, 1);
29483 /* Fall through. */
29484 case SFmode:
29485 case DFmode:
29486 if (!vfp_compare_operand (*op1, mode))
29487 *op1 = force_reg (mode, *op1);
29488 if (!vfp_compare_operand (*op2, mode))
29489 *op2 = force_reg (mode, *op2);
29490 return true;
29491 default:
29492 break;
29495 return false;
29499 /* Maximum number of instructions to set a block of memory. */
29500 static int
29501 arm_block_set_max_insns (void)
29503 if (optimize_function_for_size_p (cfun))
29504 return 4;
29505 else
29506 return current_tune->max_insns_inline_memset;
29509 /* Return TRUE if it's profitable to set a block of memory for the
29510 non-vectorized case. VAL is the value to set the memory
29511 with. LENGTH is the number of bytes to set. ALIGN is the
29512 alignment of the destination memory in bytes. UNALIGNED_P
29513 is TRUE if we can only set the memory with instructions
29514 meeting alignment requirements. USE_STRD_P is TRUE if we
29515 can use strd to set the memory. */
29516 static bool
29517 arm_block_set_non_vect_profit_p (rtx val,
29518 unsigned HOST_WIDE_INT length,
29519 unsigned HOST_WIDE_INT align,
29520 bool unaligned_p, bool use_strd_p)
29522 int num = 0;
29523 /* For a leftover of 0-7 bytes, we can set the memory block using
29524 strb/strh/str with the minimum number of instructions. */
29525 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
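   /* E.g. leftover[7] == 3 because 7 trailing bytes take one str (4
      bytes), one strh (2 bytes) and one strb (1 byte), while
      leftover[3] == 2 because 3 bytes take one strh plus one strb.  */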
29527 if (unaligned_p)
29529 num = arm_const_inline_cost (SET, val);
29530 num += length / align + length % align;
29532 else if (use_strd_p)
29534 num = arm_const_double_inline_cost (val);
29535 num += (length >> 3) + leftover[length & 7];
29537 else
29539 num = arm_const_inline_cost (SET, val);
29540 num += (length >> 2) + leftover[length & 3];
29543 /* We may be able to combine the last STRH/STRB pair into a single STR
29544 by shifting one byte back. */
29545 if (unaligned_access && length > 3 && (length & 3) == 3)
29546 num--;
29548 return (num <= arm_block_set_max_insns ());
29551 /* Return TRUE if it's profitable to set a block of memory for the
29552 vectorized case. LENGTH is the number of bytes to set.
29553 ALIGN is the alignment of destination memory in bytes.
29554 MODE is the vector mode used to set the memory. */
29555 static bool
29556 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29557 unsigned HOST_WIDE_INT align,
29558 machine_mode mode)
29560 int num;
29561 bool unaligned_p = ((align & 3) != 0);
29562 unsigned int nelt = GET_MODE_NUNITS (mode);
29564 /* Instruction loading constant value. */
29565 num = 1;
29566 /* Instructions storing the memory. */
29567 num += (length + nelt - 1) / nelt;
29568 /* Instructions adjusting the address expression. We only need to
29569 adjust the address expression if it's 4-byte aligned and the
29570 leftover bytes can only be stored by a misaligned store instruction. */
29571 if (!unaligned_p && (length & 3) != 0)
29572 num++;
29574 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29575 if (!unaligned_p && mode == V16QImode)
29576 num--;
29578 return (num <= arm_block_set_max_insns ());
29581 /* Set a block of memory using vectorization instructions for the
29582 unaligned case. We fill the first LENGTH bytes of the memory
29583 area starting from DSTBASE with byte constant VALUE. ALIGN is
29584 the alignment requirement of memory. Return TRUE if succeeded. */
29585 static bool
29586 arm_block_set_unaligned_vect (rtx dstbase,
29587 unsigned HOST_WIDE_INT length,
29588 unsigned HOST_WIDE_INT value,
29589 unsigned HOST_WIDE_INT align)
29591 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29592 rtx dst, mem;
29593 rtx val_elt, val_vec, reg;
29594 rtx rval[MAX_VECT_LEN];
29595 rtx (*gen_func) (rtx, rtx);
29596 machine_mode mode;
29597 unsigned HOST_WIDE_INT v = value;
29598 unsigned int offset = 0;
29599 gcc_assert ((align & 0x3) != 0);
29600 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29601 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29602 if (length >= nelt_v16)
29604 mode = V16QImode;
29605 gen_func = gen_movmisalignv16qi;
29607 else
29609 mode = V8QImode;
29610 gen_func = gen_movmisalignv8qi;
29612 nelt_mode = GET_MODE_NUNITS (mode);
29613 gcc_assert (length >= nelt_mode);
29614 /* Skip if it isn't profitable. */
29615 if (!arm_block_set_vect_profit_p (length, align, mode))
29616 return false;
29618 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29619 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29621 v = sext_hwi (v, BITS_PER_WORD);
29622 val_elt = GEN_INT (v);
29623 for (j = 0; j < nelt_mode; j++)
29624 rval[j] = val_elt;
29626 reg = gen_reg_rtx (mode);
29627 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29628 /* Emit instruction loading the constant value. */
29629 emit_move_insn (reg, val_vec);
29631 /* Handle nelt_mode bytes in a vector. */
29632 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29634 emit_insn ((*gen_func) (mem, reg));
29635 if (i + 2 * nelt_mode <= length)
29637 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29638 offset += nelt_mode;
29639 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29643 /* If there are not less than nelt_v8 bytes leftover, we must be in
29644 V16QI mode. */
29645 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29647 /* Handle (8, 16) bytes leftover. */
29648 if (i + nelt_v8 < length)
29650 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29651 offset += length - i;
29652 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29654 /* We are shifting bytes back, set the alignment accordingly. */
29655 if ((length & 1) != 0 && align >= 2)
29656 set_mem_align (mem, BITS_PER_UNIT);
29658 emit_insn (gen_movmisalignv16qi (mem, reg));
29660 /* Handle (0, 8] bytes leftover. */
29661 else if (i < length && i + nelt_v8 >= length)
29663 if (mode == V16QImode)
29664 reg = gen_lowpart (V8QImode, reg);
29666 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29667 + (nelt_mode - nelt_v8))));
29668 offset += (length - i) + (nelt_mode - nelt_v8);
29669 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29671 /* We are shifting bytes back, set the alignment accordingly. */
29672 if ((length & 1) != 0 && align >= 2)
29673 set_mem_align (mem, BITS_PER_UNIT);
29675 emit_insn (gen_movmisalignv8qi (mem, reg));
29678 return true;
29681 /* Set a block of memory using vectorization instructions for the
29682 aligned case. We fill the first LENGTH bytes of the memory area
29683 starting from DSTBASE with byte constant VALUE. ALIGN is the
29684 alignment requirement of memory. Return TRUE if succeeded. */
29685 static bool
29686 arm_block_set_aligned_vect (rtx dstbase,
29687 unsigned HOST_WIDE_INT length,
29688 unsigned HOST_WIDE_INT value,
29689 unsigned HOST_WIDE_INT align)
29691 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29692 rtx dst, addr, mem;
29693 rtx val_elt, val_vec, reg;
29694 rtx rval[MAX_VECT_LEN];
29695 machine_mode mode;
29696 unsigned HOST_WIDE_INT v = value;
29697 unsigned int offset = 0;
29699 gcc_assert ((align & 0x3) == 0);
29700 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29701 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29702 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29703 mode = V16QImode;
29704 else
29705 mode = V8QImode;
29707 nelt_mode = GET_MODE_NUNITS (mode);
29708 gcc_assert (length >= nelt_mode);
29709 /* Skip if it isn't profitable. */
29710 if (!arm_block_set_vect_profit_p (length, align, mode))
29711 return false;
29713 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29715 v = sext_hwi (v, BITS_PER_WORD);
29716 val_elt = GEN_INT (v);
29717 for (j = 0; j < nelt_mode; j++)
29718 rval[j] = val_elt;
29720 reg = gen_reg_rtx (mode);
29721 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29722 /* Emit instruction loading the constant value. */
29723 emit_move_insn (reg, val_vec);
29725 i = 0;
29726 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29727 if (mode == V16QImode)
29729 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29730 emit_insn (gen_movmisalignv16qi (mem, reg));
29731 i += nelt_mode;
29732 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29733 if (i + nelt_v8 < length && i + nelt_v16 > length)
29735 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29736 offset += length - nelt_mode;
29737 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29738 /* We are shifting bytes back, set the alignment accordingly. */
29739 if ((length & 0x3) == 0)
29740 set_mem_align (mem, BITS_PER_UNIT * 4);
29741 else if ((length & 0x1) == 0)
29742 set_mem_align (mem, BITS_PER_UNIT * 2);
29743 else
29744 set_mem_align (mem, BITS_PER_UNIT);
29746 emit_insn (gen_movmisalignv16qi (mem, reg));
29747 return true;
29749 /* Fall through for bytes leftover. */
29750 mode = V8QImode;
29751 nelt_mode = GET_MODE_NUNITS (mode);
29752 reg = gen_lowpart (V8QImode, reg);
29755 /* Handle 8 bytes in a vector. */
29756 for (; (i + nelt_mode <= length); i += nelt_mode)
29758 addr = plus_constant (Pmode, dst, i);
29759 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29760 emit_move_insn (mem, reg);
29763 /* Handle single word leftover by shifting 4 bytes back. We can
29764 use aligned access for this case. */
29765 if (i + UNITS_PER_WORD == length)
29767 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29768 offset += i - UNITS_PER_WORD;
29769 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29770 /* We are shifting 4 bytes back, set the alignment accordingly. */
29771 if (align > UNITS_PER_WORD)
29772 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29774 emit_move_insn (mem, reg);
29776 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29777 We have to use unaligned access for this case. */
29778 else if (i < length)
29780 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29781 offset += length - nelt_mode;
29782 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29783 /* We are shifting bytes back, set the alignment accordingly. */
29784 if ((length & 1) == 0)
29785 set_mem_align (mem, BITS_PER_UNIT * 2);
29786 else
29787 set_mem_align (mem, BITS_PER_UNIT);
29789 emit_insn (gen_movmisalignv8qi (mem, reg));
29792 return true;
29795 /* Set a block of memory using plain strh/strb instructions, only
29796 using instructions allowed by ALIGN on the processor. We fill the
29797 first LENGTH bytes of the memory area starting from DSTBASE
29798 with byte constant VALUE. ALIGN is the alignment requirement
29799 of memory. */
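 /* For illustration, with ALIGN == 2, VALUE == 0xAB and LENGTH == 5,
    the constant is widened to 0xABAB and, if the profitability check
    passes, the expansion is two halfword stores at offsets 0 and 2
    followed by a byte store of 0xAB at offset 4.  */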
29800 static bool
29801 arm_block_set_unaligned_non_vect (rtx dstbase,
29802 unsigned HOST_WIDE_INT length,
29803 unsigned HOST_WIDE_INT value,
29804 unsigned HOST_WIDE_INT align)
29806 unsigned int i;
29807 rtx dst, addr, mem;
29808 rtx val_exp, val_reg, reg;
29809 machine_mode mode;
29810 HOST_WIDE_INT v = value;
29812 gcc_assert (align == 1 || align == 2);
29814 if (align == 2)
29815 v |= (value << BITS_PER_UNIT);
29817 v = sext_hwi (v, BITS_PER_WORD);
29818 val_exp = GEN_INT (v);
29819 /* Skip if it isn't profitable. */
29820 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29821 align, true, false))
29822 return false;
29824 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29825 mode = (align == 2 ? HImode : QImode);
29826 val_reg = force_reg (SImode, val_exp);
29827 reg = gen_lowpart (mode, val_reg);
29829 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29831 addr = plus_constant (Pmode, dst, i);
29832 mem = adjust_automodify_address (dstbase, mode, addr, i);
29833 emit_move_insn (mem, reg);
29836 /* Handle single byte leftover. */
29837 if (i + 1 == length)
29839 reg = gen_lowpart (QImode, val_reg);
29840 addr = plus_constant (Pmode, dst, i);
29841 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29842 emit_move_insn (mem, reg);
29843 i++;
29846 gcc_assert (i == length);
29847 return true;
29850 /* Set a block of memory using plain strd/str/strh/strb instructions,
29851 to permit unaligned copies on processors which support unaligned
29852 semantics for those instructions. We fill the first LENGTH bytes
29853 of the memory area starting from DSTBASE with byte constant VALUE.
29854 ALIGN is the alignment requirement of memory. */
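 /* For illustration, a word-aligned 15-byte set of VALUE == 0xAB
    without strd widens the constant to 0xABABABAB, emits word stores
    at offsets 0, 4 and 8, and, when unaligned access is available,
    covers the remaining 3 bytes with a single overlapping unaligned
    str at offset 11 instead of an strh/strb pair.  */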
29855 static bool
29856 arm_block_set_aligned_non_vect (rtx dstbase,
29857 unsigned HOST_WIDE_INT length,
29858 unsigned HOST_WIDE_INT value,
29859 unsigned HOST_WIDE_INT align)
29861 unsigned int i;
29862 rtx dst, addr, mem;
29863 rtx val_exp, val_reg, reg;
29864 unsigned HOST_WIDE_INT v;
29865 bool use_strd_p;
29867 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29868 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29870 v = (value | (value << 8) | (value << 16) | (value << 24));
29871 if (length < UNITS_PER_WORD)
29872 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29874 if (use_strd_p)
29875 v |= (v << BITS_PER_WORD);
29876 else
29877 v = sext_hwi (v, BITS_PER_WORD);
29879 val_exp = GEN_INT (v);
29880 /* Skip if it isn't profitable. */
29881 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29882 align, false, use_strd_p))
29884 if (!use_strd_p)
29885 return false;
29887 /* Try without strd. */
29888 v = (v >> BITS_PER_WORD);
29889 v = sext_hwi (v, BITS_PER_WORD);
29890 val_exp = GEN_INT (v);
29891 use_strd_p = false;
29892 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29893 align, false, use_strd_p))
29894 return false;
29897 i = 0;
29898 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29899 /* Handle double words using strd if possible. */
29900 if (use_strd_p)
29902 val_reg = force_reg (DImode, val_exp);
29903 reg = val_reg;
29904 for (; (i + 8 <= length); i += 8)
29906 addr = plus_constant (Pmode, dst, i);
29907 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29908 emit_move_insn (mem, reg);
29911 else
29912 val_reg = force_reg (SImode, val_exp);
29914 /* Handle words. */
29915 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29916 for (; (i + 4 <= length); i += 4)
29918 addr = plus_constant (Pmode, dst, i);
29919 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29920 if ((align & 3) == 0)
29921 emit_move_insn (mem, reg);
29922 else
29923 emit_insn (gen_unaligned_storesi (mem, reg));
29926 /* Merge last pair of STRH and STRB into a STR if possible. */
29927 if (unaligned_access && i > 0 && (i + 3) == length)
29929 addr = plus_constant (Pmode, dst, i - 1);
29930 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29931 /* We are shifting one byte back, set the alignment accordingly. */
29932 if ((align & 1) == 0)
29933 set_mem_align (mem, BITS_PER_UNIT);
29935 /* Most likely this is an unaligned access, and we can't tell at
29936 compilation time. */
29937 emit_insn (gen_unaligned_storesi (mem, reg));
29938 return true;
29941 /* Handle half word leftover. */
29942 if (i + 2 <= length)
29944 reg = gen_lowpart (HImode, val_reg);
29945 addr = plus_constant (Pmode, dst, i);
29946 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29947 if ((align & 1) == 0)
29948 emit_move_insn (mem, reg);
29949 else
29950 emit_insn (gen_unaligned_storehi (mem, reg));
29952 i += 2;
29955 /* Handle single byte leftover. */
29956 if (i + 1 == length)
29958 reg = gen_lowpart (QImode, val_reg);
29959 addr = plus_constant (Pmode, dst, i);
29960 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29961 emit_move_insn (mem, reg);
29964 return true;
29967 /* Set a block of memory using vectorization instructions for both
29968 aligned and unaligned cases. We fill the first LENGTH bytes of
29969 the memory area starting from DSTBASE with byte constant VALUE.
29970 ALIGN is the alignment requirement of memory. */
29971 static bool
29972 arm_block_set_vect (rtx dstbase,
29973 unsigned HOST_WIDE_INT length,
29974 unsigned HOST_WIDE_INT value,
29975 unsigned HOST_WIDE_INT align)
29977 /* Check whether we need to use unaligned store instruction. */
29978 if (((align & 3) != 0 || (length & 3) != 0)
29979 /* Check whether unaligned store instruction is available. */
29980 && (!unaligned_access || BYTES_BIG_ENDIAN))
29981 return false;
29983 if ((align & 3) == 0)
29984 return arm_block_set_aligned_vect (dstbase, length, value, align);
29985 else
29986 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29989 /* Expand string store operation. First we try to do that by using
29990 vectorization instructions, then try with ARM unaligned access and
29991 double-word store if profitable. OPERANDS[0] is the destination,
29992 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29993 initialize the memory, OPERANDS[3] is the known alignment of the
29994 destination. */
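 /* For example, a memset of 40 bytes with a constant byte value and a
    known alignment may be expanded inline here, while lengths above
    64 bytes make this return false so that the caller falls back to
    the generic expansion (typically a call to memset).  */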
29995 bool
29996 arm_gen_setmem (rtx *operands)
29998 rtx dstbase = operands[0];
29999 unsigned HOST_WIDE_INT length;
30000 unsigned HOST_WIDE_INT value;
30001 unsigned HOST_WIDE_INT align;
30003 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30004 return false;
30006 length = UINTVAL (operands[1]);
30007 if (length > 64)
30008 return false;
30010 value = (UINTVAL (operands[2]) & 0xFF);
30011 align = UINTVAL (operands[3]);
30012 if (TARGET_NEON && length >= 8
30013 && current_tune->string_ops_prefer_neon
30014 && arm_block_set_vect (dstbase, length, value, align))
30015 return true;
30017 if (!unaligned_access && (align & 3) != 0)
30018 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30020 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30024 static bool
30025 arm_macro_fusion_p (void)
30027 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30030 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30031 for MOVW / MOVT macro fusion. */
30033 static bool
30034 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30036 /* We are trying to fuse
30037 movw imm / movt imm
30038 instructions as a group that gets scheduled together. */
30040 rtx set_dest = SET_DEST (curr_set);
30042 if (GET_MODE (set_dest) != SImode)
30043 return false;
30045 /* We are trying to match:
30046 prev (movw) == (set (reg r0) (const_int imm16))
30047 curr (movt) == (set (zero_extract (reg r0)
30048 (const_int 16)
30049 (const_int 16))
30050 (const_int imm16_1))
30052 prev (movw) == (set (reg r1)
30053 (high (symbol_ref ("SYM"))))
30054 curr (movt) == (set (reg r0)
30055 (lo_sum (reg r1)
30056 (symbol_ref ("SYM")))) */
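	/* In assembly terms the fusible pair is, for example:
	     movw	r0, #:lower16:SYM
	     movt	r0, #:upper16:SYM
	   or a pair of immediate moves building a 32-bit constant.  */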
30058 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30060 if (CONST_INT_P (SET_SRC (curr_set))
30061 && CONST_INT_P (SET_SRC (prev_set))
30062 && REG_P (XEXP (set_dest, 0))
30063 && REG_P (SET_DEST (prev_set))
30064 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30065 return true;
30068 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30069 && REG_P (SET_DEST (curr_set))
30070 && REG_P (SET_DEST (prev_set))
30071 && GET_CODE (SET_SRC (prev_set)) == HIGH
30072 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30073 return true;
30075 return false;
30078 static bool
30079 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30081 rtx prev_set = single_set (prev);
30082 rtx curr_set = single_set (curr);
30084 if (!prev_set
30085 || !curr_set)
30086 return false;
30088 if (any_condjump_p (curr))
30089 return false;
30091 if (!arm_macro_fusion_p ())
30092 return false;
30094 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30095 && aarch_crypto_can_dual_issue (prev, curr))
30096 return true;
30098 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30099 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30100 return true;
30102 return false;
30105 /* Return true iff the instruction fusion described by OP is enabled. */
30106 bool
30107 arm_fusion_enabled_p (tune_params::fuse_ops op)
30109 return current_tune->fusible_ops & op;
30112 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30113 scheduled for speculative execution. Reject the long-running division
30114 and square-root instructions. */
30116 static bool
30117 arm_sched_can_speculate_insn (rtx_insn *insn)
30119 switch (get_attr_type (insn))
30121 case TYPE_SDIV:
30122 case TYPE_UDIV:
30123 case TYPE_FDIVS:
30124 case TYPE_FDIVD:
30125 case TYPE_FSQRTS:
30126 case TYPE_FSQRTD:
30127 case TYPE_NEON_FP_SQRT_S:
30128 case TYPE_NEON_FP_SQRT_D:
30129 case TYPE_NEON_FP_SQRT_S_Q:
30130 case TYPE_NEON_FP_SQRT_D_Q:
30131 case TYPE_NEON_FP_DIV_S:
30132 case TYPE_NEON_FP_DIV_D:
30133 case TYPE_NEON_FP_DIV_S_Q:
30134 case TYPE_NEON_FP_DIV_D_Q:
30135 return false;
30136 default:
30137 return true;
30141 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30143 static unsigned HOST_WIDE_INT
30144 arm_asan_shadow_offset (void)
30146 return HOST_WIDE_INT_1U << 29;
30150 /* This is a temporary fix for PR60655. Ideally we need
30151 to handle most of these cases in the generic part but
30152 currently we reject minus (..) (sym_ref). We try to
30153 ameliorate the case with minus (sym_ref1) (sym_ref2)
30154 where they are in the same section. */
30156 static bool
30157 arm_const_not_ok_for_debug_p (rtx p)
30159 tree decl_op0 = NULL;
30160 tree decl_op1 = NULL;
30162 if (GET_CODE (p) == MINUS)
30164 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30166 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30167 if (decl_op1
30168 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30169 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30171 if ((VAR_P (decl_op1)
30172 || TREE_CODE (decl_op1) == CONST_DECL)
30173 && (VAR_P (decl_op0)
30174 || TREE_CODE (decl_op0) == CONST_DECL))
30175 return (get_variable_section (decl_op1, false)
30176 != get_variable_section (decl_op0, false));
30178 if (TREE_CODE (decl_op1) == LABEL_DECL
30179 && TREE_CODE (decl_op0) == LABEL_DECL)
30180 return (DECL_CONTEXT (decl_op1)
30181 != DECL_CONTEXT (decl_op0));
30184 return true;
30188 return false;
30191 /* Return TRUE if X is a reference to a value in a constant pool. */
30192 extern bool
30193 arm_is_constant_pool_ref (rtx x)
30195 return (MEM_P (x)
30196 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30197 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30200 /* Remember the last target of arm_set_current_function. */
30201 static GTY(()) tree arm_previous_fndecl;
30203 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30205 void
30206 save_restore_target_globals (tree new_tree)
30208 /* If we have a previous state, use it. */
30209 if (TREE_TARGET_GLOBALS (new_tree))
30210 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30211 else if (new_tree == target_option_default_node)
30212 restore_target_globals (&default_target_globals);
30213 else
30215 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30216 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30219 arm_option_params_internal ();
30222 /* Invalidate arm_previous_fndecl. */
30224 void
30225 arm_reset_previous_fndecl (void)
30227 arm_previous_fndecl = NULL_TREE;
30230 /* Establish appropriate back-end context for processing the function
30231 FNDECL. The argument might be NULL to indicate processing at top
30232 level, outside of any function scope. */
30234 static void
30235 arm_set_current_function (tree fndecl)
30237 if (!fndecl || fndecl == arm_previous_fndecl)
30238 return;
30240 tree old_tree = (arm_previous_fndecl
30241 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30242 : NULL_TREE);
30244 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30246 /* If the current function has no attributes but the previous one did,
30247 use the default node. */
30248 if (! new_tree && old_tree)
30249 new_tree = target_option_default_node;
30251 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30252 the default have been handled by save_restore_target_globals from
30253 arm_pragma_target_parse. */
30254 if (old_tree == new_tree)
30255 return;
30257 arm_previous_fndecl = fndecl;
30259 /* First set the target options. */
30260 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30262 save_restore_target_globals (new_tree);
30265 /* Implement TARGET_OPTION_PRINT. */
30267 static void
30268 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30270 int flags = ptr->x_target_flags;
30271 const char *fpu_name;
30273 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30274 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30276 fprintf (file, "%*sselected arch %s\n", indent, "",
30277 TARGET_THUMB2_P (flags) ? "thumb2" :
30278 TARGET_THUMB_P (flags) ? "thumb1" :
30279 "arm");
30281 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30284 /* Hook to determine if one function can safely inline another. */
30286 static bool
30287 arm_can_inline_p (tree caller, tree callee)
30289 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30290 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30291 bool can_inline = true;
30293 struct cl_target_option *caller_opts
30294 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30295 : target_option_default_node);
30297 struct cl_target_option *callee_opts
30298 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30299 : target_option_default_node);
30301 if (callee_opts == caller_opts)
30302 return true;
30304 /* Callee's ISA features should be a subset of the caller's. */
30305 struct arm_build_target caller_target;
30306 struct arm_build_target callee_target;
30307 caller_target.isa = sbitmap_alloc (isa_num_bits);
30308 callee_target.isa = sbitmap_alloc (isa_num_bits);
30310 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30311 false);
30312 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30313 false);
30314 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30315 can_inline = false;
30317 sbitmap_free (caller_target.isa);
30318 sbitmap_free (callee_target.isa);
30320 /* OK to inline between different modes.
30321 Functions with mode-specific instructions, e.g. using asm,
30322 must be explicitly protected with noinline. */
30323 return can_inline;
30326 /* Hook to fix a function's alignment affected by the target attribute. */
30328 static void
30329 arm_relayout_function (tree fndecl)
30331 if (DECL_USER_ALIGN (fndecl))
30332 return;
30334 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30336 if (!callee_tree)
30337 callee_tree = target_option_default_node;
30339 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30340 SET_DECL_ALIGN
30341 (fndecl,
30342 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30345 /* Inner function to process the attribute((target(...))), take an argument and
30346 set the current options from the argument. If we have a list, recursively
30347 go over the list. */
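 /* For example (illustrative declaration; any valid -mfpu name works):
      __attribute__((target("thumb,fpu=vfpv3-d16"))) int foo (int x);
    Each comma-separated token is handled by one pass of the loop
    below.  */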
30349 static bool
30350 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30352 if (TREE_CODE (args) == TREE_LIST)
30354 bool ret = true;
30356 for (; args; args = TREE_CHAIN (args))
30357 if (TREE_VALUE (args)
30358 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30359 ret = false;
30360 return ret;
30363 else if (TREE_CODE (args) != STRING_CST)
30365 error ("attribute %<target%> argument not a string");
30366 return false;
30369 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30370 char *q;
30372 while ((q = strtok (argstr, ",")) != NULL)
30374 while (ISSPACE (*q)) ++q;
30376 argstr = NULL;
30377 if (!strncmp (q, "thumb", 5))
30378 opts->x_target_flags |= MASK_THUMB;
30380 else if (!strncmp (q, "arm", 3))
30381 opts->x_target_flags &= ~MASK_THUMB;
30383 else if (!strncmp (q, "fpu=", 4))
30385 int fpu_index;
30386 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30387 &fpu_index, CL_TARGET))
30389 error ("invalid fpu for attribute(target(\"%s\"))", q);
30390 return false;
30392 if (fpu_index == TARGET_FPU_auto)
30394 /* This doesn't really make sense until we support
30395 general dynamic selection of the architecture and all
30396 sub-features. */
30397 sorry ("auto fpu selection not currently permitted here");
30398 return false;
30400 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30402 else
30404 error ("attribute(target(\"%s\")) is unknown", q);
30405 return false;
30409 return true;
30412 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30414 tree
30415 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30416 struct gcc_options *opts_set)
30418 struct cl_target_option cl_opts;
30420 if (!arm_valid_target_attribute_rec (args, opts))
30421 return NULL_TREE;
30423 cl_target_option_save (&cl_opts, opts);
30424 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30425 arm_option_check_internal (opts);
30426 /* Do any overrides, such as global options arch=xxx. */
30427 arm_option_override_internal (opts, opts_set);
30429 return build_target_option_node (opts);
30432 static void
30433 add_attribute (const char * mode, tree *attributes)
30435 size_t len = strlen (mode);
30436 tree value = build_string (len, mode);
30438 TREE_TYPE (value) = build_array_type (char_type_node,
30439 build_index_type (size_int (len)));
30441 *attributes = tree_cons (get_identifier ("target"),
30442 build_tree_list (NULL_TREE, value),
30443 *attributes);
30446 /* For testing. Insert thumb or arm modes alternately on functions. */
30448 static void
30449 arm_insert_attributes (tree fndecl, tree * attributes)
30451 const char *mode;
30453 if (! TARGET_FLIP_THUMB)
30454 return;
30456 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30457 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30458 return;
30460 /* Nested definitions must inherit mode. */
30461 if (current_function_decl)
30463 mode = TARGET_THUMB ? "thumb" : "arm";
30464 add_attribute (mode, attributes);
30465 return;
30468 /* If there is already a setting don't change it. */
30469 if (lookup_attribute ("target", *attributes) != NULL)
30470 return;
30472 mode = thumb_flipper ? "thumb" : "arm";
30473 add_attribute (mode, attributes);
30475 thumb_flipper = !thumb_flipper;
30478 /* Hook to validate attribute((target("string"))). */
30480 static bool
30481 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30482 tree args, int ARG_UNUSED (flags))
30484 bool ret = true;
30485 struct gcc_options func_options;
30486 tree cur_tree, new_optimize;
30487 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30489 /* Get the optimization options of the current function. */
30490 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30492 /* If the function changed the optimization levels as well as setting target
30493 options, start with the optimizations specified. */
30494 if (!func_optimize)
30495 func_optimize = optimization_default_node;
30497 /* Init func_options. */
30498 memset (&func_options, 0, sizeof (func_options));
30499 init_options_struct (&func_options, NULL);
30500 lang_hooks.init_options_struct (&func_options);
30502 /* Initialize func_options to the defaults. */
30503 cl_optimization_restore (&func_options,
30504 TREE_OPTIMIZATION (func_optimize));
30506 cl_target_option_restore (&func_options,
30507 TREE_TARGET_OPTION (target_option_default_node));
30509 /* Set func_options flags with new target mode. */
30510 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30511 &global_options_set);
30513 if (cur_tree == NULL_TREE)
30514 ret = false;
30516 new_optimize = build_optimization_node (&func_options);
30518 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30520 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30522 finalize_options_struct (&func_options);
30524 return ret;
30527 /* Match an ISA feature bitmap to a named FPU. We always use the
30528 first entry that exactly matches the feature set, so that we
30529 effectively canonicalize the FPU name for the assembler. */
30530 static const char*
30531 arm_identify_fpu_from_isa (sbitmap isa)
30533 auto_sbitmap fpubits (isa_num_bits);
30534 auto_sbitmap cand_fpubits (isa_num_bits);
30536 bitmap_and (fpubits, isa, isa_all_fpubits);
30538 /* If there are no ISA feature bits relating to the FPU, we must be
30539 doing soft-float. */
30540 if (bitmap_empty_p (fpubits))
30541 return "softvfp";
30543 for (unsigned int i = 0; i < ARRAY_SIZE (all_fpus); i++)
30545 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30546 if (bitmap_equal_p (fpubits, cand_fpubits))
30547 return all_fpus[i].name;
30549 /* We must find an entry, or things have gone wrong. */
30550 gcc_unreachable ();
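 /* Output the assembler directives that introduce the function NAME.
    For instance, a Thumb-2 soft-float function is typically preceded by
	.syntax unified
	.thumb
	.thumb_func
	.fpu softvfp
    (illustrative; the exact directives depend on the target flags).  */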
30553 void
30554 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30557 fprintf (stream, "\t.syntax unified\n");
30559 if (TARGET_THUMB)
30561 if (is_called_in_ARM_mode (decl)
30562 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30563 && cfun->is_thunk))
30564 fprintf (stream, "\t.code 32\n");
30565 else if (TARGET_THUMB1)
30566 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30567 else
30568 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30570 else
30571 fprintf (stream, "\t.arm\n");
30573 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30574 (TARGET_SOFT_FLOAT
30575 ? "softvfp"
30576 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30578 if (TARGET_POKE_FUNCTION_NAME)
30579 arm_poke_function_name (stream, (const char *) name);
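/* As a sketch of the output, a Thumb-2 function on a hard-float target
   might be preceded by (the exact .fpu name depends on the active target):

	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   whereas a soft-float target gets ".fpu softvfp" instead. */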
30582 /* If MEM is in the form of [base+offset], extract the two parts
30583 of the address into BASE and OFFSET; otherwise return false
30584 after clearing BASE and OFFSET. */
30586 static bool
30587 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30589 rtx addr;
30591 gcc_assert (MEM_P (mem));
30593 addr = XEXP (mem, 0);
30595 /* Strip off const from addresses like (const (addr)). */
30596 if (GET_CODE (addr) == CONST)
30597 addr = XEXP (addr, 0);
30599 if (GET_CODE (addr) == REG)
30601 *base = addr;
30602 *offset = const0_rtx;
30603 return true;
30606 if (GET_CODE (addr) == PLUS
30607 && GET_CODE (XEXP (addr, 0)) == REG
30608 && CONST_INT_P (XEXP (addr, 1)))
30610 *base = XEXP (addr, 0);
30611 *offset = XEXP (addr, 1);
30612 return true;
30615 *base = NULL_RTX;
30616 *offset = NULL_RTX;
30618 return false;
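/* For example, a memory operand of the form

     (mem:SI (plus:SI (reg:SI 4) (const_int 8)))

   yields *BASE = (reg:SI 4) and *OFFSET = (const_int 8), while a bare
   (mem:SI (reg:SI 4)) yields *OFFSET = const0_rtx. */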
30621 /* If INSN is a load or store whose address is in the form of [base+offset],
30622 extract the two parts into BASE and OFFSET. IS_LOAD is set
30623 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30624 otherwise return FALSE. */
30626 static bool
30627 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30629 rtx x, dest, src;
30631 gcc_assert (INSN_P (insn));
30632 x = PATTERN (insn);
30633 if (GET_CODE (x) != SET)
30634 return false;
30636 src = SET_SRC (x);
30637 dest = SET_DEST (x);
30638 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30640 *is_load = false;
30641 extract_base_offset_in_addr (dest, base, offset);
30643 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30645 *is_load = true;
30646 extract_base_offset_in_addr (src, base, offset);
30648 else
30649 return false;
30651 return (*base != NULL_RTX && *offset != NULL_RTX);
30654 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30656 Currently we only support fusing ldr or str instructions, so FUSION_PRI
30657 and PRI are only calculated for these instructions. For other instructions,
30658 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
30659 of instruction fusion can be supported by returning different priorities.
30661 It's important that irrelevant instructions get the largest FUSION_PRI. */
30663 static void
30664 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30665 int *fusion_pri, int *pri)
30667 int tmp, off_val;
30668 bool is_load;
30669 rtx base, offset;
30671 gcc_assert (INSN_P (insn));
30673 tmp = max_pri - 1;
30674 if (!fusion_load_store (insn, &base, &offset, &is_load))
30676 *pri = tmp;
30677 *fusion_pri = tmp;
30678 return;
30681 /* Load goes first. */
30682 if (is_load)
30683 *fusion_pri = tmp - 1;
30684 else
30685 *fusion_pri = tmp - 2;
30687 tmp /= 2;
30689 /* INSN with smaller base register goes first. */
30690 tmp -= ((REGNO (base) & 0xff) << 20);
30692 /* INSN with smaller offset goes first. */
30693 off_val = (int)(INTVAL (offset));
30694 if (off_val >= 0)
30695 tmp -= (off_val & 0xfffff);
30696 else
30697 tmp += ((- off_val) & 0xfffff);
30699 *pri = tmp;
30700 return;
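/* Illustrative consequence of the above: two loads from the same base
   register with offsets 0 and 4 get the same FUSION_PRI but a larger PRI
   for the smaller offset, so the scheduler tends to keep them adjacent and
   in offset order, making them candidates for later combination into a
   single ldrd or ldm. */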
30704 /* Construct and return a PARALLEL RTX vector with elements numbering the
30705 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30706 the vector - from the perspective of the architecture. This does not
30707 line up with GCC's perspective on lane numbers, so we end up with
30708 different masks depending on our target endianness. The diagram
30709 below may help. We must draw the distinction when building masks
30710 which select one half of the vector. An instruction selecting
30711 architectural low-lanes for a big-endian target must be described using
30712 a mask selecting GCC high-lanes.
30714 Big-Endian Little-Endian
30716 GCC 0 1 2 3 3 2 1 0
30717 | x | x | x | x | | x | x | x | x |
30718 Architecture 3 2 1 0 3 2 1 0
30720 Low Mask: { 2, 3 } { 0, 1 }
30721 High Mask: { 0, 1 } { 2, 3 }
30725 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30727 int nunits = GET_MODE_NUNITS (mode);
30728 rtvec v = rtvec_alloc (nunits / 2);
30729 int high_base = nunits / 2;
30730 int low_base = 0;
30731 int base;
30732 rtx t1;
30733 int i;
30735 if (BYTES_BIG_ENDIAN)
30736 base = high ? low_base : high_base;
30737 else
30738 base = high ? high_base : low_base;
30740 for (i = 0; i < nunits / 2; i++)
30741 RTVEC_ELT (v, i) = GEN_INT (base + i);
30743 t1 = gen_rtx_PARALLEL (mode, v);
30744 return t1;
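/* For instance, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian target,
   matching the "High Mask" row of the diagram above. */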
30747 /* Check OP for validity as a PARALLEL RTX vector with elements
30748 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30749 from the perspective of the architecture. See the diagram above
30750 arm_simd_vect_par_cnst_half for more details. */
30752 bool
30753 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30754 bool high)
30756 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30757 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30758 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30759 int i = 0;
30761 if (!VECTOR_MODE_P (mode))
30762 return false;
30764 if (count_op != count_ideal)
30765 return false;
30767 for (i = 0; i < count_ideal; i++)
30769 rtx elt_op = XVECEXP (op, 0, i);
30770 rtx elt_ideal = XVECEXP (ideal, 0, i);
30772 if (!CONST_INT_P (elt_op)
30773 || INTVAL (elt_ideal) != INTVAL (elt_op))
30774 return false;
30776 return true;
30779 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30780 in Thumb1. */
30781 static bool
30782 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30783 const_tree)
30785 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30786 if (vcall_offset && TARGET_THUMB1)
30787 return false;
30789 /* Otherwise ok. */
30790 return true;
30793 /* Generate RTL for a conditional branch with rtx comparison CODE in
30794 mode CC_MODE. The destination of the unlikely conditional branch
30795 is LABEL_REF. */
30797 void
30798 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30799 rtx label_ref)
30801 rtx x;
30802 x = gen_rtx_fmt_ee (code, VOIDmode,
30803 gen_rtx_REG (cc_mode, CC_REGNUM),
30804 const0_rtx);
30806 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30807 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30808 pc_rtx);
30809 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
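/* A rough sketch of the RTL emitted for code NE in mode CCmode:

     (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
			     (label_ref <label>)
			     (pc)))

   with emit_unlikely_jump attaching a branch-probability note that marks
   the branch as very unlikely. */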
30812 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30814 For pure-code sections there is no letter code for this attribute, so
30815 output all the section flags numerically when this is needed. */
30817 static bool
30818 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30821 if (flags & SECTION_ARM_PURECODE)
30823 *num = 0x20000000;
30825 if (!(flags & SECTION_DEBUG))
30826 *num |= 0x2;
30827 if (flags & SECTION_EXCLUDE)
30828 *num |= 0x80000000;
30829 if (flags & SECTION_WRITE)
30830 *num |= 0x1;
30831 if (flags & SECTION_CODE)
30832 *num |= 0x4;
30833 if (flags & SECTION_MERGE)
30834 *num |= 0x10;
30835 if (flags & SECTION_STRINGS)
30836 *num |= 0x20;
30837 if (flags & SECTION_TLS)
30838 *num |= 0x400;
30839 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30840 *num |= 0x200;
30842 return true;
30845 return false;
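/* Worked example: a non-debug, executable pure-code section (SECTION_CODE
   set, SECTION_DEBUG clear) is emitted with flags
   0x20000000 | 0x2 | 0x4 = 0x20000006 instead of the usual letter codes. */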
30848 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30850 If pure-code is passed as an option, make sure all functions are in
30851 sections that have the SHF_ARM_PURECODE attribute. */
30853 static section *
30854 arm_function_section (tree decl, enum node_frequency freq,
30855 bool startup, bool exit)
30857 const char * section_name;
30858 section * sec;
30860 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30861 return default_function_section (decl, freq, startup, exit);
30863 if (!target_pure_code)
30864 return default_function_section (decl, freq, startup, exit);
30867 section_name = DECL_SECTION_NAME (decl);
30869 /* If a function is not in a named section then it falls under the 'default'
30870 text section, also known as '.text'. We can preserve previous behavior as
30871 the default text section already has the SHF_ARM_PURECODE section
30872 attribute. */
30873 if (!section_name)
30875 section *default_sec = default_function_section (decl, freq, startup,
30876 exit);
30878 /* If default_sec is not null, then it must be a special section such as
30879 .text.startup. We set the pure-code attribute and return the
30880 same section to preserve existing behavior. */
30881 if (default_sec)
30882 default_sec->common.flags |= SECTION_ARM_PURECODE;
30883 return default_sec;
30886 /* Otherwise look whether a section has already been created with
30887 'section_name'. */
30888 sec = get_named_section (decl, section_name, 0);
30889 if (!sec)
30890 /* If that is not the case, passing NULL as the section's name to
30891 'get_named_section' will create a section with the declaration's
30892 section name. */
30893 sec = get_named_section (decl, NULL, 0);
30895 /* Set the SHF_ARM_PURECODE attribute. */
30896 sec->common.flags |= SECTION_ARM_PURECODE;
30898 return sec;
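/* For example (illustrative), with -mpure-code a function placed with
   __attribute__ ((section (".foo"))) ends up in a ".foo" section whose
   flags include SECTION_ARM_PURECODE, so it is emitted with the
   SHF_ARM_PURECODE ELF flag. */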
30901 /* Implements the TARGET_SECTION_FLAGS hook.
30903 If DECL is a function declaration and pure-code is passed as an option
30904 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
30905 section's name and RELOC indicates whether the declaration's initializer may
30906 contain runtime relocations. */
30908 static unsigned int
30909 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30911 unsigned int flags = default_section_type_flags (decl, name, reloc);
30913 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
30914 flags |= SECTION_ARM_PURECODE;
30916 return flags;
30919 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30921 static void
30922 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
30923 rtx op0, rtx op1,
30924 rtx *quot_p, rtx *rem_p)
30926 if (mode == SImode)
30927 gcc_assert (!TARGET_IDIV);
30929 machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode),
30930 MODE_INT);
30932 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
30933 libval_mode, 2,
30934 op0, GET_MODE (op0),
30935 op1, GET_MODE (op1));
30937 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
30938 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
30939 GET_MODE_SIZE (mode));
30941 gcc_assert (quotient);
30942 gcc_assert (remainder);
30944 *quot_p = quotient;
30945 *rem_p = remainder;
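/* For SImode this typically amounts to a call to __aeabi_idivmod or
   __aeabi_uidivmod returning a DImode value, from which the quotient is
   extracted at subreg byte offset 0 and the remainder at offset
   GET_MODE_SIZE (SImode); e.g. dividing 7 by 3 yields 2 and 1 in the two
   halves. */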
30948 /* This function checks for the availability of the coprocessor builtin passed
30949 in BUILTIN for the current target. Returns true if it is available and
30950 false otherwise. If a BUILTIN is passed for which this function has not
30951 been implemented, it aborts through gcc_unreachable. */
30953 bool
30954 arm_coproc_builtin_available (enum unspecv builtin)
30956 /* None of these builtins are available in Thumb mode if the target only
30957 supports Thumb-1. */
30958 if (TARGET_THUMB1)
30959 return false;
30961 switch (builtin)
30963 case VUNSPEC_CDP:
30964 case VUNSPEC_LDC:
30965 case VUNSPEC_LDCL:
30966 case VUNSPEC_STC:
30967 case VUNSPEC_STCL:
30968 case VUNSPEC_MCR:
30969 case VUNSPEC_MRC:
30970 if (arm_arch4)
30971 return true;
30972 break;
30973 case VUNSPEC_CDP2:
30974 case VUNSPEC_LDC2:
30975 case VUNSPEC_LDC2L:
30976 case VUNSPEC_STC2:
30977 case VUNSPEC_STC2L:
30978 case VUNSPEC_MCR2:
30979 case VUNSPEC_MRC2:
30980 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
30981 ARMv8-{A,M}. */
30982 if (arm_arch5)
30983 return true;
30984 break;
30985 case VUNSPEC_MCRR:
30986 case VUNSPEC_MRRC:
30987 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
30988 ARMv8-{A,M}. */
30989 if (arm_arch6 || arm_arch5te)
30990 return true;
30991 break;
30992 case VUNSPEC_MCRR2:
30993 case VUNSPEC_MRRC2:
30994 if (arm_arch6)
30995 return true;
30996 break;
30997 default:
30998 gcc_unreachable ();
31000 return false;
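/* To summarise the checks above: the plain CDP/LDC/STC/MCR/MRC builtins
   require ARMv4 or later, their *2 variants require ARMv5 or later,
   MCRR/MRRC require ARMv5TE or ARMv6, MCRR2/MRRC2 require ARMv6, and none
   of them are available when targeting Thumb-1 only. */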
31003 /* This function returns true if OP is a valid memory operand for the ldc and
31004 stc coprocessor instructions and false otherwise. */
31006 bool
31007 arm_coproc_ldc_stc_legitimate_address (rtx op)
31009 HOST_WIDE_INT range;
31010 /* Has to be a memory operand. */
31011 if (!MEM_P (op))
31012 return false;
31014 op = XEXP (op, 0);
31016 /* We accept registers. */
31017 if (REG_P (op))
31018 return true;
31020 switch (GET_CODE (op))
31022 case PLUS:
31024 /* Or registers with an offset. */
31025 if (!REG_P (XEXP (op, 0)))
31026 return false;
31028 op = XEXP (op, 1);
31030 /* The offset must be an immediate though. */
31031 if (!CONST_INT_P (op))
31032 return false;
31034 range = INTVAL (op);
31036 /* Within the range of [-1020,1020]. */
31037 if (!IN_RANGE (range, -1020, 1020))
31038 return false;
31040 /* And a multiple of 4. */
31041 return (range % 4) == 0;
31043 case PRE_INC:
31044 case POST_INC:
31045 case PRE_DEC:
31046 case POST_DEC:
31047 return REG_P (XEXP (op, 0));
31048 default:
31049 gcc_unreachable ();
31051 return false;
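/* Some illustrative cases for the predicate above: [r0], [r0, #8] and
   [r0, #-1020] are accepted; [r0, #6] is rejected because the offset is
   not a multiple of 4, and [r0, #1024] is rejected because it lies
   outside [-1020, 1020]. */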
31053 #include "gt-arm.h"