1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
 108 struct four_ints
 109 {
 110   int i[4];
 111 };
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129 inline static int thumb1_index_register_rtx_p (rtx, int);
130 static int thumb_far_jump_used_p (void);
131 static bool thumb_force_lr_save (void);
132 static unsigned arm_size_return_regs (void);
133 static bool arm_assemble_integer (rtx, unsigned int, int);
134 static void arm_print_operand (FILE *, rtx, int);
135 static void arm_print_operand_address (FILE *, rtx);
136 static bool arm_print_operand_punct_valid_p (unsigned char code);
137 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138 static arm_cc get_arm_condition_code (rtx);
139 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140 static const char *output_multi_immediate (rtx *, const char *, const char *,
141 int, HOST_WIDE_INT);
142 static const char *shift_op (rtx, HOST_WIDE_INT *);
143 static struct machine_function *arm_init_machine_status (void);
144 static void thumb_exit (FILE *, int);
145 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147 static Mnode *add_minipool_forward_ref (Mfix *);
148 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149 static Mnode *add_minipool_backward_ref (Mfix *);
150 static void assign_minipool_offsets (Mfix *);
151 static void arm_print_value (FILE *, rtx);
152 static void dump_minipool (rtx_insn *);
153 static int arm_barrier_cost (rtx);
154 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157 machine_mode, rtx);
158 static void arm_reorg (void);
159 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160 static unsigned long arm_compute_save_reg0_reg12_mask (void);
161 static unsigned long arm_compute_save_reg_mask (void);
162 static unsigned long arm_isr_value (tree);
163 static unsigned long arm_compute_func_type (void);
164 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169 #endif
170 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172 static int arm_comp_type_attributes (const_tree, const_tree);
173 static void arm_set_default_type_attributes (tree);
174 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176 static int optimal_immediate_sequence (enum rtx_code code,
177 unsigned HOST_WIDE_INT val,
178 struct four_ints *return_sequence);
179 static int optimal_immediate_sequence_1 (enum rtx_code code,
180 unsigned HOST_WIDE_INT val,
181 struct four_ints *return_sequence,
182 int i);
183 static int arm_get_strip_length (int);
184 static bool arm_function_ok_for_sibcall (tree, tree);
185 static machine_mode arm_promote_function_mode (const_tree,
186 machine_mode, int *,
187 const_tree, int);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 static rtx arm_function_value (const_tree, const_tree, bool);
190 static rtx arm_libcall_value_1 (machine_mode);
191 static rtx arm_libcall_value (machine_mode, const_rtx);
192 static bool arm_function_value_regno_p (const unsigned int);
193 static void arm_internal_label (FILE *, const char *, unsigned long);
194 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195 tree);
196 static bool arm_have_conditional_execution (void);
197 static bool arm_cannot_force_const_mem (machine_mode, rtx);
198 static bool arm_legitimate_constant_p (machine_mode, rtx);
199 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209 static void emit_constant_insn (rtx cond, rtx pattern);
210 static rtx_insn *emit_set_insn (rtx, rtx);
211 static rtx emit_multi_reg_push (unsigned long, unsigned long);
212 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213 tree, bool);
214 static rtx arm_function_arg (cumulative_args_t, machine_mode,
215 const_tree, bool);
216 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217 const_tree, bool);
218 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220 const_tree);
221 static rtx aapcs_libcall_value (machine_mode);
222 static int aapcs_select_return_coproc (const_tree, const_tree);
224 #ifdef OBJECT_FORMAT_ELF
225 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227 #endif
228 #ifndef ARM_PE
229 static void arm_encode_section_info (tree, rtx, int);
230 #endif
232 static void arm_file_end (void);
233 static void arm_file_start (void);
235 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236 tree, int *, int);
237 static bool arm_pass_by_reference (cumulative_args_t,
238 machine_mode, const_tree, bool);
239 static bool arm_promote_prototypes (const_tree);
240 static bool arm_default_short_enums (void);
241 static bool arm_align_anon_bitfield (void);
242 static bool arm_return_in_msb (const_tree);
243 static bool arm_must_pass_in_stack (machine_mode, const_tree);
244 static bool arm_return_in_memory (const_tree, const_tree);
245 #if ARM_UNWIND_INFO
246 static void arm_unwind_emit (FILE *, rtx_insn *);
247 static bool arm_output_ttype (rtx);
248 static void arm_asm_emit_except_personality (rtx);
249 static void arm_asm_init_sections (void);
250 #endif
251 static rtx arm_dwarf_register_span (rtx);
253 static tree arm_cxx_guard_type (void);
254 static bool arm_cxx_guard_mask_bit (void);
255 static tree arm_get_cookie_size (tree);
256 static bool arm_cookie_has_size (void);
257 static bool arm_cxx_cdtor_returns_this (void);
258 static bool arm_cxx_key_method_may_be_inline (void);
259 static void arm_cxx_determine_class_data_visibility (tree);
260 static bool arm_cxx_class_data_always_comdat (void);
261 static bool arm_cxx_use_aeabi_atexit (void);
262 static void arm_init_libfuncs (void);
263 static tree arm_build_builtin_va_list (void);
264 static void arm_expand_builtin_va_start (tree, rtx);
265 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static void arm_option_override (void);
267 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268 static bool arm_macro_fusion_p (void);
269 static bool arm_cannot_copy_insn_p (rtx_insn *);
270 static int arm_issue_rate (void);
271 static int arm_first_cycle_multipass_dfa_lookahead (void);
272 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274 static bool arm_output_addr_const_extra (FILE *, rtx);
275 static bool arm_allocate_stack_slots_for_args (void);
276 static bool arm_warn_func_return (tree);
277 static const char *arm_invalid_parameter_type (const_tree t);
278 static const char *arm_invalid_return_type (const_tree t);
279 static tree arm_promoted_type (const_tree t);
280 static tree arm_convert_to_type (tree type, tree expr);
281 static bool arm_scalar_mode_supported_p (machine_mode);
282 static bool arm_frame_pointer_required (void);
283 static bool arm_can_eliminate (const int, const int);
284 static void arm_asm_trampoline_template (FILE *);
285 static void arm_trampoline_init (rtx, tree, rtx);
286 static rtx arm_trampoline_adjust_address (rtx);
287 static rtx arm_pic_static_addr (rtx orig, rtx reg);
288 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool arm_array_mode_supported_p (machine_mode,
292 unsigned HOST_WIDE_INT);
293 static machine_mode arm_preferred_simd_mode (machine_mode);
294 static bool arm_class_likely_spilled_p (reg_class_t);
295 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298 const_tree type,
299 int misalignment,
300 bool is_packed);
301 static void arm_conditional_register_usage (void);
302 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303 static unsigned int arm_autovectorize_vector_sizes (void);
304 static int arm_default_branch_cost (bool, bool);
305 static int arm_cortex_a5_branch_cost (bool, bool);
306 static int arm_cortex_m_branch_cost (bool, bool);
307 static int arm_cortex_m7_branch_cost (bool, bool);
309 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310 const unsigned char *sel);
312 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
314 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315 tree vectype,
316 int misalign ATTRIBUTE_UNUSED);
317 static unsigned arm_add_stmt_cost (void *data, int count,
318 enum vect_cost_for_stmt kind,
319 struct _stmt_vec_info *stmt_info,
320 int misalign,
321 enum vect_cost_model_location where);
323 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324 bool op0_preserve_value);
325 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
327 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
329 /* Table of machine attributes. */
 330 static const struct attribute_spec arm_attribute_table[] =
 331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, NULL, false },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, NULL, false },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
343 false },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
346 false },
347 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
348 false },
349 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
 359 multiple times.
 360 */
361 { "dllimport", 0, 0, true, false, false, NULL, false },
362 { "dllexport", 0, 0, true, false, false, NULL, false },
363 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
364 false },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
367 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
369 false },
370 #endif
 371   { NULL, 0, 0, false, false, false, NULL, false }
 372 };
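/* An illustrative sketch of how the attributes registered in the table
   above are spelled in user code (the function names here are
   hypothetical; the attribute names and arguments are the ones this
   table handles):

     // Callee may lie outside the 26-bit branch range, so force an
     // indirect call sequence.
     extern void far_away_fn (void) __attribute__ ((long_call));

     // Interrupt service routine; the prologue/epilogue preserve the
     // full interrupted context.  "IRQ" selects the interrupt kind.
     void irq_handler (void) __attribute__ ((isr ("IRQ")));

     // Select the VFP variant of the AAPCS for this one function.
     double scale (double x) __attribute__ ((pcs ("aapcs-vfp")));
*/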
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
383 #undef TARGET_LRA_P
384 #define TARGET_LRA_P hook_bool_void_true
386 #undef TARGET_ATTRIBUTE_TABLE
387 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_COMP_TYPE_ATTRIBUTES
419 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421 #undef TARGET_SCHED_MACRO_FUSION_P
422 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER arm_sched_reorder
436 #undef TARGET_REGISTER_MOVE_COST
437 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
439 #undef TARGET_MEMORY_MOVE_COST
440 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
442 #undef TARGET_ENCODE_SECTION_INFO
443 #ifdef ARM_PE
444 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
445 #else
446 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
447 #endif
449 #undef TARGET_STRIP_NAME_ENCODING
450 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
452 #undef TARGET_ASM_INTERNAL_LABEL
453 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
455 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
456 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
458 #undef TARGET_FUNCTION_VALUE
459 #define TARGET_FUNCTION_VALUE arm_function_value
461 #undef TARGET_LIBCALL_VALUE
462 #define TARGET_LIBCALL_VALUE arm_libcall_value
464 #undef TARGET_FUNCTION_VALUE_REGNO_P
465 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
467 #undef TARGET_ASM_OUTPUT_MI_THUNK
468 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
472 #undef TARGET_RTX_COSTS
473 #define TARGET_RTX_COSTS arm_rtx_costs
474 #undef TARGET_ADDRESS_COST
475 #define TARGET_ADDRESS_COST arm_address_cost
477 #undef TARGET_SHIFT_TRUNCATION_MASK
478 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481 #undef TARGET_ARRAY_MODE_SUPPORTED_P
482 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487 arm_autovectorize_vector_sizes
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
492 #undef TARGET_INIT_BUILTINS
493 #define TARGET_INIT_BUILTINS arm_init_builtins
494 #undef TARGET_EXPAND_BUILTIN
495 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
496 #undef TARGET_BUILTIN_DECL
497 #define TARGET_BUILTIN_DECL arm_builtin_decl
499 #undef TARGET_INIT_LIBFUNCS
500 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504 #undef TARGET_PROMOTE_PROTOTYPES
505 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506 #undef TARGET_PASS_BY_REFERENCE
507 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG arm_function_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_BOUNDARY
515 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
520 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
523 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525 #undef TARGET_TRAMPOLINE_INIT
526 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
530 #undef TARGET_WARN_FUNC_RETURN
531 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
533 #undef TARGET_DEFAULT_SHORT_ENUMS
534 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
536 #undef TARGET_ALIGN_ANON_BITFIELD
537 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
539 #undef TARGET_NARROW_VOLATILE_BITFIELD
540 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
542 #undef TARGET_CXX_GUARD_TYPE
543 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
545 #undef TARGET_CXX_GUARD_MASK_BIT
546 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
548 #undef TARGET_CXX_GET_COOKIE_SIZE
549 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
551 #undef TARGET_CXX_COOKIE_HAS_SIZE
552 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
554 #undef TARGET_CXX_CDTOR_RETURNS_THIS
555 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
557 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
560 #undef TARGET_CXX_USE_AEABI_ATEXIT
561 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
563 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565 arm_cxx_determine_class_data_visibility
567 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
570 #undef TARGET_RETURN_IN_MSB
571 #define TARGET_RETURN_IN_MSB arm_return_in_msb
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
576 #undef TARGET_MUST_PASS_IN_STACK
577 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
579 #if ARM_UNWIND_INFO
580 #undef TARGET_ASM_UNWIND_EMIT
581 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
583 /* EABI unwinding tables use a different format for the typeinfo tables. */
584 #undef TARGET_ASM_TTYPE
585 #define TARGET_ASM_TTYPE arm_output_ttype
587 #undef TARGET_ARM_EABI_UNWINDER
588 #define TARGET_ARM_EABI_UNWINDER true
590 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
593 #undef TARGET_ASM_INIT_SECTIONS
594 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595 #endif /* ARM_UNWIND_INFO */
597 #undef TARGET_DWARF_REGISTER_SPAN
598 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
600 #undef TARGET_CANNOT_COPY_INSN_P
601 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
603 #ifdef HAVE_AS_TLS
604 #undef TARGET_HAVE_TLS
605 #define TARGET_HAVE_TLS true
606 #endif
608 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
609 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
611 #undef TARGET_LEGITIMATE_CONSTANT_P
612 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
617 #undef TARGET_MAX_ANCHOR_OFFSET
618 #define TARGET_MAX_ANCHOR_OFFSET 4095
620 /* The minimum is set such that the total size of the block
621 for a particular anchor is -4088 + 1 + 4095 bytes, which is
622 divisible by eight, ensuring natural spacing of anchors. */
623 #undef TARGET_MIN_ANCHOR_OFFSET
624 #define TARGET_MIN_ANCHOR_OFFSET -4088
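/* As a quick check of the arithmetic in the comment above: the anchor
   block runs from offset -4088 to +4095, i.e. 4088 + 1 + 4095 = 8184
   bytes, and 8184 = 8 * 1023, so successive anchors stay naturally
   spaced on eight-byte boundaries.  */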
626 #undef TARGET_SCHED_ISSUE_RATE
627 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631 arm_first_cycle_multipass_dfa_lookahead
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635 arm_first_cycle_multipass_dfa_lookahead_guard
637 #undef TARGET_MANGLE_TYPE
638 #define TARGET_MANGLE_TYPE arm_mangle_type
640 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
643 #undef TARGET_BUILD_BUILTIN_VA_LIST
644 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645 #undef TARGET_EXPAND_BUILTIN_VA_START
646 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
648 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653 #endif
655 #undef TARGET_LEGITIMATE_ADDRESS_P
656 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
658 #undef TARGET_PREFERRED_RELOAD_CLASS
659 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
661 #undef TARGET_INVALID_PARAMETER_TYPE
662 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
664 #undef TARGET_INVALID_RETURN_TYPE
665 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
667 #undef TARGET_PROMOTED_TYPE
668 #define TARGET_PROMOTED_TYPE arm_promoted_type
670 #undef TARGET_CONVERT_TO_TYPE
671 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
673 #undef TARGET_SCALAR_MODE_SUPPORTED_P
674 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
676 #undef TARGET_FRAME_POINTER_REQUIRED
677 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
679 #undef TARGET_CAN_ELIMINATE
680 #define TARGET_CAN_ELIMINATE arm_can_eliminate
682 #undef TARGET_CONDITIONAL_REGISTER_USAGE
683 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
685 #undef TARGET_CLASS_LIKELY_SPILLED_P
686 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
688 #undef TARGET_VECTORIZE_BUILTINS
689 #define TARGET_VECTORIZE_BUILTINS
691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693 arm_builtin_vectorized_function
695 #undef TARGET_VECTOR_ALIGNMENT
696 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
698 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700 arm_vector_alignment_reachable
702 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704 arm_builtin_support_vector_misalignment
706 #undef TARGET_PREFERRED_RENAME_CLASS
707 #define TARGET_PREFERRED_RENAME_CLASS \
708 arm_preferred_rename_class
710 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712 arm_vectorize_vec_perm_const_ok
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716 arm_builtin_vectorization_cost
717 #undef TARGET_VECTORIZE_ADD_STMT_COST
718 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
720 #undef TARGET_CANONICALIZE_COMPARISON
721 #define TARGET_CANONICALIZE_COMPARISON \
722 arm_canonicalize_comparison
724 #undef TARGET_ASAN_SHADOW_OFFSET
725 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
727 #undef MAX_INSN_PER_IT_BLOCK
728 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
730 #undef TARGET_CAN_USE_DOLOOP_P
731 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
733 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
739 #undef TARGET_SCHED_FUSION_PRIORITY
740 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
742 struct gcc_target targetm = TARGET_INITIALIZER;
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
 766 /* Which floating point hardware to use. */
767 const struct arm_fpu_desc *arm_fpu_desc;
769 /* Used for Thumb call_via trampolines. */
770 rtx thumb_call_via_label[14];
771 static int thumb_call_reg_needed;
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
 857 XXX This is a bit of a hack; it's intended to help work around
 858 problems in GLD, which doesn't understand that armv5t code is
 859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
869 /* Nonzero if chip disallows volatile memory access in IT block. */
870 int arm_arch_no_volatile_ce;
 872 /* Nonzero if we should use Neon to handle 64-bit operations rather
 873 than core registers. */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 machine_mode output_memory_reference_mode;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* Nonzero if the core has a very small, high-latency, multiply unit. */
908 int arm_m_profile_small_mul = 0;
910 /* The condition codes of the ARM, and the inverse function. */
 911 static const char * const arm_condition_codes[] =
 912 {
 913   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 914   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 915 };
917 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
 918 int arm_regs_in_sequence[] =
 919 {
 920   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 921 };
923 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
924 #define streq(string1, string2) (strcmp (string1, string2) == 0)
926 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
927 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
928 | (1 << PIC_OFFSET_TABLE_REGNUM)))
930 /* Initialization code. */
 932 struct processors
 933 {
 934   const char *const name;
 935   enum processor_type core;
 936   const char *arch;
 937   enum base_architecture base_arch;
 938   const unsigned long flags;
 939   const struct tune_params *const tune;
 940 };
943 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
944 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
945 prefetch_slots, \
946 l1_size, \
947 l1_line_size
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
 963   1, /* cond_not_taken_branch_cost. */
 964 };
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
967 #include "aarch-cost-tables.h"
971 const struct cpu_cost_table cortexa9_extra_costs =
973 /* ALU */
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
993 /* MULT SImode */
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1002 /* MULT DImode */
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1012 /* LD/ST */
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1) /* store_unaligned. */
1033 /* FP SFmode */
1035 COSTS_N_INSNS (14), /* div. */
1036 COSTS_N_INSNS (4), /* mult. */
1037 COSTS_N_INSNS (7), /* mult_addsub. */
1038 COSTS_N_INSNS (30), /* fma. */
1039 COSTS_N_INSNS (3), /* addsub. */
1040 COSTS_N_INSNS (1), /* fpconst. */
1041 COSTS_N_INSNS (1), /* neg. */
1042 COSTS_N_INSNS (3), /* compare. */
1043 COSTS_N_INSNS (3), /* widen. */
1044 COSTS_N_INSNS (3), /* narrow. */
1045 COSTS_N_INSNS (3), /* toint. */
1046 COSTS_N_INSNS (3), /* fromint. */
1047 COSTS_N_INSNS (3) /* roundint. */
1049 /* FP DFmode */
1051 COSTS_N_INSNS (24), /* div. */
1052 COSTS_N_INSNS (5), /* mult. */
1053 COSTS_N_INSNS (8), /* mult_addsub. */
1054 COSTS_N_INSNS (30), /* fma. */
1055 COSTS_N_INSNS (3), /* addsub. */
1056 COSTS_N_INSNS (1), /* fpconst. */
1057 COSTS_N_INSNS (1), /* neg. */
1058 COSTS_N_INSNS (3), /* compare. */
1059 COSTS_N_INSNS (3), /* widen. */
1060 COSTS_N_INSNS (3), /* narrow. */
1061 COSTS_N_INSNS (3), /* toint. */
1062 COSTS_N_INSNS (3), /* fromint. */
1063 COSTS_N_INSNS (3) /* roundint. */
1066 /* Vector */
1068 COSTS_N_INSNS (1) /* alu. */
1072 const struct cpu_cost_table cortexa8_extra_costs =
1074 /* ALU */
1076 0, /* arith. */
1077 0, /* logical. */
1078 COSTS_N_INSNS (1), /* shift. */
1079 0, /* shift_reg. */
1080 COSTS_N_INSNS (1), /* arith_shift. */
1081 0, /* arith_shift_reg. */
1082 COSTS_N_INSNS (1), /* log_shift. */
1083 0, /* log_shift_reg. */
1084 0, /* extend. */
1085 0, /* extend_arith. */
1086 0, /* bfi. */
1087 0, /* bfx. */
1088 0, /* clz. */
1089 0, /* rev. */
1090 0, /* non_exec. */
1091 true /* non_exec_costs_exec. */
1094 /* MULT SImode */
1096 COSTS_N_INSNS (1), /* simple. */
1097 COSTS_N_INSNS (1), /* flag_setting. */
1098 COSTS_N_INSNS (1), /* extend. */
1099 COSTS_N_INSNS (1), /* add. */
1100 COSTS_N_INSNS (1), /* extend_add. */
1101 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1103 /* MULT DImode */
1105 0, /* simple (N/A). */
1106 0, /* flag_setting (N/A). */
1107 COSTS_N_INSNS (2), /* extend. */
1108 0, /* add (N/A). */
1109 COSTS_N_INSNS (2), /* extend_add. */
1110 0 /* idiv (N/A). */
1113 /* LD/ST */
1115 COSTS_N_INSNS (1), /* load. */
1116 COSTS_N_INSNS (1), /* load_sign_extend. */
1117 COSTS_N_INSNS (1), /* ldrd. */
1118 COSTS_N_INSNS (1), /* ldm_1st. */
1119 1, /* ldm_regs_per_insn_1st. */
1120 2, /* ldm_regs_per_insn_subsequent. */
1121 COSTS_N_INSNS (1), /* loadf. */
1122 COSTS_N_INSNS (1), /* loadd. */
1123 COSTS_N_INSNS (1), /* load_unaligned. */
1124 COSTS_N_INSNS (1), /* store. */
1125 COSTS_N_INSNS (1), /* strd. */
1126 COSTS_N_INSNS (1), /* stm_1st. */
1127 1, /* stm_regs_per_insn_1st. */
1128 2, /* stm_regs_per_insn_subsequent. */
1129 COSTS_N_INSNS (1), /* storef. */
1130 COSTS_N_INSNS (1), /* stored. */
1131 COSTS_N_INSNS (1) /* store_unaligned. */
1134 /* FP SFmode */
1136 COSTS_N_INSNS (36), /* div. */
1137 COSTS_N_INSNS (11), /* mult. */
1138 COSTS_N_INSNS (20), /* mult_addsub. */
1139 COSTS_N_INSNS (30), /* fma. */
1140 COSTS_N_INSNS (9), /* addsub. */
1141 COSTS_N_INSNS (3), /* fpconst. */
1142 COSTS_N_INSNS (3), /* neg. */
1143 COSTS_N_INSNS (6), /* compare. */
1144 COSTS_N_INSNS (4), /* widen. */
1145 COSTS_N_INSNS (4), /* narrow. */
1146 COSTS_N_INSNS (8), /* toint. */
1147 COSTS_N_INSNS (8), /* fromint. */
1148 COSTS_N_INSNS (8) /* roundint. */
1150 /* FP DFmode */
1152 COSTS_N_INSNS (64), /* div. */
1153 COSTS_N_INSNS (16), /* mult. */
1154 COSTS_N_INSNS (25), /* mult_addsub. */
1155 COSTS_N_INSNS (30), /* fma. */
1156 COSTS_N_INSNS (9), /* addsub. */
1157 COSTS_N_INSNS (3), /* fpconst. */
1158 COSTS_N_INSNS (3), /* neg. */
1159 COSTS_N_INSNS (6), /* compare. */
1160 COSTS_N_INSNS (6), /* widen. */
1161 COSTS_N_INSNS (6), /* narrow. */
1162 COSTS_N_INSNS (8), /* toint. */
1163 COSTS_N_INSNS (8), /* fromint. */
1164 COSTS_N_INSNS (8) /* roundint. */
1167 /* Vector */
1169 COSTS_N_INSNS (1) /* alu. */
1173 const struct cpu_cost_table cortexa5_extra_costs =
1175 /* ALU */
1177 0, /* arith. */
1178 0, /* logical. */
1179 COSTS_N_INSNS (1), /* shift. */
1180 COSTS_N_INSNS (1), /* shift_reg. */
1181 COSTS_N_INSNS (1), /* arith_shift. */
1182 COSTS_N_INSNS (1), /* arith_shift_reg. */
1183 COSTS_N_INSNS (1), /* log_shift. */
1184 COSTS_N_INSNS (1), /* log_shift_reg. */
1185 COSTS_N_INSNS (1), /* extend. */
1186 COSTS_N_INSNS (1), /* extend_arith. */
1187 COSTS_N_INSNS (1), /* bfi. */
1188 COSTS_N_INSNS (1), /* bfx. */
1189 COSTS_N_INSNS (1), /* clz. */
1190 COSTS_N_INSNS (1), /* rev. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1196 /* MULT SImode */
1198 0, /* simple. */
1199 COSTS_N_INSNS (1), /* flag_setting. */
1200 COSTS_N_INSNS (1), /* extend. */
1201 COSTS_N_INSNS (1), /* add. */
1202 COSTS_N_INSNS (1), /* extend_add. */
1203 COSTS_N_INSNS (7) /* idiv. */
1205 /* MULT DImode */
1207 0, /* simple (N/A). */
1208 0, /* flag_setting (N/A). */
1209 COSTS_N_INSNS (1), /* extend. */
1210 0, /* add. */
1211 COSTS_N_INSNS (2), /* extend_add. */
1212 0 /* idiv (N/A). */
1215 /* LD/ST */
1217 COSTS_N_INSNS (1), /* load. */
1218 COSTS_N_INSNS (1), /* load_sign_extend. */
1219 COSTS_N_INSNS (6), /* ldrd. */
1220 COSTS_N_INSNS (1), /* ldm_1st. */
1221 1, /* ldm_regs_per_insn_1st. */
1222 2, /* ldm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* loadf. */
1224 COSTS_N_INSNS (4), /* loadd. */
1225 COSTS_N_INSNS (1), /* load_unaligned. */
1226 COSTS_N_INSNS (1), /* store. */
1227 COSTS_N_INSNS (3), /* strd. */
1228 COSTS_N_INSNS (1), /* stm_1st. */
1229 1, /* stm_regs_per_insn_1st. */
1230 2, /* stm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* storef. */
1232 COSTS_N_INSNS (2), /* stored. */
1233 COSTS_N_INSNS (1) /* store_unaligned. */
1236 /* FP SFmode */
1238 COSTS_N_INSNS (15), /* div. */
1239 COSTS_N_INSNS (3), /* mult. */
1240 COSTS_N_INSNS (7), /* mult_addsub. */
1241 COSTS_N_INSNS (7), /* fma. */
1242 COSTS_N_INSNS (3), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (3), /* compare. */
1246 COSTS_N_INSNS (3), /* widen. */
1247 COSTS_N_INSNS (3), /* narrow. */
1248 COSTS_N_INSNS (3), /* toint. */
1249 COSTS_N_INSNS (3), /* fromint. */
1250 COSTS_N_INSNS (3) /* roundint. */
1252 /* FP DFmode */
1254 COSTS_N_INSNS (30), /* div. */
1255 COSTS_N_INSNS (6), /* mult. */
1256 COSTS_N_INSNS (10), /* mult_addsub. */
1257 COSTS_N_INSNS (7), /* fma. */
1258 COSTS_N_INSNS (3), /* addsub. */
1259 COSTS_N_INSNS (3), /* fpconst. */
1260 COSTS_N_INSNS (3), /* neg. */
1261 COSTS_N_INSNS (3), /* compare. */
1262 COSTS_N_INSNS (3), /* widen. */
1263 COSTS_N_INSNS (3), /* narrow. */
1264 COSTS_N_INSNS (3), /* toint. */
1265 COSTS_N_INSNS (3), /* fromint. */
1266 COSTS_N_INSNS (3) /* roundint. */
1269 /* Vector */
1271 COSTS_N_INSNS (1) /* alu. */
1276 const struct cpu_cost_table cortexa7_extra_costs =
1278 /* ALU */
1280 0, /* arith. */
1281 0, /* logical. */
1282 COSTS_N_INSNS (1), /* shift. */
1283 COSTS_N_INSNS (1), /* shift_reg. */
1284 COSTS_N_INSNS (1), /* arith_shift. */
1285 COSTS_N_INSNS (1), /* arith_shift_reg. */
1286 COSTS_N_INSNS (1), /* log_shift. */
1287 COSTS_N_INSNS (1), /* log_shift_reg. */
1288 COSTS_N_INSNS (1), /* extend. */
1289 COSTS_N_INSNS (1), /* extend_arith. */
1290 COSTS_N_INSNS (1), /* bfi. */
1291 COSTS_N_INSNS (1), /* bfx. */
1292 COSTS_N_INSNS (1), /* clz. */
1293 COSTS_N_INSNS (1), /* rev. */
1294 0, /* non_exec. */
1295 true /* non_exec_costs_exec. */
1299 /* MULT SImode */
1301 0, /* simple. */
1302 COSTS_N_INSNS (1), /* flag_setting. */
1303 COSTS_N_INSNS (1), /* extend. */
1304 COSTS_N_INSNS (1), /* add. */
1305 COSTS_N_INSNS (1), /* extend_add. */
1306 COSTS_N_INSNS (7) /* idiv. */
1308 /* MULT DImode */
1310 0, /* simple (N/A). */
1311 0, /* flag_setting (N/A). */
1312 COSTS_N_INSNS (1), /* extend. */
1313 0, /* add. */
1314 COSTS_N_INSNS (2), /* extend_add. */
1315 0 /* idiv (N/A). */
1318 /* LD/ST */
1320 COSTS_N_INSNS (1), /* load. */
1321 COSTS_N_INSNS (1), /* load_sign_extend. */
1322 COSTS_N_INSNS (3), /* ldrd. */
1323 COSTS_N_INSNS (1), /* ldm_1st. */
1324 1, /* ldm_regs_per_insn_1st. */
1325 2, /* ldm_regs_per_insn_subsequent. */
1326 COSTS_N_INSNS (2), /* loadf. */
1327 COSTS_N_INSNS (2), /* loadd. */
1328 COSTS_N_INSNS (1), /* load_unaligned. */
1329 COSTS_N_INSNS (1), /* store. */
1330 COSTS_N_INSNS (3), /* strd. */
1331 COSTS_N_INSNS (1), /* stm_1st. */
1332 1, /* stm_regs_per_insn_1st. */
1333 2, /* stm_regs_per_insn_subsequent. */
1334 COSTS_N_INSNS (2), /* storef. */
1335 COSTS_N_INSNS (2), /* stored. */
1336 COSTS_N_INSNS (1) /* store_unaligned. */
1339 /* FP SFmode */
1341 COSTS_N_INSNS (15), /* div. */
1342 COSTS_N_INSNS (3), /* mult. */
1343 COSTS_N_INSNS (7), /* mult_addsub. */
1344 COSTS_N_INSNS (7), /* fma. */
1345 COSTS_N_INSNS (3), /* addsub. */
1346 COSTS_N_INSNS (3), /* fpconst. */
1347 COSTS_N_INSNS (3), /* neg. */
1348 COSTS_N_INSNS (3), /* compare. */
1349 COSTS_N_INSNS (3), /* widen. */
1350 COSTS_N_INSNS (3), /* narrow. */
1351 COSTS_N_INSNS (3), /* toint. */
1352 COSTS_N_INSNS (3), /* fromint. */
1353 COSTS_N_INSNS (3) /* roundint. */
1355 /* FP DFmode */
1357 COSTS_N_INSNS (30), /* div. */
1358 COSTS_N_INSNS (6), /* mult. */
1359 COSTS_N_INSNS (10), /* mult_addsub. */
1360 COSTS_N_INSNS (7), /* fma. */
1361 COSTS_N_INSNS (3), /* addsub. */
1362 COSTS_N_INSNS (3), /* fpconst. */
1363 COSTS_N_INSNS (3), /* neg. */
1364 COSTS_N_INSNS (3), /* compare. */
1365 COSTS_N_INSNS (3), /* widen. */
1366 COSTS_N_INSNS (3), /* narrow. */
1367 COSTS_N_INSNS (3), /* toint. */
1368 COSTS_N_INSNS (3), /* fromint. */
1369 COSTS_N_INSNS (3) /* roundint. */
1372 /* Vector */
1374 COSTS_N_INSNS (1) /* alu. */
1378 const struct cpu_cost_table cortexa12_extra_costs =
1380 /* ALU */
1382 0, /* arith. */
1383 0, /* logical. */
1384 0, /* shift. */
1385 COSTS_N_INSNS (1), /* shift_reg. */
1386 COSTS_N_INSNS (1), /* arith_shift. */
1387 COSTS_N_INSNS (1), /* arith_shift_reg. */
1388 COSTS_N_INSNS (1), /* log_shift. */
1389 COSTS_N_INSNS (1), /* log_shift_reg. */
1390 0, /* extend. */
1391 COSTS_N_INSNS (1), /* extend_arith. */
1392 0, /* bfi. */
1393 COSTS_N_INSNS (1), /* bfx. */
1394 COSTS_N_INSNS (1), /* clz. */
1395 COSTS_N_INSNS (1), /* rev. */
1396 0, /* non_exec. */
1397 true /* non_exec_costs_exec. */
1399 /* MULT SImode */
1402 COSTS_N_INSNS (2), /* simple. */
1403 COSTS_N_INSNS (3), /* flag_setting. */
1404 COSTS_N_INSNS (2), /* extend. */
1405 COSTS_N_INSNS (3), /* add. */
1406 COSTS_N_INSNS (2), /* extend_add. */
1407 COSTS_N_INSNS (18) /* idiv. */
1409 /* MULT DImode */
1411 0, /* simple (N/A). */
1412 0, /* flag_setting (N/A). */
1413 COSTS_N_INSNS (3), /* extend. */
1414 0, /* add (N/A). */
1415 COSTS_N_INSNS (3), /* extend_add. */
1416 0 /* idiv (N/A). */
1419 /* LD/ST */
1421 COSTS_N_INSNS (3), /* load. */
1422 COSTS_N_INSNS (3), /* load_sign_extend. */
1423 COSTS_N_INSNS (3), /* ldrd. */
1424 COSTS_N_INSNS (3), /* ldm_1st. */
1425 1, /* ldm_regs_per_insn_1st. */
1426 2, /* ldm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (3), /* loadf. */
1428 COSTS_N_INSNS (3), /* loadd. */
1429 0, /* load_unaligned. */
1430 0, /* store. */
1431 0, /* strd. */
1432 0, /* stm_1st. */
1433 1, /* stm_regs_per_insn_1st. */
1434 2, /* stm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (2), /* storef. */
1436 COSTS_N_INSNS (2), /* stored. */
1437 0 /* store_unaligned. */
1440 /* FP SFmode */
1442 COSTS_N_INSNS (17), /* div. */
1443 COSTS_N_INSNS (4), /* mult. */
1444 COSTS_N_INSNS (8), /* mult_addsub. */
1445 COSTS_N_INSNS (8), /* fma. */
1446 COSTS_N_INSNS (4), /* addsub. */
1447 COSTS_N_INSNS (2), /* fpconst. */
1448 COSTS_N_INSNS (2), /* neg. */
1449 COSTS_N_INSNS (2), /* compare. */
1450 COSTS_N_INSNS (4), /* widen. */
1451 COSTS_N_INSNS (4), /* narrow. */
1452 COSTS_N_INSNS (4), /* toint. */
1453 COSTS_N_INSNS (4), /* fromint. */
1454 COSTS_N_INSNS (4) /* roundint. */
1456 /* FP DFmode */
1458 COSTS_N_INSNS (31), /* div. */
1459 COSTS_N_INSNS (4), /* mult. */
1460 COSTS_N_INSNS (8), /* mult_addsub. */
1461 COSTS_N_INSNS (8), /* fma. */
1462 COSTS_N_INSNS (4), /* addsub. */
1463 COSTS_N_INSNS (2), /* fpconst. */
1464 COSTS_N_INSNS (2), /* neg. */
1465 COSTS_N_INSNS (2), /* compare. */
1466 COSTS_N_INSNS (4), /* widen. */
1467 COSTS_N_INSNS (4), /* narrow. */
1468 COSTS_N_INSNS (4), /* toint. */
1469 COSTS_N_INSNS (4), /* fromint. */
1470 COSTS_N_INSNS (4) /* roundint. */
1473 /* Vector */
1475 COSTS_N_INSNS (1) /* alu. */
1479 const struct cpu_cost_table cortexa15_extra_costs =
1481 /* ALU */
1483 0, /* arith. */
1484 0, /* logical. */
1485 0, /* shift. */
1486 0, /* shift_reg. */
1487 COSTS_N_INSNS (1), /* arith_shift. */
1488 COSTS_N_INSNS (1), /* arith_shift_reg. */
1489 COSTS_N_INSNS (1), /* log_shift. */
1490 COSTS_N_INSNS (1), /* log_shift_reg. */
1491 0, /* extend. */
1492 COSTS_N_INSNS (1), /* extend_arith. */
1493 COSTS_N_INSNS (1), /* bfi. */
1494 0, /* bfx. */
1495 0, /* clz. */
1496 0, /* rev. */
1497 0, /* non_exec. */
1498 true /* non_exec_costs_exec. */
1500 /* MULT SImode */
1503 COSTS_N_INSNS (2), /* simple. */
1504 COSTS_N_INSNS (3), /* flag_setting. */
1505 COSTS_N_INSNS (2), /* extend. */
1506 COSTS_N_INSNS (2), /* add. */
1507 COSTS_N_INSNS (2), /* extend_add. */
1508 COSTS_N_INSNS (18) /* idiv. */
1510 /* MULT DImode */
1512 0, /* simple (N/A). */
1513 0, /* flag_setting (N/A). */
1514 COSTS_N_INSNS (3), /* extend. */
1515 0, /* add (N/A). */
1516 COSTS_N_INSNS (3), /* extend_add. */
1517 0 /* idiv (N/A). */
1520 /* LD/ST */
1522 COSTS_N_INSNS (3), /* load. */
1523 COSTS_N_INSNS (3), /* load_sign_extend. */
1524 COSTS_N_INSNS (3), /* ldrd. */
1525 COSTS_N_INSNS (4), /* ldm_1st. */
1526 1, /* ldm_regs_per_insn_1st. */
1527 2, /* ldm_regs_per_insn_subsequent. */
1528 COSTS_N_INSNS (4), /* loadf. */
1529 COSTS_N_INSNS (4), /* loadd. */
1530 0, /* load_unaligned. */
1531 0, /* store. */
1532 0, /* strd. */
1533 COSTS_N_INSNS (1), /* stm_1st. */
1534 1, /* stm_regs_per_insn_1st. */
1535 2, /* stm_regs_per_insn_subsequent. */
1536 0, /* storef. */
1537 0, /* stored. */
1538 0 /* store_unaligned. */
1541 /* FP SFmode */
1543 COSTS_N_INSNS (17), /* div. */
1544 COSTS_N_INSNS (4), /* mult. */
1545 COSTS_N_INSNS (8), /* mult_addsub. */
1546 COSTS_N_INSNS (8), /* fma. */
1547 COSTS_N_INSNS (4), /* addsub. */
1548 COSTS_N_INSNS (2), /* fpconst. */
1549 COSTS_N_INSNS (2), /* neg. */
1550 COSTS_N_INSNS (5), /* compare. */
1551 COSTS_N_INSNS (4), /* widen. */
1552 COSTS_N_INSNS (4), /* narrow. */
1553 COSTS_N_INSNS (4), /* toint. */
1554 COSTS_N_INSNS (4), /* fromint. */
1555 COSTS_N_INSNS (4) /* roundint. */
1557 /* FP DFmode */
1559 COSTS_N_INSNS (31), /* div. */
1560 COSTS_N_INSNS (4), /* mult. */
1561 COSTS_N_INSNS (8), /* mult_addsub. */
1562 COSTS_N_INSNS (8), /* fma. */
1563 COSTS_N_INSNS (4), /* addsub. */
1564 COSTS_N_INSNS (2), /* fpconst. */
1565 COSTS_N_INSNS (2), /* neg. */
1566 COSTS_N_INSNS (2), /* compare. */
1567 COSTS_N_INSNS (4), /* widen. */
1568 COSTS_N_INSNS (4), /* narrow. */
1569 COSTS_N_INSNS (4), /* toint. */
1570 COSTS_N_INSNS (4), /* fromint. */
1571 COSTS_N_INSNS (4) /* roundint. */
1574 /* Vector */
1576 COSTS_N_INSNS (1) /* alu. */
1580 const struct cpu_cost_table v7m_extra_costs =
1582 /* ALU */
1584 0, /* arith. */
1585 0, /* logical. */
1586 0, /* shift. */
1587 0, /* shift_reg. */
1588 0, /* arith_shift. */
1589 COSTS_N_INSNS (1), /* arith_shift_reg. */
1590 0, /* log_shift. */
1591 COSTS_N_INSNS (1), /* log_shift_reg. */
1592 0, /* extend. */
1593 COSTS_N_INSNS (1), /* extend_arith. */
1594 0, /* bfi. */
1595 0, /* bfx. */
1596 0, /* clz. */
1597 0, /* rev. */
1598 COSTS_N_INSNS (1), /* non_exec. */
1599 false /* non_exec_costs_exec. */
1602 /* MULT SImode */
1604 COSTS_N_INSNS (1), /* simple. */
1605 COSTS_N_INSNS (1), /* flag_setting. */
1606 COSTS_N_INSNS (2), /* extend. */
1607 COSTS_N_INSNS (1), /* add. */
1608 COSTS_N_INSNS (3), /* extend_add. */
1609 COSTS_N_INSNS (8) /* idiv. */
1611 /* MULT DImode */
1613 0, /* simple (N/A). */
1614 0, /* flag_setting (N/A). */
1615 COSTS_N_INSNS (2), /* extend. */
1616 0, /* add (N/A). */
1617 COSTS_N_INSNS (3), /* extend_add. */
1618 0 /* idiv (N/A). */
1621 /* LD/ST */
1623 COSTS_N_INSNS (2), /* load. */
1624 0, /* load_sign_extend. */
1625 COSTS_N_INSNS (3), /* ldrd. */
1626 COSTS_N_INSNS (2), /* ldm_1st. */
1627 1, /* ldm_regs_per_insn_1st. */
1628 1, /* ldm_regs_per_insn_subsequent. */
1629 COSTS_N_INSNS (2), /* loadf. */
1630 COSTS_N_INSNS (3), /* loadd. */
1631 COSTS_N_INSNS (1), /* load_unaligned. */
1632 COSTS_N_INSNS (2), /* store. */
1633 COSTS_N_INSNS (3), /* strd. */
1634 COSTS_N_INSNS (2), /* stm_1st. */
1635 1, /* stm_regs_per_insn_1st. */
1636 1, /* stm_regs_per_insn_subsequent. */
1637 COSTS_N_INSNS (2), /* storef. */
1638 COSTS_N_INSNS (3), /* stored. */
1639 COSTS_N_INSNS (1) /* store_unaligned. */
1642 /* FP SFmode */
1644 COSTS_N_INSNS (7), /* div. */
1645 COSTS_N_INSNS (2), /* mult. */
1646 COSTS_N_INSNS (5), /* mult_addsub. */
1647 COSTS_N_INSNS (3), /* fma. */
1648 COSTS_N_INSNS (1), /* addsub. */
1649 0, /* fpconst. */
1650 0, /* neg. */
1651 0, /* compare. */
1652 0, /* widen. */
1653 0, /* narrow. */
1654 0, /* toint. */
1655 0, /* fromint. */
1656 0 /* roundint. */
1658 /* FP DFmode */
1660 COSTS_N_INSNS (15), /* div. */
1661 COSTS_N_INSNS (5), /* mult. */
1662 COSTS_N_INSNS (7), /* mult_addsub. */
1663 COSTS_N_INSNS (7), /* fma. */
1664 COSTS_N_INSNS (3), /* addsub. */
1665 0, /* fpconst. */
1666 0, /* neg. */
1667 0, /* compare. */
1668 0, /* widen. */
1669 0, /* narrow. */
1670 0, /* toint. */
1671 0, /* fromint. */
1672 0 /* roundint. */
1675 /* Vector */
1677 COSTS_N_INSNS (1) /* alu. */
1681 #define ARM_FUSE_NOTHING (0)
1682 #define ARM_FUSE_MOVW_MOVT (1 << 0)
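/* ARM_FUSE_MOVW_MOVT is intended to mark cores on which a movw/movt pair
   that materializes a 32-bit immediate (low half, then high half) should
   be kept adjacent so the scheduler can fuse it; the pairing itself is
   checked in aarch_macro_fusion_pair_p.  */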
1684 const struct tune_params arm_slowmul_tune =
1686 arm_slowmul_rtx_costs,
1687 NULL,
1688 NULL, /* Sched adj cost. */
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 true, /* Prefer constant pool. */
1693 arm_default_branch_cost,
1694 false, /* Prefer LDRD/STRD. */
1695 {true, true}, /* Prefer non short circuit. */
1696 &arm_default_vec_cost, /* Vectorizer costs. */
1697 false, /* Prefer Neon for 64-bits bitops. */
1698 false, false, /* Prefer 32-bit encodings. */
1699 false, /* Prefer Neon for stringops. */
1700 8, /* Maximum insns to inline memset. */
1701 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1702 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1705 const struct tune_params arm_fastmul_tune =
1707 arm_fastmul_rtx_costs,
1708 NULL,
1709 NULL, /* Sched adj cost. */
1710 1, /* Constant limit. */
1711 5, /* Max cond insns. */
1712 ARM_PREFETCH_NOT_BENEFICIAL,
1713 true, /* Prefer constant pool. */
1714 arm_default_branch_cost,
1715 false, /* Prefer LDRD/STRD. */
1716 {true, true}, /* Prefer non short circuit. */
1717 &arm_default_vec_cost, /* Vectorizer costs. */
1718 false, /* Prefer Neon for 64-bits bitops. */
1719 false, false, /* Prefer 32-bit encodings. */
1720 false, /* Prefer Neon for stringops. */
1721 8, /* Maximum insns to inline memset. */
1722 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1723 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1726 /* StrongARM has early execution of branches, so a sequence that is worth
1727 skipping is shorter. Set max_insns_skipped to a lower value. */
1729 const struct tune_params arm_strongarm_tune =
1731 arm_fastmul_rtx_costs,
1732 NULL,
1733 NULL, /* Sched adj cost. */
1734 1, /* Constant limit. */
1735 3, /* Max cond insns. */
1736 ARM_PREFETCH_NOT_BENEFICIAL,
1737 true, /* Prefer constant pool. */
1738 arm_default_branch_cost,
1739 false, /* Prefer LDRD/STRD. */
1740 {true, true}, /* Prefer non short circuit. */
1741 &arm_default_vec_cost, /* Vectorizer costs. */
1742 false, /* Prefer Neon for 64-bits bitops. */
1743 false, false, /* Prefer 32-bit encodings. */
1744 false, /* Prefer Neon for stringops. */
1745 8, /* Maximum insns to inline memset. */
1746 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1747 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1750 const struct tune_params arm_xscale_tune =
1752 arm_xscale_rtx_costs,
1753 NULL,
1754 xscale_sched_adjust_cost,
1755 2, /* Constant limit. */
1756 3, /* Max cond insns. */
1757 ARM_PREFETCH_NOT_BENEFICIAL,
1758 true, /* Prefer constant pool. */
1759 arm_default_branch_cost,
1760 false, /* Prefer LDRD/STRD. */
1761 {true, true}, /* Prefer non short circuit. */
1762 &arm_default_vec_cost, /* Vectorizer costs. */
1763 false, /* Prefer Neon for 64-bits bitops. */
1764 false, false, /* Prefer 32-bit encodings. */
1765 false, /* Prefer Neon for stringops. */
1766 8, /* Maximum insns to inline memset. */
1767 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1768 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1771 const struct tune_params arm_9e_tune =
1773 arm_9e_rtx_costs,
1774 NULL,
1775 NULL, /* Sched adj cost. */
1776 1, /* Constant limit. */
1777 5, /* Max cond insns. */
1778 ARM_PREFETCH_NOT_BENEFICIAL,
1779 true, /* Prefer constant pool. */
1780 arm_default_branch_cost,
1781 false, /* Prefer LDRD/STRD. */
1782 {true, true}, /* Prefer non short circuit. */
1783 &arm_default_vec_cost, /* Vectorizer costs. */
1784 false, /* Prefer Neon for 64-bits bitops. */
1785 false, false, /* Prefer 32-bit encodings. */
1786 false, /* Prefer Neon for stringops. */
1787 8, /* Maximum insns to inline memset. */
1788 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1789 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1792 const struct tune_params arm_v6t2_tune =
1794 arm_9e_rtx_costs,
1795 NULL,
1796 NULL, /* Sched adj cost. */
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 false, /* Prefer constant pool. */
1801 arm_default_branch_cost,
1802 false, /* Prefer LDRD/STRD. */
1803 {true, true}, /* Prefer non short circuit. */
1804 &arm_default_vec_cost, /* Vectorizer costs. */
1805 false, /* Prefer Neon for 64-bits bitops. */
1806 false, false, /* Prefer 32-bit encodings. */
1807 false, /* Prefer Neon for stringops. */
1808 8, /* Maximum insns to inline memset. */
1809 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1810 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1813 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1814 const struct tune_params arm_cortex_tune =
1816 arm_9e_rtx_costs,
1817 &generic_extra_costs,
1818 NULL, /* Sched adj cost. */
1819 1, /* Constant limit. */
1820 5, /* Max cond insns. */
1821 ARM_PREFETCH_NOT_BENEFICIAL,
1822 false, /* Prefer constant pool. */
1823 arm_default_branch_cost,
1824 false, /* Prefer LDRD/STRD. */
1825 {true, true}, /* Prefer non short circuit. */
1826 &arm_default_vec_cost, /* Vectorizer costs. */
1827 false, /* Prefer Neon for 64-bits bitops. */
1828 false, false, /* Prefer 32-bit encodings. */
1829 false, /* Prefer Neon for stringops. */
1830 8, /* Maximum insns to inline memset. */
1831 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1832 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1835 const struct tune_params arm_cortex_a8_tune =
1837 arm_9e_rtx_costs,
1838 &cortexa8_extra_costs,
1839 NULL, /* Sched adj cost. */
1840 1, /* Constant limit. */
1841 5, /* Max cond insns. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 false, /* Prefer constant pool. */
1844 arm_default_branch_cost,
1845 false, /* Prefer LDRD/STRD. */
1846 {true, true}, /* Prefer non short circuit. */
1847 &arm_default_vec_cost, /* Vectorizer costs. */
1848 false, /* Prefer Neon for 64-bits bitops. */
1849 false, false, /* Prefer 32-bit encodings. */
1850 true, /* Prefer Neon for stringops. */
1851 8, /* Maximum insns to inline memset. */
1852 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1853 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1856 const struct tune_params arm_cortex_a7_tune =
1858 arm_9e_rtx_costs,
1859 &cortexa7_extra_costs,
1860 NULL,
1861 1, /* Constant limit. */
1862 5, /* Max cond insns. */
1863 ARM_PREFETCH_NOT_BENEFICIAL,
1864 false, /* Prefer constant pool. */
1865 arm_default_branch_cost,
1866 false, /* Prefer LDRD/STRD. */
1867 {true, true}, /* Prefer non short circuit. */
1868 &arm_default_vec_cost, /* Vectorizer costs. */
1869 false, /* Prefer Neon for 64-bits bitops. */
1870 false, false, /* Prefer 32-bit encodings. */
1871 true, /* Prefer Neon for stringops. */
1872 8, /* Maximum insns to inline memset. */
1873 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1874 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1877 const struct tune_params arm_cortex_a15_tune =
1879 arm_9e_rtx_costs,
1880 &cortexa15_extra_costs,
1881 NULL, /* Sched adj cost. */
1882 1, /* Constant limit. */
1883 2, /* Max cond insns. */
1884 ARM_PREFETCH_NOT_BENEFICIAL,
1885 false, /* Prefer constant pool. */
1886 arm_default_branch_cost,
1887 true, /* Prefer LDRD/STRD. */
1888 {true, true}, /* Prefer non short circuit. */
1889 &arm_default_vec_cost, /* Vectorizer costs. */
1890 false, /* Prefer Neon for 64-bits bitops. */
1891 true, true, /* Prefer 32-bit encodings. */
1892 true, /* Prefer Neon for stringops. */
1893 8, /* Maximum insns to inline memset. */
1894 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1895 ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
1898 const struct tune_params arm_cortex_a53_tune =
1900 arm_9e_rtx_costs,
1901 &cortexa53_extra_costs,
1902 NULL, /* Scheduler cost adjustment. */
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 true, /* Prefer Neon for stringops. */
1914 8, /* Maximum insns to inline memset. */
1915 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1916 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1919 const struct tune_params arm_cortex_a57_tune =
1921 arm_9e_rtx_costs,
1922 &cortexa57_extra_costs,
1923 NULL, /* Scheduler cost adjustment. */
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 false, /* Prefer constant pool. */
1928 arm_default_branch_cost,
1929 true, /* Prefer LDRD/STRD. */
1930 {true, true}, /* Prefer non short circuit. */
1931 &arm_default_vec_cost, /* Vectorizer costs. */
1932 false, /* Prefer Neon for 64-bits bitops. */
1933 true, true, /* Prefer 32-bit encodings. */
1934 true, /* Prefer Neon for stringops. */
1935 8, /* Maximum insns to inline memset. */
1936 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1937 ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
1940 const struct tune_params arm_xgene1_tune =
1942 arm_9e_rtx_costs,
1943 &xgene1_extra_costs,
1944 NULL, /* Scheduler cost adjustment. */
1945 1, /* Constant limit. */
1946 2, /* Max cond insns. */
1947 ARM_PREFETCH_NOT_BENEFICIAL,
1948 false, /* Prefer constant pool. */
1949 arm_default_branch_cost,
1950 true, /* Prefer LDRD/STRD. */
1951 {true, true}, /* Prefer non short circuit. */
1952 &arm_default_vec_cost, /* Vectorizer costs. */
1953 false, /* Prefer Neon for 64-bits bitops. */
1954 true, true, /* Prefer 32-bit encodings. */
1955 false, /* Prefer Neon for stringops. */
1956 32, /* Maximum insns to inline memset. */
1957 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1958 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1961 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1962 less appealing. Set max_insns_skipped to a low value. */
1964 const struct tune_params arm_cortex_a5_tune =
1966 arm_9e_rtx_costs,
1967 &cortexa5_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 1, /* Constant limit. */
1970 1, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 false, /* Prefer constant pool. */
1973 arm_cortex_a5_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {false, false}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 true, /* Prefer Neon for stringops. */
1980 8, /* Maximum insns to inline memset. */
1981 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1982 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1985 const struct tune_params arm_cortex_a9_tune =
1987 arm_9e_rtx_costs,
1988 &cortexa9_extra_costs,
1989 cortex_a9_sched_adjust_cost,
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_BENEFICIAL(4,32,32),
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {true, true}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8, /* Maximum insns to inline memset. */
2002 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2003 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2006 const struct tune_params arm_cortex_a12_tune =
2008 arm_9e_rtx_costs,
2009 &cortexa12_extra_costs,
2010 NULL, /* Sched adj cost. */
2011 1, /* Constant limit. */
2012 2, /* Max cond insns. */
2013 ARM_PREFETCH_NOT_BENEFICIAL,
2014 false, /* Prefer constant pool. */
2015 arm_default_branch_cost,
2016 true, /* Prefer LDRD/STRD. */
2017 {true, true}, /* Prefer non short circuit. */
2018 &arm_default_vec_cost, /* Vectorizer costs. */
2019 false, /* Prefer Neon for 64-bits bitops. */
2020 true, true, /* Prefer 32-bit encodings. */
2021 true, /* Prefer Neon for stringops. */
2022 8, /* Maximum insns to inline memset. */
2023 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
2024 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2027 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a
2028 single cycle, so materialising a constant with the pair costs two cycles.
2029 An LDR from the constant pool likewise takes two cycles, but mildly
2030 increases pipelining opportunity (consecutive loads/stores can be
2031 pipelined together, saving one cycle), and may also improve icache
2032 utilisation.  Hence we prefer the constant pool for such processors.  */
2034 const struct tune_params arm_v7m_tune =
2036 arm_9e_rtx_costs,
2037 &v7m_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 ARM_PREFETCH_NOT_BENEFICIAL,
2042 true, /* Prefer constant pool. */
2043 arm_cortex_m_branch_cost,
2044 false, /* Prefer LDRD/STRD. */
2045 {false, false}, /* Prefer non short circuit. */
2046 &arm_default_vec_cost, /* Vectorizer costs. */
2047 false, /* Prefer Neon for 64-bits bitops. */
2048 false, false, /* Prefer 32-bit encodings. */
2049 false, /* Prefer Neon for stringops. */
2050 8, /* Maximum insns to inline memset. */
2051 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2052 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2055 /* Cortex-M7 tuning. */
2057 const struct tune_params arm_cortex_m7_tune =
2059 arm_9e_rtx_costs,
2060 &v7m_extra_costs,
2061 NULL, /* Sched adj cost. */
2062 0, /* Constant limit. */
2063 1, /* Max cond insns. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 true, /* Prefer constant pool. */
2066 arm_cortex_m7_branch_cost,
2067 false, /* Prefer LDRD/STRD. */
2068 {true, true}, /* Prefer non short circuit. */
2069 &arm_default_vec_cost, /* Vectorizer costs. */
2070 false, /* Prefer Neon for 64-bits bitops. */
2071 false, false, /* Prefer 32-bit encodings. */
2072 false, /* Prefer Neon for stringops. */
2073 8, /* Maximum insns to inline memset. */
2074 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2075 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2078 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2079 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2080 const struct tune_params arm_v6m_tune =
2082 arm_9e_rtx_costs,
2083 NULL,
2084 NULL, /* Sched adj cost. */
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 ARM_PREFETCH_NOT_BENEFICIAL,
2088 false, /* Prefer constant pool. */
2089 arm_default_branch_cost,
2090 false, /* Prefer LDRD/STRD. */
2091 {false, false}, /* Prefer non short circuit. */
2092 &arm_default_vec_cost, /* Vectorizer costs. */
2093 false, /* Prefer Neon for 64-bits bitops. */
2094 false, false, /* Prefer 32-bit encodings. */
2095 false, /* Prefer Neon for stringops. */
2096 8, /* Maximum insns to inline memset. */
2097 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2098 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2101 const struct tune_params arm_fa726te_tune =
2103 arm_9e_rtx_costs,
2104 NULL,
2105 fa726te_sched_adjust_cost,
2106 1, /* Constant limit. */
2107 5, /* Max cond insns. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 true, /* Prefer constant pool. */
2110 arm_default_branch_cost,
2111 false, /* Prefer LDRD/STRD. */
2112 {true, true}, /* Prefer non short circuit. */
2113 &arm_default_vec_cost, /* Vectorizer costs. */
2114 false, /* Prefer Neon for 64-bits bitops. */
2115 false, false, /* Prefer 32-bit encodings. */
2116 false, /* Prefer Neon for stringops. */
2117 8, /* Maximum insns to inline memset. */
2118 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2119 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2123 /* Not all of these give usefully different compilation alternatives,
2124 but there is no simple way of generalizing them. */
2125 static const struct processors all_cores[] =
2127 /* ARM Cores */
2128 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2129 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2130 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2131 #include "arm-cores.def"
2132 #undef ARM_CORE
2133 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
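/* As an illustration of the ARM_CORE macro above (using a made-up entry,
   not one from arm-cores.def):
     ARM_CORE ("foo", foo_x, foo_ident, 7A, FL_LDSCHED, cortex)
   would expand to
     {"foo", foo_ident, "7A", BASE_ARCH_7A, FL_LDSCHED | FL_FOR_ARCH7A,
      &arm_cortex_tune},
   i.e. the second argument is unused in this expansion, and the COSTS
   argument is pasted into arm_<COSTS>_tune to select one of the tuning
   structures defined earlier in this file.  */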
2136 static const struct processors all_architectures[] =
2138 /* ARM Architectures */
2139 /* We don't specify tuning costs here as it will be figured out
2140 from the core. */
2142 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2143 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2144 #include "arm-arches.def"
2145 #undef ARM_ARCH
2146 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2150 /* These are populated as commandline arguments are processed, or NULL
2151 if not specified. */
2152 static const struct processors *arm_selected_arch;
2153 static const struct processors *arm_selected_cpu;
2154 static const struct processors *arm_selected_tune;
2156 /* The name of the preprocessor macro to define for this architecture. */
2158 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2160 /* Available values for -mfpu=. */
2162 static const struct arm_fpu_desc all_fpus[] =
2164 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2165 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2166 #include "arm-fpus.def"
2167 #undef ARM_FPU
2171 /* Supported TLS relocations. */
2173 enum tls_reloc {
2174 TLS_GD32,
2175 TLS_LDM32,
2176 TLS_LDO32,
2177 TLS_IE32,
2178 TLS_LE32,
2179 TLS_DESCSEQ /* GNU scheme */
2182 /* The maximum number of insns to be used when loading a constant. */
2183 inline static int
2184 arm_constant_limit (bool size_p)
2186 return size_p ? 1 : current_tune->constant_limit;
2189 /* Emit an insn that's a simple single-set. Both the operands must be known
2190 to be valid. */
2191 inline static rtx_insn *
2192 emit_set_insn (rtx x, rtx y)
2194 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2197 /* Return the number of bits set in VALUE. */
2198 static unsigned
2199 bit_count (unsigned long value)
2201 unsigned long count = 0;
2203 while (value)
2205 count++;
2206 value &= value - 1; /* Clear the least-significant set bit. */
2209 return count;
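/* For illustration: bit_count works on the raw bit pattern, so
   bit_count (0x2c) == 3 and bit_count (0) == 0.  arm_option_override below
   uses it to score how many of the default CPU's feature bits a candidate
   core retains when searching for a best-fit processor.  */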
2212 typedef struct
2214 machine_mode mode;
2215 const char *name;
2216 } arm_fixed_mode_set;
2218 /* A small helper for setting fixed-point library libfuncs. */
2220 static void
2221 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2222 const char *funcname, const char *modename,
2223 int num_suffix)
2225 char buffer[50];
2227 if (num_suffix == 0)
2228 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2229 else
2230 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2232 set_optab_libfunc (optable, mode, buffer);
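/* A sketch of the names this helper constructs, following the sprintf
   format above and the calls made later in arm_init_libfuncs:
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
       registers "__gnu_addqq3", and
     arm_set_fixed_optab_libfunc (neg_optab, HAmode, "neg", "ha", 2)
       registers "__gnu_negha2";
   a num_suffix of 0 omits the trailing digit entirely.  */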
2235 static void
2236 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2237 machine_mode from, const char *funcname,
2238 const char *toname, const char *fromname)
2240 char buffer[50];
2241 const char *maybe_suffix_2 = "";
2243 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2244 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2245 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2246 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2247 maybe_suffix_2 = "2";
2249 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2250 maybe_suffix_2);
2252 set_conv_libfunc (optable, to, from, buffer);
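/* Likewise, a sketch of the conversion names: converting SImode to SAmode
   ("fract", from "si" to "sa") registers "__gnu_fractsisa", with no "2"
   suffix because the source is not a fixed-point mode, while converting
   QQmode to HQmode (both signed fract modes) registers "__gnu_fractqqhq2",
   matching the fixed-bit.h suffix rule noted above.  */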
2255 /* Set up library functions unique to ARM. */
2257 static void
2258 arm_init_libfuncs (void)
2260 /* For Linux, we have access to kernel support for atomic operations. */
2261 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2262 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2264 /* There are no special library functions unless we are using the
2265 ARM BPABI. */
2266 if (!TARGET_BPABI)
2267 return;
2269 /* The functions below are described in Section 4 of the "Run-Time
2270 ABI for the ARM architecture", Version 1.0. */
2272 /* Double-precision floating-point arithmetic. Table 2. */
2273 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2274 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2275 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2276 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2277 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2279 /* Double-precision comparisons. Table 3. */
2280 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2281 set_optab_libfunc (ne_optab, DFmode, NULL);
2282 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2283 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2284 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2285 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2286 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2288 /* Single-precision floating-point arithmetic. Table 4. */
2289 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2290 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2291 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2292 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2293 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2295 /* Single-precision comparisons. Table 5. */
2296 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2297 set_optab_libfunc (ne_optab, SFmode, NULL);
2298 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2299 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2300 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2301 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2302 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2304 /* Floating-point to integer conversions. Table 6. */
2305 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2306 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2307 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2308 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2309 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2310 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2311 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2312 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2314 /* Conversions between floating types. Table 7. */
2315 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2316 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2318 /* Integer to floating-point conversions. Table 8. */
2319 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2320 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2321 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2322 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2323 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2324 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2325 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2326 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2328 /* Long long. Table 9. */
2329 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2330 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2331 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2332 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2333 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2334 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2335 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2336 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2338 /* Integer (32/32->32) division. \S 4.3.1. */
2339 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2340 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2342 /* The divmod functions are designed so that they can be used for
2343 plain division, even though they return both the quotient and the
2344 remainder. The quotient is returned in the usual location (i.e.,
2345 r0 for SImode, {r0, r1} for DImode), just as would be expected
2346 for an ordinary division routine. Because the AAPCS calling
2347 conventions specify that all of { r0, r1, r2, r3 } are
2348 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2349 explicitly that those registers are clobbered by these
2350 routines. */
2351 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2352 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
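/* Thus a plain 64-bit division, say "a / b" on long long operands, can be
   compiled as a call to __aeabi_ldivmod (or __aeabi_uldivmod when
   unsigned), with the quotient read from {r0, r1} and the remainder part
   of the combined result simply ignored, as the comment above explains.  */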
2354 /* For SImode division the ABI provides div-without-mod routines,
2355 which are faster. */
2356 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2357 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2359 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2360 divmod libcalls instead. */
2361 set_optab_libfunc (smod_optab, DImode, NULL);
2362 set_optab_libfunc (umod_optab, DImode, NULL);
2363 set_optab_libfunc (smod_optab, SImode, NULL);
2364 set_optab_libfunc (umod_optab, SImode, NULL);
2366 /* Half-precision float operations. The compiler handles all operations
2367 with NULL libfuncs by converting to SFmode. */
2368 switch (arm_fp16_format)
2370 case ARM_FP16_FORMAT_IEEE:
2371 case ARM_FP16_FORMAT_ALTERNATIVE:
2373 /* Conversions. */
2374 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2375 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2376 ? "__gnu_f2h_ieee"
2377 : "__gnu_f2h_alternative"));
2378 set_conv_libfunc (sext_optab, SFmode, HFmode,
2379 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2380 ? "__gnu_h2f_ieee"
2381 : "__gnu_h2f_alternative"));
2383 /* Arithmetic. */
2384 set_optab_libfunc (add_optab, HFmode, NULL);
2385 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2386 set_optab_libfunc (smul_optab, HFmode, NULL);
2387 set_optab_libfunc (neg_optab, HFmode, NULL);
2388 set_optab_libfunc (sub_optab, HFmode, NULL);
2390 /* Comparisons. */
2391 set_optab_libfunc (eq_optab, HFmode, NULL);
2392 set_optab_libfunc (ne_optab, HFmode, NULL);
2393 set_optab_libfunc (lt_optab, HFmode, NULL);
2394 set_optab_libfunc (le_optab, HFmode, NULL);
2395 set_optab_libfunc (ge_optab, HFmode, NULL);
2396 set_optab_libfunc (gt_optab, HFmode, NULL);
2397 set_optab_libfunc (unord_optab, HFmode, NULL);
2398 break;
2400 default:
2401 break;
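/* A sketch of the effect, assuming -mfp16-format=ieee and no half-precision
   hardware: because the HFmode arithmetic and comparison optabs above are
   left NULL, an addition of two __fp16 values a and b is performed roughly
   as
     __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b))
   i.e. the operands are widened to SFmode, the operation is carried out
   there, and the result is narrowed back.  */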
2404 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2406 const arm_fixed_mode_set fixed_arith_modes[] =
2408 { QQmode, "qq" },
2409 { UQQmode, "uqq" },
2410 { HQmode, "hq" },
2411 { UHQmode, "uhq" },
2412 { SQmode, "sq" },
2413 { USQmode, "usq" },
2414 { DQmode, "dq" },
2415 { UDQmode, "udq" },
2416 { TQmode, "tq" },
2417 { UTQmode, "utq" },
2418 { HAmode, "ha" },
2419 { UHAmode, "uha" },
2420 { SAmode, "sa" },
2421 { USAmode, "usa" },
2422 { DAmode, "da" },
2423 { UDAmode, "uda" },
2424 { TAmode, "ta" },
2425 { UTAmode, "uta" }
2427 const arm_fixed_mode_set fixed_conv_modes[] =
2429 { QQmode, "qq" },
2430 { UQQmode, "uqq" },
2431 { HQmode, "hq" },
2432 { UHQmode, "uhq" },
2433 { SQmode, "sq" },
2434 { USQmode, "usq" },
2435 { DQmode, "dq" },
2436 { UDQmode, "udq" },
2437 { TQmode, "tq" },
2438 { UTQmode, "utq" },
2439 { HAmode, "ha" },
2440 { UHAmode, "uha" },
2441 { SAmode, "sa" },
2442 { USAmode, "usa" },
2443 { DAmode, "da" },
2444 { UDAmode, "uda" },
2445 { TAmode, "ta" },
2446 { UTAmode, "uta" },
2447 { QImode, "qi" },
2448 { HImode, "hi" },
2449 { SImode, "si" },
2450 { DImode, "di" },
2451 { TImode, "ti" },
2452 { SFmode, "sf" },
2453 { DFmode, "df" }
2455 unsigned int i, j;
2457 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2459 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2460 "add", fixed_arith_modes[i].name, 3);
2461 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2462 "ssadd", fixed_arith_modes[i].name, 3);
2463 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2464 "usadd", fixed_arith_modes[i].name, 3);
2465 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2466 "sub", fixed_arith_modes[i].name, 3);
2467 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2468 "sssub", fixed_arith_modes[i].name, 3);
2469 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2470 "ussub", fixed_arith_modes[i].name, 3);
2471 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2472 "mul", fixed_arith_modes[i].name, 3);
2473 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2474 "ssmul", fixed_arith_modes[i].name, 3);
2475 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2476 "usmul", fixed_arith_modes[i].name, 3);
2477 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2478 "div", fixed_arith_modes[i].name, 3);
2479 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2480 "udiv", fixed_arith_modes[i].name, 3);
2481 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2482 "ssdiv", fixed_arith_modes[i].name, 3);
2483 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2484 "usdiv", fixed_arith_modes[i].name, 3);
2485 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2486 "neg", fixed_arith_modes[i].name, 2);
2487 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2488 "ssneg", fixed_arith_modes[i].name, 2);
2489 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2490 "usneg", fixed_arith_modes[i].name, 2);
2491 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2492 "ashl", fixed_arith_modes[i].name, 3);
2493 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2494 "ashr", fixed_arith_modes[i].name, 3);
2495 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2496 "lshr", fixed_arith_modes[i].name, 3);
2497 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2498 "ssashl", fixed_arith_modes[i].name, 3);
2499 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2500 "usashl", fixed_arith_modes[i].name, 3);
2501 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2502 "cmp", fixed_arith_modes[i].name, 2);
2505 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2506 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2508 if (i == j
2509 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2510 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2511 continue;
2513 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2514 fixed_conv_modes[j].mode, "fract",
2515 fixed_conv_modes[i].name,
2516 fixed_conv_modes[j].name);
2517 arm_set_fixed_conv_libfunc (satfract_optab,
2518 fixed_conv_modes[i].mode,
2519 fixed_conv_modes[j].mode, "satfract",
2520 fixed_conv_modes[i].name,
2521 fixed_conv_modes[j].name);
2522 arm_set_fixed_conv_libfunc (fractuns_optab,
2523 fixed_conv_modes[i].mode,
2524 fixed_conv_modes[j].mode, "fractuns",
2525 fixed_conv_modes[i].name,
2526 fixed_conv_modes[j].name);
2527 arm_set_fixed_conv_libfunc (satfractuns_optab,
2528 fixed_conv_modes[i].mode,
2529 fixed_conv_modes[j].mode, "satfractuns",
2530 fixed_conv_modes[i].name,
2531 fixed_conv_modes[j].name);
2535 if (TARGET_AAPCS_BASED)
2536 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2539 /* On AAPCS systems, this is the "struct __va_list". */
2540 static GTY(()) tree va_list_type;
2542 /* Return the type to use as __builtin_va_list. */
2543 static tree
2544 arm_build_builtin_va_list (void)
2546 tree va_list_name;
2547 tree ap_field;
2549 if (!TARGET_AAPCS_BASED)
2550 return std_build_builtin_va_list ();
2552 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2553 defined as:
2555 struct __va_list
2557 void *__ap;
2560 The C Library ABI further reinforces this definition in \S
2561 4.1.
2563 We must follow this definition exactly. The structure tag
2564 name is visible in C++ mangled names, and thus forms a part
2565 of the ABI. The field name may be used by people who
2566 #include <stdarg.h>. */
2567 /* Create the type. */
2568 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2569 /* Give it the required name. */
2570 va_list_name = build_decl (BUILTINS_LOCATION,
2571 TYPE_DECL,
2572 get_identifier ("__va_list"),
2573 va_list_type);
2574 DECL_ARTIFICIAL (va_list_name) = 1;
2575 TYPE_NAME (va_list_type) = va_list_name;
2576 TYPE_STUB_DECL (va_list_type) = va_list_name;
2577 /* Create the __ap field. */
2578 ap_field = build_decl (BUILTINS_LOCATION,
2579 FIELD_DECL,
2580 get_identifier ("__ap"),
2581 ptr_type_node);
2582 DECL_ARTIFICIAL (ap_field) = 1;
2583 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2584 TYPE_FIELDS (va_list_type) = ap_field;
2585 /* Compute its layout. */
2586 layout_type (va_list_type);
2588 return va_list_type;
2591 /* Return an expression of type "void *" pointing to the next
2592 available argument in a variable-argument list. VALIST is the
2593 user-level va_list object, of type __builtin_va_list. */
2594 static tree
2595 arm_extract_valist_ptr (tree valist)
2597 if (TREE_TYPE (valist) == error_mark_node)
2598 return error_mark_node;
2600 /* On an AAPCS target, the pointer is stored within "struct
2601 va_list". */
2602 if (TARGET_AAPCS_BASED)
2604 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2605 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2606 valist, ap_field, NULL_TREE);
2609 return valist;
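/* In other words, on an AAPCS target the tree returned above is the
   COMPONENT_REF "valist.__ap", so the standard va_start and va_arg
   expanders used below operate on the plain pointer stored inside the
   structure.  */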
2612 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2613 static void
2614 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2616 valist = arm_extract_valist_ptr (valist);
2617 std_expand_builtin_va_start (valist, nextarg);
2620 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2621 static tree
2622 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2623 gimple_seq *post_p)
2625 valist = arm_extract_valist_ptr (valist);
2626 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2629 /* Fix up any incompatible options that the user has specified. */
2630 static void
2631 arm_option_override (void)
2633 arm_selected_arch = NULL;
2634 arm_selected_cpu = NULL;
2635 arm_selected_tune = NULL;
2637 if (global_options_set.x_arm_arch_option)
2638 arm_selected_arch = &all_architectures[arm_arch_option];
2640 if (global_options_set.x_arm_cpu_option)
2642 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2643 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2646 if (global_options_set.x_arm_tune_option)
2647 arm_selected_tune = &all_cores[(int) arm_tune_option];
2649 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2650 SUBTARGET_OVERRIDE_OPTIONS;
2651 #endif
2653 if (arm_selected_arch)
2655 if (arm_selected_cpu)
2657 /* Check for conflict between mcpu and march. */
2658 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2660 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2661 arm_selected_cpu->name, arm_selected_arch->name);
2662 /* -march wins for code generation.
2663 -mcpu wins for default tuning. */
2664 if (!arm_selected_tune)
2665 arm_selected_tune = arm_selected_cpu;
2667 arm_selected_cpu = arm_selected_arch;
2669 else
2670 /* -mcpu wins. */
2671 arm_selected_arch = NULL;
2673 else
2674 /* Pick a CPU based on the architecture. */
2675 arm_selected_cpu = arm_selected_arch;
2678 /* If the user did not specify a processor, choose one for them. */
2679 if (!arm_selected_cpu)
2681 const struct processors * sel;
2682 unsigned int sought;
2684 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2685 if (!arm_selected_cpu->name)
2687 #ifdef SUBTARGET_CPU_DEFAULT
2688 /* Use the subtarget default CPU if none was specified by
2689 configure. */
2690 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2691 #endif
2692 /* Default to ARM6. */
2693 if (!arm_selected_cpu->name)
2694 arm_selected_cpu = &all_cores[arm6];
2697 sel = arm_selected_cpu;
2698 insn_flags = sel->flags;
2700 /* Now check to see if the user has specified some command line
2701 switches that require certain abilities from the cpu. */
2702 sought = 0;
2704 if (TARGET_INTERWORK || TARGET_THUMB)
2706 sought |= (FL_THUMB | FL_MODE32);
2708 /* There are no ARM processors that support both APCS-26 and
2709 interworking. Therefore we force FL_MODE26 to be removed
2710 from insn_flags here (if it was set), so that the search
2711 below will always be able to find a compatible processor. */
2712 insn_flags &= ~FL_MODE26;
2715 if (sought != 0 && ((sought & insn_flags) != sought))
2717 /* Try to locate a CPU type that supports all of the abilities
2718 of the default CPU, plus the extra abilities requested by
2719 the user. */
2720 for (sel = all_cores; sel->name != NULL; sel++)
2721 if ((sel->flags & sought) == (sought | insn_flags))
2722 break;
2724 if (sel->name == NULL)
2726 unsigned current_bit_count = 0;
2727 const struct processors * best_fit = NULL;
2729 /* Ideally we would like to issue an error message here
2730 saying that it was not possible to find a CPU compatible
2731 with the default CPU, but which also supports the command
2732 line options specified by the programmer, and so they
2733 ought to use the -mcpu=<name> command line option to
2734 override the default CPU type.
2736 If we cannot find a cpu that has both the
2737 characteristics of the default cpu and the given
2738 command line options we scan the array again looking
2739 for a best match. */
2740 for (sel = all_cores; sel->name != NULL; sel++)
2741 if ((sel->flags & sought) == sought)
2743 unsigned count;
2745 count = bit_count (sel->flags & insn_flags);
2747 if (count >= current_bit_count)
2749 best_fit = sel;
2750 current_bit_count = count;
2754 gcc_assert (best_fit);
2755 sel = best_fit;
2758 arm_selected_cpu = sel;
2762 gcc_assert (arm_selected_cpu);
2763 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2764 if (!arm_selected_tune)
2765 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2767 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2768 insn_flags = arm_selected_cpu->flags;
2769 arm_base_arch = arm_selected_cpu->base_arch;
2771 arm_tune = arm_selected_tune->core;
2772 tune_flags = arm_selected_tune->flags;
2773 current_tune = arm_selected_tune->tune;
2775 /* Make sure that the processor choice does not conflict with any of the
2776 other command line choices. */
2777 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2778 error ("target CPU does not support ARM mode");
2780 /* BPABI targets use linker tricks to allow interworking on cores
2781 without thumb support. */
2782 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2784 warning (0, "target CPU does not support interworking" );
2785 target_flags &= ~MASK_INTERWORK;
2788 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2790 warning (0, "target CPU does not support THUMB instructions");
2791 target_flags &= ~MASK_THUMB;
2794 if (TARGET_APCS_FRAME && TARGET_THUMB)
2796 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2797 target_flags &= ~MASK_APCS_FRAME;
2800 /* Callee super interworking implies thumb interworking. Adding
2801 this to the flags here simplifies the logic elsewhere. */
2802 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2803 target_flags |= MASK_INTERWORK;
2805 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2806 from here where no function is being compiled currently. */
2807 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2808 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2810 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2811 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2813 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2815 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2816 target_flags |= MASK_APCS_FRAME;
2819 if (TARGET_POKE_FUNCTION_NAME)
2820 target_flags |= MASK_APCS_FRAME;
2822 if (TARGET_APCS_REENT && flag_pic)
2823 error ("-fpic and -mapcs-reent are incompatible");
2825 if (TARGET_APCS_REENT)
2826 warning (0, "APCS reentrant code not supported. Ignored");
2828 /* If this target is normally configured to use APCS frames, warn if they
2829 are turned off and debugging is turned on. */
2830 if (TARGET_ARM
2831 && write_symbols != NO_DEBUG
2832 && !TARGET_APCS_FRAME
2833 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2834 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2836 if (TARGET_APCS_FLOAT)
2837 warning (0, "passing floating point arguments in fp regs not yet supported");
2839 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2840 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2841 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2842 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2843 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2844 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2845 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2846 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2847 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2848 arm_arch6m = arm_arch6 && !arm_arch_notm;
2849 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2850 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2851 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2852 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2853 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2855 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2856 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2857 thumb_code = TARGET_ARM == 0;
2858 thumb1_code = TARGET_THUMB1 != 0;
2859 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2860 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2861 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2862 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2863 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2864 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2865 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
2866 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2867 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2868 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2869 if (arm_restrict_it == 2)
2870 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2872 if (!TARGET_THUMB2)
2873 arm_restrict_it = 0;
2875 /* If we are not using the default (ARM mode) section anchor offset
2876 ranges, then set the correct ranges now. */
2877 if (TARGET_THUMB1)
2879 /* Thumb-1 LDR instructions cannot have negative offsets.
2880 Permissible positive offset ranges are 5-bit (for byte loads),
2881 6-bit (for halfword loads), or 7-bit (for word loads).
2882 Empirical results suggest a 7-bit anchor range gives the best
2883 overall code size. */
2884 targetm.min_anchor_offset = 0;
2885 targetm.max_anchor_offset = 127;
2887 else if (TARGET_THUMB2)
2889 /* The minimum is set such that the total size of the block
2890 for a particular anchor is 248 + 1 + 4095 bytes, which is
2891 divisible by eight, ensuring natural spacing of anchors. */
2892 targetm.min_anchor_offset = -248;
2893 targetm.max_anchor_offset = 4095;
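/* (248 + 1 + 4095 = 4344 = 8 * 543, hence the divisibility by eight
   mentioned above.)  */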
2896 /* V5 code we generate is completely interworking capable, so we turn off
2897 TARGET_INTERWORK here to avoid many tests later on. */
2899 /* XXX However, we must pass the right pre-processor defines to CPP
2900 or GLD can get confused. This is a hack. */
2901 if (TARGET_INTERWORK)
2902 arm_cpp_interwork = 1;
2904 if (arm_arch5)
2905 target_flags &= ~MASK_INTERWORK;
2907 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2908 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2910 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2911 error ("iwmmxt abi requires an iwmmxt capable cpu");
2913 if (!global_options_set.x_arm_fpu_index)
2915 const char *target_fpu_name;
2916 bool ok;
2918 #ifdef FPUTYPE_DEFAULT
2919 target_fpu_name = FPUTYPE_DEFAULT;
2920 #else
2921 target_fpu_name = "vfp";
2922 #endif
2924 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2925 CL_TARGET);
2926 gcc_assert (ok);
2929 arm_fpu_desc = &all_fpus[arm_fpu_index];
2931 switch (arm_fpu_desc->model)
2933 case ARM_FP_MODEL_VFP:
2934 arm_fpu_attr = FPU_VFP;
2935 break;
2937 default:
2938 gcc_unreachable();
2941 if (TARGET_AAPCS_BASED)
2943 if (TARGET_CALLER_INTERWORKING)
2944 error ("AAPCS does not support -mcaller-super-interworking");
2945 else
2946 if (TARGET_CALLEE_INTERWORKING)
2947 error ("AAPCS does not support -mcallee-super-interworking");
2950 /* iWMMXt and NEON are incompatible. */
2951 if (TARGET_IWMMXT && TARGET_NEON)
2952 error ("iWMMXt and NEON are incompatible");
2954 /* iWMMXt unsupported under Thumb mode. */
2955 if (TARGET_THUMB && TARGET_IWMMXT)
2956 error ("iWMMXt unsupported under Thumb mode");
2958 /* __fp16 support currently assumes the core has ldrh. */
2959 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2960 sorry ("__fp16 and no ldrh");
2962 /* If soft-float is specified then don't use FPU. */
2963 if (TARGET_SOFT_FLOAT)
2964 arm_fpu_attr = FPU_NONE;
2966 if (TARGET_AAPCS_BASED)
2968 if (arm_abi == ARM_ABI_IWMMXT)
2969 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2970 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2971 && TARGET_HARD_FLOAT
2972 && TARGET_VFP)
2973 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2974 else
2975 arm_pcs_default = ARM_PCS_AAPCS;
2977 else
2979 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2980 sorry ("-mfloat-abi=hard and VFP");
2982 if (arm_abi == ARM_ABI_APCS)
2983 arm_pcs_default = ARM_PCS_APCS;
2984 else
2985 arm_pcs_default = ARM_PCS_ATPCS;
2988 /* For arm2/3 there is no need to do any scheduling if we are doing
2989 software floating-point. */
2990 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2991 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2993 /* Use the cp15 method if it is available. */
2994 if (target_thread_pointer == TP_AUTO)
2996 if (arm_arch6k && !TARGET_THUMB1)
2997 target_thread_pointer = TP_CP15;
2998 else
2999 target_thread_pointer = TP_SOFT;
3002 if (TARGET_HARD_TP && TARGET_THUMB1)
3003 error ("can not use -mtp=cp15 with 16-bit Thumb");
3005 /* Override the default structure alignment for AAPCS ABI. */
3006 if (!global_options_set.x_arm_structure_size_boundary)
3008 if (TARGET_AAPCS_BASED)
3009 arm_structure_size_boundary = 8;
3011 else
3013 if (arm_structure_size_boundary != 8
3014 && arm_structure_size_boundary != 32
3015 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3017 if (ARM_DOUBLEWORD_ALIGN)
3018 warning (0,
3019 "structure size boundary can only be set to 8, 32 or 64");
3020 else
3021 warning (0, "structure size boundary can only be set to 8 or 32");
3022 arm_structure_size_boundary
3023 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3027 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3029 error ("RTP PIC is incompatible with Thumb");
3030 flag_pic = 0;
3033 /* If stack checking is disabled, we can use r10 as the PIC register,
3034 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3035 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3037 if (TARGET_VXWORKS_RTP)
3038 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3039 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3042 if (flag_pic && TARGET_VXWORKS_RTP)
3043 arm_pic_register = 9;
3045 if (arm_pic_register_string != NULL)
3047 int pic_register = decode_reg_name (arm_pic_register_string);
3049 if (!flag_pic)
3050 warning (0, "-mpic-register= is useless without -fpic");
3052 /* Prevent the user from choosing an obviously stupid PIC register. */
3053 else if (pic_register < 0 || call_used_regs[pic_register]
3054 || pic_register == HARD_FRAME_POINTER_REGNUM
3055 || pic_register == STACK_POINTER_REGNUM
3056 || pic_register >= PC_REGNUM
3057 || (TARGET_VXWORKS_RTP
3058 && (unsigned int) pic_register != arm_pic_register))
3059 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3060 else
3061 arm_pic_register = pic_register;
3064 if (TARGET_VXWORKS_RTP
3065 && !global_options_set.x_arm_pic_data_is_text_relative)
3066 arm_pic_data_is_text_relative = 0;
3068 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3069 if (fix_cm3_ldrd == 2)
3071 if (arm_selected_cpu->core == cortexm3)
3072 fix_cm3_ldrd = 1;
3073 else
3074 fix_cm3_ldrd = 0;
3077 /* Enable -munaligned-access by default for
3078 - all ARMv6 architecture-based processors
3079 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3080 - ARMv8 architecture-based processors.
3082 Disable -munaligned-access by default for
3083 - all pre-ARMv6 architecture-based processors
3084 - ARMv6-M architecture-based processors. */
3086 if (unaligned_access == 2)
3088 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3089 unaligned_access = 1;
3090 else
3091 unaligned_access = 0;
3093 else if (unaligned_access == 1
3094 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3096 warning (0, "target CPU does not support unaligned accesses");
3097 unaligned_access = 0;
3100 if (TARGET_THUMB1 && flag_schedule_insns)
3102 /* Don't warn since it's on by default in -O2. */
3103 flag_schedule_insns = 0;
3106 if (optimize_size)
3108 /* If optimizing for size, bump the number of instructions that we
3109 are prepared to conditionally execute (even on a StrongARM). */
3110 max_insns_skipped = 6;
3112 /* For THUMB2, we limit the conditional sequence to one IT block. */
3113 if (TARGET_THUMB2)
3114 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3116 else
3117 max_insns_skipped = current_tune->max_insns_skipped;
3119 /* Hot/Cold partitioning is not currently supported, since we can't
3120 handle literal pool placement in that case. */
3121 if (flag_reorder_blocks_and_partition)
3123 inform (input_location,
3124 "-freorder-blocks-and-partition not supported on this architecture");
3125 flag_reorder_blocks_and_partition = 0;
3126 flag_reorder_blocks = 1;
3129 if (flag_pic)
3130 /* Hoisting PIC address calculations more aggressively provides a small,
3131 but measurable, size reduction for PIC code. Therefore, we decrease
3132 the bar for unrestricted expression hoisting to the cost of PIC address
3133 calculation, which is 2 instructions. */
3134 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3135 global_options.x_param_values,
3136 global_options_set.x_param_values);
3138 /* ARM EABI defaults to strict volatile bitfields. */
3139 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3140 && abi_version_at_least(2))
3141 flag_strict_volatile_bitfields = 1;
3143 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we
3144 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3145 if (flag_prefetch_loop_arrays < 0
3146 && HAVE_prefetch
3147 && optimize >= 3
3148 && current_tune->num_prefetch_slots > 0)
3149 flag_prefetch_loop_arrays = 1;
3151 /* Set up parameters to be used in the prefetching algorithm. Do not override
3152 the defaults unless we are tuning for a core we have researched values for. */
3153 if (current_tune->num_prefetch_slots > 0)
3154 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3155 current_tune->num_prefetch_slots,
3156 global_options.x_param_values,
3157 global_options_set.x_param_values);
3158 if (current_tune->l1_cache_line_size >= 0)
3159 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3160 current_tune->l1_cache_line_size,
3161 global_options.x_param_values,
3162 global_options_set.x_param_values);
3163 if (current_tune->l1_cache_size >= 0)
3164 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3165 current_tune->l1_cache_size,
3166 global_options.x_param_values,
3167 global_options_set.x_param_values);
3169 /* Use Neon rather than the core registers to perform 64-bit
3170 operations. */
3171 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3172 if (use_neon_for_64bits == 1)
3173 prefer_neon_for_64bits = true;
3175 /* Use the alternative scheduling-pressure algorithm by default. */
3176 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3177 global_options.x_param_values,
3178 global_options_set.x_param_values);
3180 /* Look through the ready list and all of the queue for instructions
3181 relevant to the L2 auto-prefetcher. */
3182 int param_sched_autopref_queue_depth;
3183 if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3184 param_sched_autopref_queue_depth = -1;
3185 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3186 param_sched_autopref_queue_depth = 0;
3187 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3188 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3189 else
3190 gcc_unreachable ();
3191 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3192 param_sched_autopref_queue_depth,
3193 global_options.x_param_values,
3194 global_options_set.x_param_values);
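/* For example, the Cortex-A15 and Cortex-A57 tunings above select
   ARM_SCHED_AUTOPREF_FULL, so for them the queue-depth parameter becomes
   max_insn_queue_index + 1; tunings using ARM_SCHED_AUTOPREF_OFF get -1
   instead.  */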
3196 /* Disable shrink-wrap when optimizing function for size, since it tends to
3197 generate additional returns. */
3198 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3199 flag_shrink_wrap = false;
3200 /* TBD: Dwarf info for apcs frame is not handled yet. */
3201 if (TARGET_APCS_FRAME)
3202 flag_shrink_wrap = false;
3204 /* We only support -mslow-flash-data on armv7-m targets. */
3205 if (target_slow_flash_data
3206 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3207 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3208 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3210 /* Currently, for slow flash data, we just disable literal pools. */
3211 if (target_slow_flash_data)
3212 arm_disable_literal_pool = true;
3214 /* Thumb2 inline assembly code should always use unified syntax.
3215 This will apply to ARM and Thumb1 eventually. */
3216 if (TARGET_THUMB2)
3217 inline_asm_unified = 1;
3219 /* Disable scheduling fusion by default unless the processor is ARMv7
3220 and prefers LDRD/STRD. */
3221 if (flag_schedule_fusion == 2
3222 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3223 flag_schedule_fusion = 0;
3225 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3226 - epilogue_insns - does not accurately model the corresponding insns
3227 emitted in the asm file. In particular, see the comment in thumb_exit
3228 'Find out how many of the (return) argument registers we can corrupt'.
3229 As a consequence, the epilogue may clobber registers without fipa-ra
3230 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3231 TODO: Accurately model clobbers for epilogue_insns and reenable
3232 fipa-ra. */
3233 if (TARGET_THUMB1)
3234 flag_ipa_ra = 0;
3236 /* Register global variables with the garbage collector. */
3237 arm_add_gc_roots ();
3240 static void
3241 arm_add_gc_roots (void)
3243 gcc_obstack_init(&minipool_obstack);
3244 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3247 /* A table of known ARM exception types.
3248 For use with the interrupt function attribute. */
3250 typedef struct
3252 const char *const arg;
3253 const unsigned long return_value;
3255 isr_attribute_arg;
3257 static const isr_attribute_arg isr_attribute_args [] =
3259 { "IRQ", ARM_FT_ISR },
3260 { "irq", ARM_FT_ISR },
3261 { "FIQ", ARM_FT_FIQ },
3262 { "fiq", ARM_FT_FIQ },
3263 { "ABORT", ARM_FT_ISR },
3264 { "abort", ARM_FT_ISR },
3265 { "ABORT", ARM_FT_ISR },
3266 { "abort", ARM_FT_ISR },
3267 { "UNDEF", ARM_FT_EXCEPTION },
3268 { "undef", ARM_FT_EXCEPTION },
3269 { "SWI", ARM_FT_EXCEPTION },
3270 { "swi", ARM_FT_EXCEPTION },
3271 { NULL, ARM_FT_NORMAL }
3274 /* Returns the interrupt function type encoded by ARGUMENT (the argument of an
3275 "isr" or "interrupt" attribute), or ARM_FT_UNKNOWN if the type cannot be determined. */
3277 static unsigned long
3278 arm_isr_value (tree argument)
3280 const isr_attribute_arg * ptr;
3281 const char * arg;
3283 if (!arm_arch_notm)
3284 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3286 /* No argument - default to IRQ. */
3287 if (argument == NULL_TREE)
3288 return ARM_FT_ISR;
3290 /* Get the value of the argument. */
3291 if (TREE_VALUE (argument) == NULL_TREE
3292 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3293 return ARM_FT_UNKNOWN;
3295 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3297 /* Check it against the list of known arguments. */
3298 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3299 if (streq (arg, ptr->arg))
3300 return ptr->return_value;
3302 /* An unrecognized interrupt type. */
3303 return ARM_FT_UNKNOWN;
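/* Illustrative usage (a hypothetical declaration, not taken from this
   file): a handler written as
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
   reaches this function with an argument list whose value is the string
   "FIQ" and maps to ARM_FT_FIQ via the table above, while a bare
   __attribute__ ((interrupt)) has no argument and defaults to ARM_FT_ISR.  */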
3306 /* Computes the type of the current function. */
3308 static unsigned long
3309 arm_compute_func_type (void)
3311 unsigned long type = ARM_FT_UNKNOWN;
3312 tree a;
3313 tree attr;
3315 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3317 /* Decide if the current function is volatile. Such functions
3318 never return, and many memory cycles can be saved by not storing
3319 register values that will never be needed again. This optimization
3320 was added to speed up context switching in a kernel application. */
3321 if (optimize > 0
3322 && (TREE_NOTHROW (current_function_decl)
3323 || !(flag_unwind_tables
3324 || (flag_exceptions
3325 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3326 && TREE_THIS_VOLATILE (current_function_decl))
3327 type |= ARM_FT_VOLATILE;
3329 if (cfun->static_chain_decl != NULL)
3330 type |= ARM_FT_NESTED;
3332 attr = DECL_ATTRIBUTES (current_function_decl);
3334 a = lookup_attribute ("naked", attr);
3335 if (a != NULL_TREE)
3336 type |= ARM_FT_NAKED;
3338 a = lookup_attribute ("isr", attr);
3339 if (a == NULL_TREE)
3340 a = lookup_attribute ("interrupt", attr);
3342 if (a == NULL_TREE)
3343 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3344 else
3345 type |= arm_isr_value (TREE_VALUE (a));
3347 return type;
3350 /* Returns the type of the current function. */
3352 unsigned long
3353 arm_current_func_type (void)
3355 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3356 cfun->machine->func_type = arm_compute_func_type ();
3358 return cfun->machine->func_type;
3361 bool
3362 arm_allocate_stack_slots_for_args (void)
3364 /* Naked functions should not allocate stack slots for arguments. */
3365 return !IS_NAKED (arm_current_func_type ());
3368 static bool
3369 arm_warn_func_return (tree decl)
3371 /* Naked functions are implemented entirely in assembly, including the
3372 return sequence, so suppress warnings about this. */
3373 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
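/* For example, a routine declared as

     void start (void) __attribute__ ((naked));

   must supply its own prologue, epilogue and return sequence (typically
   in inline asm), which is why the two hooks above exempt naked
   functions from argument stack slots and from missing-return
   warnings.  */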
3377 /* Output assembler code for a block containing the constant parts
3378 of a trampoline, leaving space for the variable parts.
3380 On the ARM, (if r8 is the static chain regnum, and remembering that
3381 referencing pc adds an offset of 8) the trampoline looks like:
3382 ldr r8, [pc, #0]
3383 ldr pc, [pc]
3384 .word static chain value
3385 .word function's address
3386 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3388 static void
3389 arm_asm_trampoline_template (FILE *f)
3391 if (TARGET_ARM)
3393 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3394 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3396 else if (TARGET_THUMB2)
3398 /* The Thumb-2 trampoline is similar to the ARM implementation.
3399 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3400 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3401 STATIC_CHAIN_REGNUM, PC_REGNUM);
3402 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3404 else
3406 ASM_OUTPUT_ALIGN (f, 2);
3407 fprintf (f, "\t.code\t16\n");
3408 fprintf (f, ".Ltrampoline_start:\n");
3409 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3410 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3411 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3412 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3413 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3414 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3416 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3417 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3420 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3422 static void
3423 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3425 rtx fnaddr, mem, a_tramp;
3427 emit_block_move (m_tramp, assemble_trampoline_template (),
3428 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3430 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3431 emit_move_insn (mem, chain_value);
3433 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3434 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3435 emit_move_insn (mem, fnaddr);
3437 a_tramp = XEXP (m_tramp, 0);
3438 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3439 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3440 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
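/* Illustrative layout of the initialized 32-bit (ARM/Thumb-2) trampoline,
   matching the offsets used by the emit_move_insn calls above:

     offset  0:  ldr  <static chain reg>, [pc, ...]
     offset  4:  ldr  pc, [pc, ...]
     offset  8:  static chain value
     offset 12:  address of the target function

   The 16-bit Thumb template is longer, so its two data words live at
   offsets 12 and 16 instead.  */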
3443 /* Thumb trampolines should be entered in Thumb mode, so set
3444 the bottom bit of the address. */
3446 static rtx
3447 arm_trampoline_adjust_address (rtx addr)
3449 if (TARGET_THUMB)
3450 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3451 NULL, 0, OPTAB_LIB_WIDEN);
3452 return addr;
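/* E.g. a trampoline placed at 0x20001000 is entered through the address
   0x20001001 on Thumb targets, so that an indirect "bx" to it switches
   to (or stays in) Thumb state.  */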
3455 /* Return 1 if it is possible to return using a single instruction.
3456 If SIBLING is non-null, this is a test for a return before a sibling
3457 call. SIBLING is the call insn, so we can examine its register usage. */
3460 use_return_insn (int iscond, rtx sibling)
3462 int regno;
3463 unsigned int func_type;
3464 unsigned long saved_int_regs;
3465 unsigned HOST_WIDE_INT stack_adjust;
3466 arm_stack_offsets *offsets;
3468 /* Never use a return instruction before reload has run. */
3469 if (!reload_completed)
3470 return 0;
3472 func_type = arm_current_func_type ();
3474 /* Naked, volatile and stack alignment functions need special
3475 consideration. */
3476 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3477 return 0;
3479 /* So do interrupt functions that use the frame pointer and Thumb
3480 interrupt functions. */
3481 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3482 return 0;
3484 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3485 && !optimize_function_for_size_p (cfun))
3486 return 0;
3488 offsets = arm_get_frame_offsets ();
3489 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3491 /* As do variadic functions. */
3492 if (crtl->args.pretend_args_size
3493 || cfun->machine->uses_anonymous_args
3494 /* Or if the function calls __builtin_eh_return () */
3495 || crtl->calls_eh_return
3496 /* Or if the function calls alloca */
3497 || cfun->calls_alloca
3498 /* Or if there is a stack adjustment. However, if the stack pointer
3499 is saved on the stack, we can use a pre-incrementing stack load. */
3500 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3501 && stack_adjust == 4)))
3502 return 0;
3504 saved_int_regs = offsets->saved_regs_mask;
3506 /* Unfortunately, the insn
3508 ldmib sp, {..., sp, ...}
3510 triggers a bug on most SA-110 based devices, such that the stack
3511 pointer won't be correctly restored if the instruction takes a
3512 page fault. We work around this problem by popping r3 along with
3513 the other registers, since that is never slower than executing
3514 another instruction.
3516 We test for !arm_arch5 here, because code for any architecture
3517 less than this could potentially be run on one of the buggy
3518 chips. */
3519 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3521 /* Validate that r3 is a call-clobbered register (always true in
3522 the default abi) ... */
3523 if (!call_used_regs[3])
3524 return 0;
3526 /* ... that it isn't being used for a return value ... */
3527 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3528 return 0;
3530 /* ... or for a tail-call argument ... */
3531 if (sibling)
3533 gcc_assert (CALL_P (sibling));
3535 if (find_regno_fusage (sibling, USE, 3))
3536 return 0;
3539 /* ... and that there are no call-saved registers in r0-r2
3540 (always true in the default ABI). */
3541 if (saved_int_regs & 0x7)
3542 return 0;
3545 /* Can't be done if interworking with Thumb, and any registers have been
3546 stacked. */
3547 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3548 return 0;
3550 /* On StrongARM, conditional returns are expensive if they aren't
3551 taken and multiple registers have been stacked. */
3552 if (iscond && arm_tune_strongarm)
3554 /* Conditional return when just the LR is stored is a simple
3555 conditional-load instruction, that's not expensive. */
3556 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3557 return 0;
3559 if (flag_pic
3560 && arm_pic_register != INVALID_REGNUM
3561 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3562 return 0;
3565 /* If there are saved registers but the LR isn't saved, then we need
3566 two instructions for the return. */
3567 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3568 return 0;
3570 /* Can't be done if any of the VFP regs are pushed,
3571 since this also requires an insn. */
3572 if (TARGET_HARD_FLOAT && TARGET_VFP)
3573 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3574 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3575 return 0;
3577 if (TARGET_REALLY_IWMMXT)
3578 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3579 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3580 return 0;
3582 return 1;
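/* As a concrete example, a simple leaf function that saves no registers
   and needs no stack adjustment passes all of the tests above and can
   return with a single instruction such as "bx lr"; a function that
   saves r4 but not lr, on the other hand, needs a separate restore
   before the return, so 0 is returned for it.  */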
3585 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3586 shrink-wrapping if possible. This is the case if we need to emit a
3587 prologue, which we can test by looking at the offsets. */
3588 bool
3589 use_simple_return_p (void)
3591 arm_stack_offsets *offsets;
3593 offsets = arm_get_frame_offsets ();
3594 return offsets->outgoing_args != 0;
3597 /* Return TRUE if int I is a valid immediate ARM constant. */
3600 const_ok_for_arm (HOST_WIDE_INT i)
3602 int lowbit;
3604 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3605 be all zero, or all one. */
3606 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3607 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3608 != ((~(unsigned HOST_WIDE_INT) 0)
3609 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3610 return FALSE;
3612 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3614 /* Fast return for 0 and small values. We must do this for zero, since
3615 the code below can't handle that one case. */
3616 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3617 return TRUE;
3619 /* Get the number of trailing zeros. */
3620 lowbit = ffs((int) i) - 1;
3622 /* Only even shifts are allowed in ARM mode so round down to the
3623 nearest even number. */
3624 if (TARGET_ARM)
3625 lowbit &= ~1;
3627 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3628 return TRUE;
3630 if (TARGET_ARM)
3632 /* Allow rotated constants in ARM mode. */
3633 if (lowbit <= 4
3634 && ((i & ~0xc000003f) == 0
3635 || (i & ~0xf000000f) == 0
3636 || (i & ~0xfc000003) == 0))
3637 return TRUE;
3639 else
3641 HOST_WIDE_INT v;
3643 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3644 v = i & 0xff;
3645 v |= v << 16;
3646 if (i == v || i == (v | (v << 8)))
3647 return TRUE;
3649 /* Allow repeated pattern 0xXY00XY00. */
3650 v = i & 0xff00;
3651 v |= v << 16;
3652 if (i == v)
3653 return TRUE;
3656 return FALSE;
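/* Worked examples (ARM mode): 0x000000FF and 0xFF000000 are valid
   immediates (an 8-bit value rotated right by 0 and by 8 bits
   respectively), and 0x00000104 is valid (0x41 shifted left by 2), but
   0x00000101 is not, because its set bits span nine bit positions.
   In Thumb-2 mode, 0x00FF00FF and 0x01010101 are additionally accepted
   as replicated byte patterns.  */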
3659 /* Return true if I is a valid constant for the operation CODE. */
3661 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3663 if (const_ok_for_arm (i))
3664 return 1;
3666 switch (code)
3668 case SET:
3669 /* See if we can use movw. */
3670 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3671 return 1;
3672 else
3673 /* Otherwise, try mvn. */
3674 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3676 case PLUS:
3677 /* See if we can use addw or subw. */
3678 if (TARGET_THUMB2
3679 && ((i & 0xfffff000) == 0
3680 || ((-i) & 0xfffff000) == 0))
3681 return 1;
3682 /* else fall through. */
3684 case COMPARE:
3685 case EQ:
3686 case NE:
3687 case GT:
3688 case LE:
3689 case LT:
3690 case GE:
3691 case GEU:
3692 case LTU:
3693 case GTU:
3694 case LEU:
3695 case UNORDERED:
3696 case ORDERED:
3697 case UNEQ:
3698 case UNGE:
3699 case UNLT:
3700 case UNGT:
3701 case UNLE:
3702 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3704 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3705 case XOR:
3706 return 0;
3708 case IOR:
3709 if (TARGET_THUMB2)
3710 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3711 return 0;
3713 case AND:
3714 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3716 default:
3717 gcc_unreachable ();
3721 /* Return true if I is a valid di mode constant for the operation CODE. */
3723 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3725 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3726 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3727 rtx hi = GEN_INT (hi_val);
3728 rtx lo = GEN_INT (lo_val);
3730 if (TARGET_THUMB1)
3731 return 0;
3733 switch (code)
3735 case AND:
3736 case IOR:
3737 case XOR:
3738 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3739 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3740 case PLUS:
3741 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3743 default:
3744 return 0;
3748 /* Emit a sequence of insns to handle a large constant.
3749 CODE is the code of the operation required, it can be any of SET, PLUS,
3750 IOR, AND, XOR, MINUS;
3751 MODE is the mode in which the operation is being performed;
3752 VAL is the integer to operate on;
3753 SOURCE is the other operand (a register, or a null-pointer for SET);
3754 SUBTARGETS means it is safe to create scratch registers if that will
3755 either produce a simpler sequence, or we will want to cse the values.
3756 Return value is the number of insns emitted. */
3758 /* ??? Tweak this for thumb2. */
3760 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3761 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3763 rtx cond;
3765 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3766 cond = COND_EXEC_TEST (PATTERN (insn));
3767 else
3768 cond = NULL_RTX;
3770 if (subtargets || code == SET
3771 || (REG_P (target) && REG_P (source)
3772 && REGNO (target) != REGNO (source)))
3774 /* After arm_reorg has been called, we can't fix up expensive
3775 constants by pushing them into memory so we must synthesize
3776 them in-line, regardless of the cost. This is only likely to
3777 be more costly on chips that have load delay slots and we are
3778 compiling without running the scheduler (so no splitting
3779 occurred before the final instruction emission).
3781 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3783 if (!cfun->machine->after_arm_reorg
3784 && !cond
3785 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3786 1, 0)
3787 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3788 + (code != SET))))
3790 if (code == SET)
3792 /* Currently SET is the only monadic value for CODE; all
3793 the rest are dyadic. */
3794 if (TARGET_USE_MOVT)
3795 arm_emit_movpair (target, GEN_INT (val));
3796 else
3797 emit_set_insn (target, GEN_INT (val));
3799 return 1;
3801 else
3803 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3805 if (TARGET_USE_MOVT)
3806 arm_emit_movpair (temp, GEN_INT (val));
3807 else
3808 emit_set_insn (temp, GEN_INT (val));
3810 /* For MINUS, the value is subtracted from, since we never
3811 have subtraction of a constant. */
3812 if (code == MINUS)
3813 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3814 else
3815 emit_set_insn (target,
3816 gen_rtx_fmt_ee (code, mode, source, temp));
3817 return 2;
3822 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3826 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3827 ARM/Thumb-2 immediates and add up to VAL.
3828 The function's return value gives the number of insns required. */
3829 static int
3830 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3831 struct four_ints *return_sequence)
3833 int best_consecutive_zeros = 0;
3834 int i;
3835 int best_start = 0;
3836 int insns1, insns2;
3837 struct four_ints tmp_sequence;
3839 /* If we aren't targeting ARM, the best place to start is always at
3840 the bottom, otherwise look more closely. */
3841 if (TARGET_ARM)
3843 for (i = 0; i < 32; i += 2)
3845 int consecutive_zeros = 0;
3847 if (!(val & (3 << i)))
3849 while ((i < 32) && !(val & (3 << i)))
3851 consecutive_zeros += 2;
3852 i += 2;
3854 if (consecutive_zeros > best_consecutive_zeros)
3856 best_consecutive_zeros = consecutive_zeros;
3857 best_start = i - consecutive_zeros;
3859 i -= 2;
3864 /* So long as it won't require any more insns to do so, it's
3865 desirable to emit a small constant (in bits 0...9) in the last
3866 insn. This way there is more chance that it can be combined with
3867 a later addressing insn to form a pre-indexed load or store
3868 operation. Consider:
3870 *((volatile int *)0xe0000100) = 1;
3871 *((volatile int *)0xe0000110) = 2;
3873 We want this to wind up as:
3875 mov rA, #0xe0000000
3876 mov rB, #1
3877 str rB, [rA, #0x100]
3878 mov rB, #2
3879 str rB, [rA, #0x110]
3881 rather than having to synthesize both large constants from scratch.
3883 Therefore, we calculate how many insns would be required to emit
3884 the constant starting from `best_start', and also starting from
3885 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3886 yield a shorter sequence, we may as well use zero. */
3887 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3888 if (best_start != 0
3889 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3891 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3892 if (insns2 <= insns1)
3894 *return_sequence = tmp_sequence;
3895 insns1 = insns2;
3899 return insns1;
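/* For instance, for the value 0xE0000100 used in the example above this
   returns 2, with RETURN_SEQUENCE set to { 0xE0000000, 0x00000100 }:
   both words are valid immediates on their own, and the small constant
   comes last so that the final add has a chance of being folded into a
   pre-indexed address, as described above.  */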
3902 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3903 static int
3904 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3905 struct four_ints *return_sequence, int i)
3907 int remainder = val & 0xffffffff;
3908 int insns = 0;
3910 /* Try and find a way of doing the job in either two or three
3911 instructions.
3913 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3914 location. We start at position I. This may be the MSB, or
3915 optimal_immediate_sequence may have positioned it at the largest block
3916 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3917 wrapping around to the top of the word when we drop off the bottom.
3918 In the worst case this code should produce no more than four insns.
3920 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3921 constants, shifted to any arbitrary location. We should always start
3922 at the MSB. */
3925 int end;
3926 unsigned int b1, b2, b3, b4;
3927 unsigned HOST_WIDE_INT result;
3928 int loc;
3930 gcc_assert (insns < 4);
3932 if (i <= 0)
3933 i += 32;
3935 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3936 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3938 loc = i;
3939 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3940 /* We can use addw/subw for the last 12 bits. */
3941 result = remainder;
3942 else
3944 /* Use an 8-bit shifted/rotated immediate. */
3945 end = i - 8;
3946 if (end < 0)
3947 end += 32;
3948 result = remainder & ((0x0ff << end)
3949 | ((i < end) ? (0xff >> (32 - end))
3950 : 0));
3951 i -= 8;
3954 else
3956 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3957 arbitrary shifts. */
3958 i -= TARGET_ARM ? 2 : 1;
3959 continue;
3962 /* Next, see if we can do a better job with a thumb2 replicated
3963 constant.
3965 We do it this way around to catch the cases like 0x01F001E0 where
3966 two 8-bit immediates would work, but a replicated constant would
3967 make it worse.
3969 TODO: 16-bit constants that don't clear all the bits, but still win.
3970 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3971 if (TARGET_THUMB2)
3973 b1 = (remainder & 0xff000000) >> 24;
3974 b2 = (remainder & 0x00ff0000) >> 16;
3975 b3 = (remainder & 0x0000ff00) >> 8;
3976 b4 = remainder & 0xff;
3978 if (loc > 24)
3980 /* The 8-bit immediate already found clears b1 (and maybe b2),
3981 but must leave b3 and b4 alone. */
3983 /* First try to find a 32-bit replicated constant that clears
3984 almost everything. We can assume that we can't do it in one,
3985 or else we wouldn't be here. */
3986 unsigned int tmp = b1 & b2 & b3 & b4;
3987 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3988 + (tmp << 24);
3989 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3990 + (tmp == b3) + (tmp == b4);
3991 if (tmp
3992 && (matching_bytes >= 3
3993 || (matching_bytes == 2
3994 && const_ok_for_op (remainder & ~tmp2, code))))
3996 /* At least 3 of the bytes match, and the fourth has at
3997 least as many bits set, or two of the bytes match
3998 and it will only require one more insn to finish. */
3999 result = tmp2;
4000 i = tmp != b1 ? 32
4001 : tmp != b2 ? 24
4002 : tmp != b3 ? 16
4003 : 8;
4006 /* Second, try to find a 16-bit replicated constant that can
4007 leave three of the bytes clear. If b2 or b4 is already
4008 zero, then we can. If the 8-bit from above would not
4009 clear b2 anyway, then we still win. */
4010 else if (b1 == b3 && (!b2 || !b4
4011 || (remainder & 0x00ff0000 & ~result)))
4013 result = remainder & 0xff00ff00;
4014 i = 24;
4017 else if (loc > 16)
4019 /* The 8-bit immediate already found clears b2 (and maybe b3)
4020 and we don't get here unless b1 is already clear, but it will
4021 leave b4 unchanged. */
4023 /* If we can clear b2 and b4 at once, then we win, since the
4024 8-bits couldn't possibly reach that far. */
4025 if (b2 == b4)
4027 result = remainder & 0x00ff00ff;
4028 i = 16;
4033 return_sequence->i[insns++] = result;
4034 remainder &= ~result;
4036 if (code == SET || code == MINUS)
4037 code = PLUS;
4039 while (remainder);
4041 return insns;
4044 /* Emit an instruction with the indicated PATTERN. If COND is
4045 non-NULL, conditionalize the execution of the instruction on COND
4046 being true. */
4048 static void
4049 emit_constant_insn (rtx cond, rtx pattern)
4051 if (cond)
4052 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4053 emit_insn (pattern);
4056 /* As above, but extra parameter GENERATE which, if clear, suppresses
4057 RTL generation. */
4059 static int
4060 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4061 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4062 int generate)
4064 int can_invert = 0;
4065 int can_negate = 0;
4066 int final_invert = 0;
4067 int i;
4068 int set_sign_bit_copies = 0;
4069 int clear_sign_bit_copies = 0;
4070 int clear_zero_bit_copies = 0;
4071 int set_zero_bit_copies = 0;
4072 int insns = 0, neg_insns, inv_insns;
4073 unsigned HOST_WIDE_INT temp1, temp2;
4074 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4075 struct four_ints *immediates;
4076 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4078 /* Find out which operations are safe for a given CODE. Also do a quick
4079 check for degenerate cases; these can occur when DImode operations
4080 are split. */
4081 switch (code)
4083 case SET:
4084 can_invert = 1;
4085 break;
4087 case PLUS:
4088 can_negate = 1;
4089 break;
4091 case IOR:
4092 if (remainder == 0xffffffff)
4094 if (generate)
4095 emit_constant_insn (cond,
4096 gen_rtx_SET (VOIDmode, target,
4097 GEN_INT (ARM_SIGN_EXTEND (val))));
4098 return 1;
4101 if (remainder == 0)
4103 if (reload_completed && rtx_equal_p (target, source))
4104 return 0;
4106 if (generate)
4107 emit_constant_insn (cond,
4108 gen_rtx_SET (VOIDmode, target, source));
4109 return 1;
4111 break;
4113 case AND:
4114 if (remainder == 0)
4116 if (generate)
4117 emit_constant_insn (cond,
4118 gen_rtx_SET (VOIDmode, target, const0_rtx));
4119 return 1;
4121 if (remainder == 0xffffffff)
4123 if (reload_completed && rtx_equal_p (target, source))
4124 return 0;
4125 if (generate)
4126 emit_constant_insn (cond,
4127 gen_rtx_SET (VOIDmode, target, source));
4128 return 1;
4130 can_invert = 1;
4131 break;
4133 case XOR:
4134 if (remainder == 0)
4136 if (reload_completed && rtx_equal_p (target, source))
4137 return 0;
4138 if (generate)
4139 emit_constant_insn (cond,
4140 gen_rtx_SET (VOIDmode, target, source));
4141 return 1;
4144 if (remainder == 0xffffffff)
4146 if (generate)
4147 emit_constant_insn (cond,
4148 gen_rtx_SET (VOIDmode, target,
4149 gen_rtx_NOT (mode, source)));
4150 return 1;
4152 final_invert = 1;
4153 break;
4155 case MINUS:
4156 /* We treat MINUS as (val - source), since (source - val) is always
4157 passed as (source + (-val)). */
4158 if (remainder == 0)
4160 if (generate)
4161 emit_constant_insn (cond,
4162 gen_rtx_SET (VOIDmode, target,
4163 gen_rtx_NEG (mode, source)));
4164 return 1;
4166 if (const_ok_for_arm (val))
4168 if (generate)
4169 emit_constant_insn (cond,
4170 gen_rtx_SET (VOIDmode, target,
4171 gen_rtx_MINUS (mode, GEN_INT (val),
4172 source)));
4173 return 1;
4176 break;
4178 default:
4179 gcc_unreachable ();
4182 /* If we can do it in one insn get out quickly. */
4183 if (const_ok_for_op (val, code))
4185 if (generate)
4186 emit_constant_insn (cond,
4187 gen_rtx_SET (VOIDmode, target,
4188 (source
4189 ? gen_rtx_fmt_ee (code, mode, source,
4190 GEN_INT (val))
4191 : GEN_INT (val))));
4192 return 1;
4195 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4196 insn. */
4197 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4198 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4200 if (generate)
4202 if (mode == SImode && i == 16)
4203 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4204 smaller insn. */
4205 emit_constant_insn (cond,
4206 gen_zero_extendhisi2
4207 (target, gen_lowpart (HImode, source)));
4208 else
4209 /* Extz only supports SImode, but we can coerce the operands
4210 into that mode. */
4211 emit_constant_insn (cond,
4212 gen_extzv_t2 (gen_lowpart (SImode, target),
4213 gen_lowpart (SImode, source),
4214 GEN_INT (i), const0_rtx));
4217 return 1;
4220 /* Calculate a few attributes that may be useful for specific
4221 optimizations. */
4222 /* Count number of leading zeros. */
4223 for (i = 31; i >= 0; i--)
4225 if ((remainder & (1 << i)) == 0)
4226 clear_sign_bit_copies++;
4227 else
4228 break;
4231 /* Count number of leading 1's. */
4232 for (i = 31; i >= 0; i--)
4234 if ((remainder & (1 << i)) != 0)
4235 set_sign_bit_copies++;
4236 else
4237 break;
4240 /* Count number of trailing zeros. */
4241 for (i = 0; i <= 31; i++)
4243 if ((remainder & (1 << i)) == 0)
4244 clear_zero_bit_copies++;
4245 else
4246 break;
4249 /* Count number of trailing 1's. */
4250 for (i = 0; i <= 31; i++)
4252 if ((remainder & (1 << i)) != 0)
4253 set_zero_bit_copies++;
4254 else
4255 break;
4258 switch (code)
4260 case SET:
4261 /* See if we can do this by sign_extending a constant that is known
4262 to be negative. This is a good way of doing it, since the shift
4263 may well merge into a subsequent insn. */
4264 if (set_sign_bit_copies > 1)
4266 if (const_ok_for_arm
4267 (temp1 = ARM_SIGN_EXTEND (remainder
4268 << (set_sign_bit_copies - 1))))
4270 if (generate)
4272 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4273 emit_constant_insn (cond,
4274 gen_rtx_SET (VOIDmode, new_src,
4275 GEN_INT (temp1)));
4276 emit_constant_insn (cond,
4277 gen_ashrsi3 (target, new_src,
4278 GEN_INT (set_sign_bit_copies - 1)));
4280 return 2;
4282 /* For an inverted constant, we will need to set the low bits,
4283 these will be shifted out of harm's way. */
4284 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4285 if (const_ok_for_arm (~temp1))
4287 if (generate)
4289 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4290 emit_constant_insn (cond,
4291 gen_rtx_SET (VOIDmode, new_src,
4292 GEN_INT (temp1)));
4293 emit_constant_insn (cond,
4294 gen_ashrsi3 (target, new_src,
4295 GEN_INT (set_sign_bit_copies - 1)));
4297 return 2;
4301 /* See if we can calculate the value as the difference between two
4302 valid immediates. */
4303 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4305 int topshift = clear_sign_bit_copies & ~1;
4307 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4308 & (0xff000000 >> topshift));
4310 /* If temp1 is zero, then that means the 9 most significant
4311 bits of remainder were 1 and we've caused it to overflow.
4312 When topshift is 0 we don't need to do anything since we
4313 can borrow from 'bit 32'. */
4314 if (temp1 == 0 && topshift != 0)
4315 temp1 = 0x80000000 >> (topshift - 1);
4317 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4319 if (const_ok_for_arm (temp2))
4321 if (generate)
4323 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4324 emit_constant_insn (cond,
4325 gen_rtx_SET (VOIDmode, new_src,
4326 GEN_INT (temp1)));
4327 emit_constant_insn (cond,
4328 gen_addsi3 (target, new_src,
4329 GEN_INT (-temp2)));
4332 return 2;
4336 /* See if we can generate this by setting the bottom (or the top)
4337 16 bits, and then shifting these into the other half of the
4338 word. We only look for the simplest cases, to do more would cost
4339 too much. Be careful, however, not to generate this when the
4340 alternative would take fewer insns. */
4341 if (val & 0xffff0000)
4343 temp1 = remainder & 0xffff0000;
4344 temp2 = remainder & 0x0000ffff;
4346 /* Overlaps outside this range are best done using other methods. */
4347 for (i = 9; i < 24; i++)
4349 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4350 && !const_ok_for_arm (temp2))
4352 rtx new_src = (subtargets
4353 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4354 : target);
4355 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4356 source, subtargets, generate);
4357 source = new_src;
4358 if (generate)
4359 emit_constant_insn
4360 (cond,
4361 gen_rtx_SET
4362 (VOIDmode, target,
4363 gen_rtx_IOR (mode,
4364 gen_rtx_ASHIFT (mode, source,
4365 GEN_INT (i)),
4366 source)));
4367 return insns + 1;
4371 /* Don't duplicate cases already considered. */
4372 for (i = 17; i < 24; i++)
4374 if (((temp1 | (temp1 >> i)) == remainder)
4375 && !const_ok_for_arm (temp1))
4377 rtx new_src = (subtargets
4378 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4379 : target);
4380 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4381 source, subtargets, generate);
4382 source = new_src;
4383 if (generate)
4384 emit_constant_insn
4385 (cond,
4386 gen_rtx_SET (VOIDmode, target,
4387 gen_rtx_IOR
4388 (mode,
4389 gen_rtx_LSHIFTRT (mode, source,
4390 GEN_INT (i)),
4391 source)));
4392 return insns + 1;
4396 break;
4398 case IOR:
4399 case XOR:
4400 /* If we have IOR or XOR, and the constant can be loaded in a
4401 single instruction, and we can find a temporary to put it in,
4402 then this can be done in two instructions instead of 3-4. */
4403 if (subtargets
4404 /* TARGET can't be NULL if SUBTARGETS is 0 */
4405 || (reload_completed && !reg_mentioned_p (target, source)))
4407 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4409 if (generate)
4411 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4413 emit_constant_insn (cond,
4414 gen_rtx_SET (VOIDmode, sub,
4415 GEN_INT (val)));
4416 emit_constant_insn (cond,
4417 gen_rtx_SET (VOIDmode, target,
4418 gen_rtx_fmt_ee (code, mode,
4419 source, sub)));
4421 return 2;
4425 if (code == XOR)
4426 break;
4428 /* Convert
4429 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4430 followed by 0s, e.g. 0xfff00000) into
4431 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4433 This can be done in 2 instructions by using shifts with mov or mvn.
4434 E.g. for
4435 x = x | 0xfff00000;
4436 we generate:
4437 mvn r0, r0, asl #12
4438 mvn r0, r0, lsr #12 */
4439 if (set_sign_bit_copies > 8
4440 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4442 if (generate)
4444 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4445 rtx shift = GEN_INT (set_sign_bit_copies);
4447 emit_constant_insn
4448 (cond,
4449 gen_rtx_SET (VOIDmode, sub,
4450 gen_rtx_NOT (mode,
4451 gen_rtx_ASHIFT (mode,
4452 source,
4453 shift))));
4454 emit_constant_insn
4455 (cond,
4456 gen_rtx_SET (VOIDmode, target,
4457 gen_rtx_NOT (mode,
4458 gen_rtx_LSHIFTRT (mode, sub,
4459 shift))));
4461 return 2;
4464 /* Convert
4465 x = y | constant (which has set_zero_bit_copies trailing ones) into
4467 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4469 E.g. for r0 = r0 | 0xfff we generate:
4470 mvn r0, r0, lsr #12
4471 mvn r0, r0, asl #12 */
4474 if (set_zero_bit_copies > 8
4475 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4477 if (generate)
4479 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (set_zero_bit_copies);
4482 emit_constant_insn
4483 (cond,
4484 gen_rtx_SET (VOIDmode, sub,
4485 gen_rtx_NOT (mode,
4486 gen_rtx_LSHIFTRT (mode,
4487 source,
4488 shift))));
4489 emit_constant_insn
4490 (cond,
4491 gen_rtx_SET (VOIDmode, target,
4492 gen_rtx_NOT (mode,
4493 gen_rtx_ASHIFT (mode, sub,
4494 shift))));
4496 return 2;
4499 /* This will never be reached for Thumb2 because orn is a valid
4500 instruction. This is for Thumb1 and the ARM 32-bit cases.
4502 x = y | constant (such that ~constant is a valid constant)
4503 Transform this to
4504 x = ~(~y & ~constant). */
4506 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4508 if (generate)
4510 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4511 emit_constant_insn (cond,
4512 gen_rtx_SET (VOIDmode, sub,
4513 gen_rtx_NOT (mode, source)));
4514 source = sub;
4515 if (subtargets)
4516 sub = gen_reg_rtx (mode);
4517 emit_constant_insn (cond,
4518 gen_rtx_SET (VOIDmode, sub,
4519 gen_rtx_AND (mode, source,
4520 GEN_INT (temp1))));
4521 emit_constant_insn (cond,
4522 gen_rtx_SET (VOIDmode, target,
4523 gen_rtx_NOT (mode, sub)));
4525 return 3;
4527 break;
4529 case AND:
4530 /* See if two shifts will do 2 or more insn's worth of work. */
4531 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4533 HOST_WIDE_INT shift_mask = ((0xffffffff
4534 << (32 - clear_sign_bit_copies))
4535 & 0xffffffff);
4537 if ((remainder | shift_mask) != 0xffffffff)
4539 HOST_WIDE_INT new_val
4540 = ARM_SIGN_EXTEND (remainder | shift_mask);
4542 if (generate)
4544 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4545 insns = arm_gen_constant (AND, SImode, cond, new_val,
4546 new_src, source, subtargets, 1);
4547 source = new_src;
4549 else
4551 rtx targ = subtargets ? NULL_RTX : target;
4552 insns = arm_gen_constant (AND, mode, cond, new_val,
4553 targ, source, subtargets, 0);
4557 if (generate)
4559 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4560 rtx shift = GEN_INT (clear_sign_bit_copies);
4562 emit_insn (gen_ashlsi3 (new_src, source, shift));
4563 emit_insn (gen_lshrsi3 (target, new_src, shift));
4566 return insns + 2;
4569 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4571 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4573 if ((remainder | shift_mask) != 0xffffffff)
4575 HOST_WIDE_INT new_val
4576 = ARM_SIGN_EXTEND (remainder | shift_mask);
4577 if (generate)
4579 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4581 insns = arm_gen_constant (AND, mode, cond, new_val,
4582 new_src, source, subtargets, 1);
4583 source = new_src;
4585 else
4587 rtx targ = subtargets ? NULL_RTX : target;
4589 insns = arm_gen_constant (AND, mode, cond, new_val,
4590 targ, source, subtargets, 0);
4594 if (generate)
4596 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4597 rtx shift = GEN_INT (clear_zero_bit_copies);
4599 emit_insn (gen_lshrsi3 (new_src, source, shift));
4600 emit_insn (gen_ashlsi3 (target, new_src, shift));
4603 return insns + 2;
4606 break;
4608 default:
4609 break;
4612 /* Calculate what the instruction sequences would be if we generated it
4613 normally, negated, or inverted. */
4614 if (code == AND)
4615 /* AND cannot be split into multiple insns, so invert and use BIC. */
4616 insns = 99;
4617 else
4618 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4620 if (can_negate)
4621 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4622 &neg_immediates);
4623 else
4624 neg_insns = 99;
4626 if (can_invert || final_invert)
4627 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4628 &inv_immediates);
4629 else
4630 inv_insns = 99;
4632 immediates = &pos_immediates;
4634 /* Is the negated immediate sequence more efficient? */
4635 if (neg_insns < insns && neg_insns <= inv_insns)
4637 insns = neg_insns;
4638 immediates = &neg_immediates;
4640 else
4641 can_negate = 0;
4643 /* Is the inverted immediate sequence more efficient?
4644 We must allow for an extra NOT instruction for XOR operations, although
4645 there is some chance that the final 'mvn' will get optimized later. */
4646 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4648 insns = inv_insns;
4649 immediates = &inv_immediates;
4651 else
4653 can_invert = 0;
4654 final_invert = 0;
4657 /* Now output the chosen sequence as instructions. */
4658 if (generate)
4660 for (i = 0; i < insns; i++)
4662 rtx new_src, temp1_rtx;
4664 temp1 = immediates->i[i];
4666 if (code == SET || code == MINUS)
4667 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4668 else if ((final_invert || i < (insns - 1)) && subtargets)
4669 new_src = gen_reg_rtx (mode);
4670 else
4671 new_src = target;
4673 if (can_invert)
4674 temp1 = ~temp1;
4675 else if (can_negate)
4676 temp1 = -temp1;
4678 temp1 = trunc_int_for_mode (temp1, mode);
4679 temp1_rtx = GEN_INT (temp1);
4681 if (code == SET)
4683 else if (code == MINUS)
4684 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4685 else
4686 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4688 emit_constant_insn (cond,
4689 gen_rtx_SET (VOIDmode, new_src,
4690 temp1_rtx));
4691 source = new_src;
4693 if (code == SET)
4695 can_negate = can_invert;
4696 can_invert = 0;
4697 code = PLUS;
4699 else if (code == MINUS)
4700 code = PLUS;
4704 if (final_invert)
4706 if (generate)
4707 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4708 gen_rtx_NOT (mode, source)));
4709 insns++;
4712 return insns;
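/* An example of the invert selection above: an AND with 0xFFF0FF00 has
   no single-immediate form, so insns is forced to 99 and the inverted
   constant 0x000F00FF is used instead; it splits into the two valid
   immediates 0x000F0000 and 0x000000FF, and the operation is emitted as
   two BIC instructions.  */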
4715 /* Canonicalize a comparison so that we are more likely to recognize it.
4716 This can be done for a few constant compares, where we can make the
4717 immediate value easier to load. */
4719 static void
4720 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4721 bool op0_preserve_value)
4723 machine_mode mode;
4724 unsigned HOST_WIDE_INT i, maxval;
4726 mode = GET_MODE (*op0);
4727 if (mode == VOIDmode)
4728 mode = GET_MODE (*op1);
4730 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4732 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4733 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4734 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4735 for GTU/LEU in Thumb mode. */
4736 if (mode == DImode)
4739 if (*code == GT || *code == LE
4740 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4742 /* Missing comparison. First try to use an available
4743 comparison. */
4744 if (CONST_INT_P (*op1))
4746 i = INTVAL (*op1);
4747 switch (*code)
4749 case GT:
4750 case LE:
4751 if (i != maxval
4752 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4754 *op1 = GEN_INT (i + 1);
4755 *code = *code == GT ? GE : LT;
4756 return;
4758 break;
4759 case GTU:
4760 case LEU:
4761 if (i != ~((unsigned HOST_WIDE_INT) 0)
4762 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4764 *op1 = GEN_INT (i + 1);
4765 *code = *code == GTU ? GEU : LTU;
4766 return;
4768 break;
4769 default:
4770 gcc_unreachable ();
4774 /* If that did not work, reverse the condition. */
4775 if (!op0_preserve_value)
4777 std::swap (*op0, *op1);
4778 *code = (int)swap_condition ((enum rtx_code)*code);
4781 return;
4784 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4785 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4786 to facilitate possible combining with a cmp into 'ands'. */
4787 if (mode == SImode
4788 && GET_CODE (*op0) == ZERO_EXTEND
4789 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4790 && GET_MODE (XEXP (*op0, 0)) == QImode
4791 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4792 && subreg_lowpart_p (XEXP (*op0, 0))
4793 && *op1 == const0_rtx)
4794 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4795 GEN_INT (255));
4797 /* Comparisons smaller than DImode. Only adjust comparisons against
4798 an out-of-range constant. */
4799 if (!CONST_INT_P (*op1)
4800 || const_ok_for_arm (INTVAL (*op1))
4801 || const_ok_for_arm (- INTVAL (*op1)))
4802 return;
4804 i = INTVAL (*op1);
4806 switch (*code)
4808 case EQ:
4809 case NE:
4810 return;
4812 case GT:
4813 case LE:
4814 if (i != maxval
4815 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4817 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4818 *code = *code == GT ? GE : LT;
4819 return;
4821 break;
4823 case GE:
4824 case LT:
4825 if (i != ~maxval
4826 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4828 *op1 = GEN_INT (i - 1);
4829 *code = *code == GE ? GT : LE;
4830 return;
4832 break;
4834 case GTU:
4835 case LEU:
4836 if (i != ~((unsigned HOST_WIDE_INT) 0)
4837 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4839 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4840 *code = *code == GTU ? GEU : LTU;
4841 return;
4843 break;
4845 case GEU:
4846 case LTU:
4847 if (i != 0
4848 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4850 *op1 = GEN_INT (i - 1);
4851 *code = *code == GEU ? GTU : LEU;
4852 return;
4854 break;
4856 default:
4857 gcc_unreachable ();
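/* Example of the adjustment above: the SImode comparison "x > 4095" uses
   the constant 0xFFF, which is not a valid ARM immediate (and neither is
   -0xFFF), but 4096 is, so the comparison is rewritten as "x >= 4096"
   (GT becomes GE with *OP1 = 4096) and can then use a plain CMP.  */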
4862 /* Define how to find the value returned by a function. */
4864 static rtx
4865 arm_function_value(const_tree type, const_tree func,
4866 bool outgoing ATTRIBUTE_UNUSED)
4868 machine_mode mode;
4869 int unsignedp ATTRIBUTE_UNUSED;
4870 rtx r ATTRIBUTE_UNUSED;
4872 mode = TYPE_MODE (type);
4874 if (TARGET_AAPCS_BASED)
4875 return aapcs_allocate_return_reg (mode, type, func);
4877 /* Promote integer types. */
4878 if (INTEGRAL_TYPE_P (type))
4879 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4881 /* Promote small structs returned in a register to full-word size
4882 for big-endian AAPCS. */
4883 if (arm_return_in_msb (type))
4885 HOST_WIDE_INT size = int_size_in_bytes (type);
4886 if (size % UNITS_PER_WORD != 0)
4888 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4889 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4893 return arm_libcall_value_1 (mode);
4896 /* libcall hashtable helpers. */
4898 struct libcall_hasher : typed_noop_remove <rtx_def>
4900 typedef const rtx_def *value_type;
4901 typedef const rtx_def *compare_type;
4902 static inline hashval_t hash (const rtx_def *);
4903 static inline bool equal (const rtx_def *, const rtx_def *);
4904 static inline void remove (rtx_def *);
4907 inline bool
4908 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
4910 return rtx_equal_p (p1, p2);
4913 inline hashval_t
4914 libcall_hasher::hash (const rtx_def *p1)
4916 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4919 typedef hash_table<libcall_hasher> libcall_table_type;
4921 static void
4922 add_libcall (libcall_table_type *htab, rtx libcall)
4924 *htab->find_slot (libcall, INSERT) = libcall;
4927 static bool
4928 arm_libcall_uses_aapcs_base (const_rtx libcall)
4930 static bool init_done = false;
4931 static libcall_table_type *libcall_htab = NULL;
4933 if (!init_done)
4935 init_done = true;
4937 libcall_htab = new libcall_table_type (31);
4938 add_libcall (libcall_htab,
4939 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4940 add_libcall (libcall_htab,
4941 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4942 add_libcall (libcall_htab,
4943 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4944 add_libcall (libcall_htab,
4945 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4947 add_libcall (libcall_htab,
4948 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4949 add_libcall (libcall_htab,
4950 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4951 add_libcall (libcall_htab,
4952 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4953 add_libcall (libcall_htab,
4954 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4956 add_libcall (libcall_htab,
4957 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4958 add_libcall (libcall_htab,
4959 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4960 add_libcall (libcall_htab,
4961 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4962 add_libcall (libcall_htab,
4963 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4964 add_libcall (libcall_htab,
4965 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4966 add_libcall (libcall_htab,
4967 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4968 add_libcall (libcall_htab,
4969 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4970 add_libcall (libcall_htab,
4971 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4973 /* Values from double-precision helper functions are returned in core
4974 registers if the selected core only supports single-precision
4975 arithmetic, even if we are using the hard-float ABI. The same is
4976 true for single-precision helpers, but we will never be using the
4977 hard-float ABI on a CPU which doesn't support single-precision
4978 operations in hardware. */
4979 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4980 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4981 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4982 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4983 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4984 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4985 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4986 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4987 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4988 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4989 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4990 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4991 SFmode));
4992 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4993 DFmode));
4996 return libcall && libcall_htab->find (libcall) != NULL;
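/* Concretely: on an AAPCS hard-float target whose FPU only implements
   single precision, the double-precision addition helper registered for
   add_optab (__aeabi_dadd on EABI targets) is found in this table, so
   its result is taken from the core registers r0/r1 rather than from a
   VFP register.  */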
4999 static rtx
5000 arm_libcall_value_1 (machine_mode mode)
5002 if (TARGET_AAPCS_BASED)
5003 return aapcs_libcall_value (mode);
5004 else if (TARGET_IWMMXT_ABI
5005 && arm_vector_mode_supported_p (mode))
5006 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5007 else
5008 return gen_rtx_REG (mode, ARG_REGISTER (1));
5011 /* Define how to find the value returned by a library function
5012 assuming the value has mode MODE. */
5014 static rtx
5015 arm_libcall_value (machine_mode mode, const_rtx libcall)
5017 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5018 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5020 /* The following libcalls return their result in integer registers,
5021 even though they return a floating point value. */
5022 if (arm_libcall_uses_aapcs_base (libcall))
5023 return gen_rtx_REG (mode, ARG_REGISTER(1));
5027 return arm_libcall_value_1 (mode);
5030 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5032 static bool
5033 arm_function_value_regno_p (const unsigned int regno)
5035 if (regno == ARG_REGISTER (1)
5036 || (TARGET_32BIT
5037 && TARGET_AAPCS_BASED
5038 && TARGET_VFP
5039 && TARGET_HARD_FLOAT
5040 && regno == FIRST_VFP_REGNUM)
5041 || (TARGET_IWMMXT_ABI
5042 && regno == FIRST_IWMMXT_REGNUM))
5043 return true;
5045 return false;
5048 /* Determine the amount of memory needed to store the possible return
5049 registers of an untyped call. */
5051 arm_apply_result_size (void)
5053 int size = 16;
5055 if (TARGET_32BIT)
5057 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5058 size += 32;
5059 if (TARGET_IWMMXT_ABI)
5060 size += 8;
5063 return size;
5066 /* Decide whether TYPE should be returned in memory (true)
5067 or in a register (false). FNTYPE is the type of the function making
5068 the call. */
5069 static bool
5070 arm_return_in_memory (const_tree type, const_tree fntype)
5072 HOST_WIDE_INT size;
5074 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5076 if (TARGET_AAPCS_BASED)
5078 /* Simple, non-aggregate types (i.e. not including vectors and
5079 complex) are always returned in a register (or registers).
5080 We don't care about which register here, so we can short-cut
5081 some of the detail. */
5082 if (!AGGREGATE_TYPE_P (type)
5083 && TREE_CODE (type) != VECTOR_TYPE
5084 && TREE_CODE (type) != COMPLEX_TYPE)
5085 return false;
5087 /* Any return value that is no larger than one word can be
5088 returned in r0. */
5089 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5090 return false;
5092 /* Check any available co-processors to see if they accept the
5093 type as a register candidate (VFP, for example, can return
5094 some aggregates in consecutive registers). These aren't
5095 available if the call is variadic. */
5096 if (aapcs_select_return_coproc (type, fntype) >= 0)
5097 return false;
5099 /* Vector values should be returned using ARM registers, not
5100 memory (unless they're over 16 bytes, which will break since
5101 we only have four call-clobbered registers to play with). */
5102 if (TREE_CODE (type) == VECTOR_TYPE)
5103 return (size < 0 || size > (4 * UNITS_PER_WORD));
5105 /* The rest go in memory. */
5106 return true;
5109 if (TREE_CODE (type) == VECTOR_TYPE)
5110 return (size < 0 || size > (4 * UNITS_PER_WORD));
5112 if (!AGGREGATE_TYPE_P (type) &&
5113 (TREE_CODE (type) != VECTOR_TYPE))
5114 /* All simple types are returned in registers. */
5115 return false;
5117 if (arm_abi != ARM_ABI_APCS)
5119 /* ATPCS and later return aggregate types in memory only if they are
5120 larger than a word (or are variable size). */
5121 return (size < 0 || size > UNITS_PER_WORD);
5124 /* For the arm-wince targets we choose to be compatible with Microsoft's
5125 ARM and Thumb compilers, which always return aggregates in memory. */
5126 #ifndef ARM_WINCE
5127 /* All structures/unions bigger than one word are returned in memory.
5128 Also catch the case where int_size_in_bytes returns -1. In this case
5129 the aggregate is either huge or of variable size, and in either case
5130 we will want to return it via memory and not in a register. */
5131 if (size < 0 || size > UNITS_PER_WORD)
5132 return true;
5134 if (TREE_CODE (type) == RECORD_TYPE)
5136 tree field;
5138 /* For a struct the APCS says that we only return in a register
5139 if the type is 'integer like' and every addressable element
5140 has an offset of zero. For practical purposes this means
5141 that the structure can have at most one non bit-field element
5142 and that this element must be the first one in the structure. */
5144 /* Find the first field, ignoring non FIELD_DECL things which will
5145 have been created by C++. */
5146 for (field = TYPE_FIELDS (type);
5147 field && TREE_CODE (field) != FIELD_DECL;
5148 field = DECL_CHAIN (field))
5149 continue;
5151 if (field == NULL)
5152 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5154 /* Check that the first field is valid for returning in a register. */
5156 /* ... Floats are not allowed */
5157 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5158 return true;
5160 /* ... Aggregates that are not themselves valid for returning in
5161 a register are not allowed. */
5162 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5163 return true;
5165 /* Now check the remaining fields, if any. Only bitfields are allowed,
5166 since they are not addressable. */
5167 for (field = DECL_CHAIN (field);
5168 field;
5169 field = DECL_CHAIN (field))
5171 if (TREE_CODE (field) != FIELD_DECL)
5172 continue;
5174 if (!DECL_BIT_FIELD_TYPE (field))
5175 return true;
5178 return false;
5181 if (TREE_CODE (type) == UNION_TYPE)
5183 tree field;
5185 /* Unions can be returned in registers if every element is
5186 integral, or can be returned in an integer register. */
5187 for (field = TYPE_FIELDS (type);
5188 field;
5189 field = DECL_CHAIN (field))
5191 if (TREE_CODE (field) != FIELD_DECL)
5192 continue;
5194 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5195 return true;
5197 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5198 return true;
5201 return false;
5203 #endif /* not ARM_WINCE */
5205 /* Return all other types in memory. */
5206 return true;
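/* Examples under AAPCS: "struct { char c; }" (one byte) is returned in
   r0; "struct { int a, b, c; }" (12 bytes) is returned in memory through
   an implicit pointer; and with the VFP variant of the PCS a homogeneous
   aggregate such as "struct { float x, y; }" comes back in
   floating-point registers, which is what the co-processor check above
   detects.  */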
5209 const struct pcs_attribute_arg
5211 const char *arg;
5212 enum arm_pcs value;
5213 } pcs_attribute_args[] =
5215 {"aapcs", ARM_PCS_AAPCS},
5216 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5217 #if 0
5218 /* We could recognize these, but changes would be needed elsewhere
5219 * to implement them. */
5220 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5221 {"atpcs", ARM_PCS_ATPCS},
5222 {"apcs", ARM_PCS_APCS},
5223 #endif
5224 {NULL, ARM_PCS_UNKNOWN}
5227 static enum arm_pcs
5228 arm_pcs_from_attribute (tree attr)
5230 const struct pcs_attribute_arg *ptr;
5231 const char *arg;
5233 /* Get the value of the argument. */
5234 if (TREE_VALUE (attr) == NULL_TREE
5235 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5236 return ARM_PCS_UNKNOWN;
5238 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5240 /* Check it against the list of known arguments. */
5241 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5242 if (streq (arg, ptr->arg))
5243 return ptr->value;
5245 /* An unrecognized PCS variant. */
5246 return ARM_PCS_UNKNOWN;
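/* The attribute parsed here is written on a function declaration, for
   example

     double f2d (float) __attribute__ ((pcs ("aapcs")));

   which selects the base (core-register) calling convention for that
   function even when the default PCS is aapcs-vfp.  */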
5249 /* Get the PCS variant to use for this call. TYPE is the function's type
5250 specification, DECL is the specific declaration. DECL may be null if
5251 the call could be indirect or if this is a library call. */
5252 static enum arm_pcs
5253 arm_get_pcs_model (const_tree type, const_tree decl)
5255 bool user_convention = false;
5256 enum arm_pcs user_pcs = arm_pcs_default;
5257 tree attr;
5259 gcc_assert (type);
5261 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5262 if (attr)
5264 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5265 user_convention = true;
5268 if (TARGET_AAPCS_BASED)
5270 /* Detect varargs functions. These always use the base rules
5271 (no argument is ever a candidate for a co-processor
5272 register). */
5273 bool base_rules = stdarg_p (type);
5275 if (user_convention)
5277 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5278 sorry ("non-AAPCS derived PCS variant");
5279 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5280 error ("variadic functions must use the base AAPCS variant");
5283 if (base_rules)
5284 return ARM_PCS_AAPCS;
5285 else if (user_convention)
5286 return user_pcs;
5287 else if (decl && flag_unit_at_a_time)
5289 /* Local functions never leak outside this compilation unit,
5290 so we are free to use whatever conventions are
5291 appropriate. */
5292 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5293 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5294 if (i && i->local)
5295 return ARM_PCS_AAPCS_LOCAL;
5298 else if (user_convention && user_pcs != arm_pcs_default)
5299 sorry ("PCS variant");
5301 /* For everything else we use the target's default. */
5302 return arm_pcs_default;
5306 static void
5307 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5308 const_tree fntype ATTRIBUTE_UNUSED,
5309 rtx libcall ATTRIBUTE_UNUSED,
5310 const_tree fndecl ATTRIBUTE_UNUSED)
5312 /* Record the unallocated VFP registers. */
5313 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5314 pcum->aapcs_vfp_reg_alloc = 0;
5317 /* Walk down the type tree of TYPE counting consecutive base elements.
5318 If *MODEP is VOIDmode, then set it to the first valid floating point
5319 type. If a non-floating point type is found, or if a floating point
5320 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5321 otherwise return the count in the sub-tree. */
5322 static int
5323 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5325 machine_mode mode;
5326 HOST_WIDE_INT size;
5328 switch (TREE_CODE (type))
5330 case REAL_TYPE:
5331 mode = TYPE_MODE (type);
5332 if (mode != DFmode && mode != SFmode)
5333 return -1;
5335 if (*modep == VOIDmode)
5336 *modep = mode;
5338 if (*modep == mode)
5339 return 1;
5341 break;
5343 case COMPLEX_TYPE:
5344 mode = TYPE_MODE (TREE_TYPE (type));
5345 if (mode != DFmode && mode != SFmode)
5346 return -1;
5348 if (*modep == VOIDmode)
5349 *modep = mode;
5351 if (*modep == mode)
5352 return 2;
5354 break;
5356 case VECTOR_TYPE:
5357 /* Use V2SImode and V4SImode as representatives of all 64-bit
5358 and 128-bit vector types, whether or not those modes are
5359 supported with the present options. */
5360 size = int_size_in_bytes (type);
5361 switch (size)
5363 case 8:
5364 mode = V2SImode;
5365 break;
5366 case 16:
5367 mode = V4SImode;
5368 break;
5369 default:
5370 return -1;
5373 if (*modep == VOIDmode)
5374 *modep = mode;
5376 /* Vector modes are considered to be opaque: two vectors are
5377 equivalent for the purposes of being homogeneous aggregates
5378 if they are the same size. */
5379 if (*modep == mode)
5380 return 1;
5382 break;
5384 case ARRAY_TYPE:
5386 int count;
5387 tree index = TYPE_DOMAIN (type);
5389 /* Can't handle incomplete types nor sizes that are not
5390 fixed. */
5391 if (!COMPLETE_TYPE_P (type)
5392 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5393 return -1;
5395 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5396 if (count == -1
5397 || !index
5398 || !TYPE_MAX_VALUE (index)
5399 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5400 || !TYPE_MIN_VALUE (index)
5401 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5402 || count < 0)
5403 return -1;
5405 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5406 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5408 /* There must be no padding. */
5409 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5410 return -1;
5412 return count;
5415 case RECORD_TYPE:
5417 int count = 0;
5418 int sub_count;
5419 tree field;
5421 /* Can't handle incomplete types nor sizes that are not
5422 fixed. */
5423 if (!COMPLETE_TYPE_P (type)
5424 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5425 return -1;
5427 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5429 if (TREE_CODE (field) != FIELD_DECL)
5430 continue;
5432 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5433 if (sub_count < 0)
5434 return -1;
5435 count += sub_count;
5438 /* There must be no padding. */
5439 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5440 return -1;
5442 return count;
5445 case UNION_TYPE:
5446 case QUAL_UNION_TYPE:
5448 /* These aren't very interesting except in a degenerate case. */
5449 int count = 0;
5450 int sub_count;
5451 tree field;
5453 /* Can't handle incomplete types nor sizes that are not
5454 fixed. */
5455 if (!COMPLETE_TYPE_P (type)
5456 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5457 return -1;
5459 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5461 if (TREE_CODE (field) != FIELD_DECL)
5462 continue;
5464 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5465 if (sub_count < 0)
5466 return -1;
5467 count = count > sub_count ? count : sub_count;
5470 /* There must be no padding. */
5471 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5472 return -1;
5474 return count;
5477 default:
5478 break;
5481 return -1;
5484 /* Return true if PCS_VARIANT should use VFP registers. */
5485 static bool
5486 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5488 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5490 static bool seen_thumb1_vfp = false;
5492 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5494 sorry ("Thumb-1 hard-float VFP ABI");
5495 /* sorry() is not immediately fatal, so only display this once. */
5496 seen_thumb1_vfp = true;
5499 return true;
5502 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5503 return false;
5505 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5506 (TARGET_VFP_DOUBLE || !is_double));
5509 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5510 suitable for passing or returning in VFP registers for the PCS
5511 variant selected. If it is, then *BASE_MODE is updated to contain
5512 a machine mode describing each element of the argument's type and
5513 *COUNT to hold the number of such elements. */
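/* Illustrative sketch (hypothetical types): for  struct { float s[4]; }
   this returns true with *BASE_MODE == SFmode and *COUNT == 4, while a
   five-element float aggregate is rejected, since a homogeneous
   aggregate may have at most four members.  */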
5514 static bool
5515 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5516 machine_mode mode, const_tree type,
5517 machine_mode *base_mode, int *count)
5519 machine_mode new_mode = VOIDmode;
5521 /* If we have the type information, prefer that to working things
5522 out from the mode. */
5523 if (type)
5525 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5527 if (ag_count > 0 && ag_count <= 4)
5528 *count = ag_count;
5529 else
5530 return false;
5532 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5533 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5534 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5536 *count = 1;
5537 new_mode = mode;
5539 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5541 *count = 2;
5542 new_mode = (mode == DCmode ? DFmode : SFmode);
5544 else
5545 return false;
5548 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5549 return false;
5551 *base_mode = new_mode;
5552 return true;
5555 static bool
5556 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5557 machine_mode mode, const_tree type)
5559 int count ATTRIBUTE_UNUSED;
5560 machine_mode ag_mode ATTRIBUTE_UNUSED;
5562 if (!use_vfp_abi (pcs_variant, false))
5563 return false;
5564 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5565 &ag_mode, &count);
5568 static bool
5569 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5570 const_tree type)
5572 if (!use_vfp_abi (pcum->pcs_variant, false))
5573 return false;
5575 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5576 &pcum->aapcs_vfp_rmode,
5577 &pcum->aapcs_vfp_rcount);
5580 static bool
5581 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5582 const_tree type ATTRIBUTE_UNUSED)
5584 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5585 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5586 int regno;
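  /* Worked example of the search below: for two DFmode elements, SHIFT
     is 2 and MASK is 0xf, so we look for four consecutive free
     single-precision registers starting at an even S register.  */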
5588 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5589 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5591 pcum->aapcs_vfp_reg_alloc = mask << regno;
5592 if (mode == BLKmode
5593 || (mode == TImode && ! TARGET_NEON)
5594 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5596 int i;
5597 int rcount = pcum->aapcs_vfp_rcount;
5598 int rshift = shift;
5599 machine_mode rmode = pcum->aapcs_vfp_rmode;
5600 rtx par;
5601 if (!TARGET_NEON)
5603 /* Avoid using unsupported vector modes. */
5604 if (rmode == V2SImode)
5605 rmode = DImode;
5606 else if (rmode == V4SImode)
5608 rmode = DImode;
5609 rcount *= 2;
5610 rshift /= 2;
5613 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5614 for (i = 0; i < rcount; i++)
5616 rtx tmp = gen_rtx_REG (rmode,
5617 FIRST_VFP_REGNUM + regno + i * rshift);
5618 tmp = gen_rtx_EXPR_LIST
5619 (VOIDmode, tmp,
5620 GEN_INT (i * GET_MODE_SIZE (rmode)));
5621 XVECEXP (par, 0, i) = tmp;
5624 pcum->aapcs_reg = par;
5626 else
5627 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5628 return true;
5630 return false;
5633 static rtx
5634 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5635 machine_mode mode,
5636 const_tree type ATTRIBUTE_UNUSED)
5638 if (!use_vfp_abi (pcs_variant, false))
5639 return NULL;
5641 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5643 int count;
5644 machine_mode ag_mode;
5645 int i;
5646 rtx par;
5647 int shift;
5649 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5650 &ag_mode, &count);
5652 if (!TARGET_NEON)
5654 if (ag_mode == V2SImode)
5655 ag_mode = DImode;
5656 else if (ag_mode == V4SImode)
5658 ag_mode = DImode;
5659 count *= 2;
5662 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5663 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5664 for (i = 0; i < count; i++)
5666 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5667 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5668 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5669 XVECEXP (par, 0, i) = tmp;
5672 return par;
5675 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5678 static void
5679 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5680 machine_mode mode ATTRIBUTE_UNUSED,
5681 const_tree type ATTRIBUTE_UNUSED)
5683 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5684 pcum->aapcs_vfp_reg_alloc = 0;
5685 return;
5688 #define AAPCS_CP(X) \
5690 aapcs_ ## X ## _cum_init, \
5691 aapcs_ ## X ## _is_call_candidate, \
5692 aapcs_ ## X ## _allocate, \
5693 aapcs_ ## X ## _is_return_candidate, \
5694 aapcs_ ## X ## _allocate_return_reg, \
5695 aapcs_ ## X ## _advance \
5698 /* Table of co-processors that can be used to pass arguments in
5699 registers. Ideally no argument should be a candidate for more than
5700 one co-processor table entry, but the table is processed in order
5701 and stops after the first match. If that entry then fails to put
5702 the argument into a co-processor register, the argument will go on
5703 the stack. */
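/* At present the only entry is the VFP co-processor, added via
   AAPCS_CP (vfp) below.  */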
5704 static struct
5706 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5707 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5709 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5710 BLKmode) is a candidate for this co-processor's registers; this
5711 function should ignore any position-dependent state in
5712 CUMULATIVE_ARGS and only use call-type dependent information. */
5713 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5715 /* Return true if the argument does get a co-processor register; it
5716 should set aapcs_reg to an RTX of the register allocated as is
5717 required for a return from FUNCTION_ARG. */
5718 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5720 /* Return true if a result of mode MODE (or type TYPE if MODE is
5721 BLKmode) can be returned in this co-processor's registers. */
5722 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5724 /* Allocate and return an RTX element to hold the return type of a
5725 call; this routine must not fail and will only be called if
5726 is_return_candidate returned true with the same parameters. */
5727 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5729 /* Finish processing this argument and prepare to start processing
5730 the next one. */
5731 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5732 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5734 AAPCS_CP(vfp)
5737 #undef AAPCS_CP
5739 static int
5740 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5741 const_tree type)
5743 int i;
5745 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5746 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5747 return i;
5749 return -1;
5752 static int
5753 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5755 /* We aren't passed a decl, so we can't check that a call is local.
5756 However, it isn't clear that that would be a win anyway, since it
5757 might limit some tail-calling opportunities. */
5758 enum arm_pcs pcs_variant;
5760 if (fntype)
5762 const_tree fndecl = NULL_TREE;
5764 if (TREE_CODE (fntype) == FUNCTION_DECL)
5766 fndecl = fntype;
5767 fntype = TREE_TYPE (fntype);
5770 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5772 else
5773 pcs_variant = arm_pcs_default;
5775 if (pcs_variant != ARM_PCS_AAPCS)
5777 int i;
5779 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5780 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5781 TYPE_MODE (type),
5782 type))
5783 return i;
5785 return -1;
5788 static rtx
5789 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5790 const_tree fntype)
5792 /* We aren't passed a decl, so we can't check that a call is local.
5793 However, it isn't clear that that would be a win anyway, since it
5794 might limit some tail-calling opportunities. */
5795 enum arm_pcs pcs_variant;
5796 int unsignedp ATTRIBUTE_UNUSED;
5798 if (fntype)
5800 const_tree fndecl = NULL_TREE;
5802 if (TREE_CODE (fntype) == FUNCTION_DECL)
5804 fndecl = fntype;
5805 fntype = TREE_TYPE (fntype);
5808 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5810 else
5811 pcs_variant = arm_pcs_default;
5813 /* Promote integer types. */
5814 if (type && INTEGRAL_TYPE_P (type))
5815 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5817 if (pcs_variant != ARM_PCS_AAPCS)
5819 int i;
5821 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5822 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5823 type))
5824 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5825 mode, type);
5828 /* Promotes small structs returned in a register to full-word size
5829 for big-endian AAPCS. */
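  /* For example, a 3-byte aggregate is rounded up to 4 bytes here and
     returned in SImode, so that on big-endian targets the value sits at
     the most significant end of r0.  */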
5830 if (type && arm_return_in_msb (type))
5832 HOST_WIDE_INT size = int_size_in_bytes (type);
5833 if (size % UNITS_PER_WORD != 0)
5835 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5836 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5840 return gen_rtx_REG (mode, R0_REGNUM);
5843 static rtx
5844 aapcs_libcall_value (machine_mode mode)
5846 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5847 && GET_MODE_SIZE (mode) <= 4)
5848 mode = SImode;
5850 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5853 /* Lay out a function argument using the AAPCS rules. The rule
5854 numbers referred to here are those in the AAPCS. */
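/* Worked example (hypothetical signature): for  f (int a, double b)
   under the base, soft-float PCS, A is allocated to r0; B requires
   doubleword alignment, so rule C3 rounds the next core register
   number up to 2 and B occupies r2/r3.  */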
5855 static void
5856 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5857 const_tree type, bool named)
5859 int nregs, nregs2;
5860 int ncrn;
5862 /* We only need to do this once per argument. */
5863 if (pcum->aapcs_arg_processed)
5864 return;
5866 pcum->aapcs_arg_processed = true;
5868 /* Special case: if named is false then we are handling an incoming
5869 anonymous argument which is on the stack. */
5870 if (!named)
5871 return;
5873 /* Is this a potential co-processor register candidate? */
5874 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5876 int slot = aapcs_select_call_coproc (pcum, mode, type);
5877 pcum->aapcs_cprc_slot = slot;
5879 /* We don't have to apply any of the rules from part B of the
5880 preparation phase, these are handled elsewhere in the
5881 compiler. */
5883 if (slot >= 0)
5885 /* A Co-processor register candidate goes either in its own
5886 class of registers or on the stack. */
5887 if (!pcum->aapcs_cprc_failed[slot])
5889 /* C1.cp - Try to allocate the argument to co-processor
5890 registers. */
5891 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5892 return;
5894 /* C2.cp - Put the argument on the stack and note that we
5895 can't assign any more candidates in this slot. We also
5896 need to note that we have allocated stack space, so that
5897 we won't later try to split a non-cprc candidate between
5898 core registers and the stack. */
5899 pcum->aapcs_cprc_failed[slot] = true;
5900 pcum->can_split = false;
5903 /* We didn't get a register, so this argument goes on the
5904 stack. */
5905 gcc_assert (pcum->can_split == false);
5906 return;
5910 /* C3 - For double-word aligned arguments, round the NCRN up to the
5911 next even number. */
5912 ncrn = pcum->aapcs_ncrn;
5913 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5914 ncrn++;
5916 nregs = ARM_NUM_REGS2(mode, type);
5918 /* Sigh, this test should really assert that nregs > 0, but a GCC
5919 extension allows empty structs and then gives them empty size; it
5920 then allows such a structure to be passed by value. For some of
5921 the code below we have to pretend that such an argument has
5922 non-zero size so that we 'locate' it correctly either in
5923 registers or on the stack. */
5924 gcc_assert (nregs >= 0);
5926 nregs2 = nregs ? nregs : 1;
5928 /* C4 - Argument fits entirely in core registers. */
5929 if (ncrn + nregs2 <= NUM_ARG_REGS)
5931 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5932 pcum->aapcs_next_ncrn = ncrn + nregs;
5933 return;
5936 /* C5 - Some core registers left and there are no arguments already
5937 on the stack: split this argument between the remaining core
5938 registers and the stack. */
5939 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5941 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5942 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5943 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5944 return;
5947 /* C6 - NCRN is set to 4. */
5948 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5950 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5951 return;
5954 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5955 for a call to a function whose data type is FNTYPE.
5956 For a library call, FNTYPE is NULL. */
5957 void
5958 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5959 rtx libname,
5960 tree fndecl ATTRIBUTE_UNUSED)
5962 /* Long call handling. */
5963 if (fntype)
5964 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5965 else
5966 pcum->pcs_variant = arm_pcs_default;
5968 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5970 if (arm_libcall_uses_aapcs_base (libname))
5971 pcum->pcs_variant = ARM_PCS_AAPCS;
5973 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5974 pcum->aapcs_reg = NULL_RTX;
5975 pcum->aapcs_partial = 0;
5976 pcum->aapcs_arg_processed = false;
5977 pcum->aapcs_cprc_slot = -1;
5978 pcum->can_split = true;
5980 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5982 int i;
5984 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5986 pcum->aapcs_cprc_failed[i] = false;
5987 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5990 return;
5993 /* Legacy ABIs */
5995 /* On the ARM, the offset starts at 0. */
5996 pcum->nregs = 0;
5997 pcum->iwmmxt_nregs = 0;
5998 pcum->can_split = true;
6000 /* Varargs vectors are treated the same as long long.
6001 named_count avoids having to change the way arm handles 'named' */
6002 pcum->named_count = 0;
6003 pcum->nargs = 0;
6005 if (TARGET_REALLY_IWMMXT && fntype)
6007 tree fn_arg;
6009 for (fn_arg = TYPE_ARG_TYPES (fntype);
6010 fn_arg;
6011 fn_arg = TREE_CHAIN (fn_arg))
6012 pcum->named_count += 1;
6014 if (! pcum->named_count)
6015 pcum->named_count = INT_MAX;
6019 /* Return true if mode/type need doubleword alignment. */
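/* For example, DImode and DFmode values are 64-bit aligned on this
   target and so exceed the 32-bit PARM_BOUNDARY, as does a type whose
   user-specified alignment is larger than 32 bits; plain SImode does
   not.  */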
6020 static bool
6021 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6023 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6024 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6028 /* Determine where to put an argument to a function.
6029 Value is zero to push the argument on the stack,
6030 or a hard register in which to store the argument.
6032 MODE is the argument's machine mode.
6033 TYPE is the data type of the argument (as a tree).
6034 This is null for libcalls where that information may
6035 not be available.
6036 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6037 the preceding args and about the function being called.
6038 NAMED is nonzero if this argument is a named parameter
6039 (otherwise it is an extra parameter matching an ellipsis).
6041 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6042 other arguments are passed on the stack. If (NAMED == 0) (which happens
6043 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6044 defined), say it is passed on the stack (function_prologue will
6045 indeed make it be passed on the stack if necessary). */
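/* For instance (hypothetical signature), f (int a, int b, int c, int d, int e)
   places A..D in r0-r3 and E on the stack.  */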
6047 static rtx
6048 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6049 const_tree type, bool named)
6051 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6052 int nregs;
6054 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6055 a call insn (op3 of a call_value insn). */
6056 if (mode == VOIDmode)
6057 return const0_rtx;
6059 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6061 aapcs_layout_arg (pcum, mode, type, named);
6062 return pcum->aapcs_reg;
6065 /* Varargs vectors are treated the same as long long.
6066 named_count avoids having to change the way arm handles 'named' */
6067 if (TARGET_IWMMXT_ABI
6068 && arm_vector_mode_supported_p (mode)
6069 && pcum->named_count > pcum->nargs + 1)
6071 if (pcum->iwmmxt_nregs <= 9)
6072 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6073 else
6075 pcum->can_split = false;
6076 return NULL_RTX;
6080 /* Put doubleword aligned quantities in even register pairs. */
6081 if (pcum->nregs & 1
6082 && ARM_DOUBLEWORD_ALIGN
6083 && arm_needs_doubleword_align (mode, type))
6084 pcum->nregs++;
6086 /* Only allow splitting an arg between regs and memory if all preceding
6087 args were allocated to regs. For args passed by reference we only count
6088 the reference pointer. */
6089 if (pcum->can_split)
6090 nregs = 1;
6091 else
6092 nregs = ARM_NUM_REGS2 (mode, type);
6094 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6095 return NULL_RTX;
6097 return gen_rtx_REG (mode, pcum->nregs);
6100 static unsigned int
6101 arm_function_arg_boundary (machine_mode mode, const_tree type)
6103 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6104 ? DOUBLEWORD_ALIGNMENT
6105 : PARM_BOUNDARY);
6108 static int
6109 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6110 tree type, bool named)
6112 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6113 int nregs = pcum->nregs;
6115 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6117 aapcs_layout_arg (pcum, mode, type, named);
6118 return pcum->aapcs_partial;
6121 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6122 return 0;
6124 if (NUM_ARG_REGS > nregs
6125 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6126 && pcum->can_split)
6127 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6129 return 0;
6132 /* Update the data in PCUM to advance over an argument
6133 of mode MODE and data type TYPE.
6134 (TYPE is null for libcalls where that information may not be available.) */
6136 static void
6137 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6138 const_tree type, bool named)
6140 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6142 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6144 aapcs_layout_arg (pcum, mode, type, named);
6146 if (pcum->aapcs_cprc_slot >= 0)
6148 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6149 type);
6150 pcum->aapcs_cprc_slot = -1;
6153 /* Generic stuff. */
6154 pcum->aapcs_arg_processed = false;
6155 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6156 pcum->aapcs_reg = NULL_RTX;
6157 pcum->aapcs_partial = 0;
6159 else
6161 pcum->nargs += 1;
6162 if (arm_vector_mode_supported_p (mode)
6163 && pcum->named_count > pcum->nargs
6164 && TARGET_IWMMXT_ABI)
6165 pcum->iwmmxt_nregs += 1;
6166 else
6167 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6171 /* Variable sized types are passed by reference. This is a GCC
6172 extension to the ARM ABI. */
6174 static bool
6175 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6176 machine_mode mode ATTRIBUTE_UNUSED,
6177 const_tree type, bool named ATTRIBUTE_UNUSED)
6179 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6182 /* Encode the current state of the #pragma [no_]long_calls. */
6183 typedef enum
6185 OFF, /* No #pragma [no_]long_calls is in effect. */
6186 LONG, /* #pragma long_calls is in effect. */
6187 SHORT /* #pragma no_long_calls is in effect. */
6188 } arm_pragma_enum;
6190 static arm_pragma_enum arm_pragma_long_calls = OFF;
6192 void
6193 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6195 arm_pragma_long_calls = LONG;
6198 void
6199 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6201 arm_pragma_long_calls = SHORT;
6204 void
6205 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6207 arm_pragma_long_calls = OFF;
6210 /* Handle an attribute requiring a FUNCTION_DECL;
6211 arguments as in struct attribute_spec.handler. */
6212 static tree
6213 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6214 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6216 if (TREE_CODE (*node) != FUNCTION_DECL)
6218 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6219 name);
6220 *no_add_attrs = true;
6223 return NULL_TREE;
6226 /* Handle an "interrupt" or "isr" attribute;
6227 arguments as in struct attribute_spec.handler. */
6228 static tree
6229 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6230 bool *no_add_attrs)
6232 if (DECL_P (*node))
6234 if (TREE_CODE (*node) != FUNCTION_DECL)
6236 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6237 name);
6238 *no_add_attrs = true;
6240 /* FIXME: the argument if any is checked for type attributes;
6241 should it be checked for decl ones? */
6243 else
6245 if (TREE_CODE (*node) == FUNCTION_TYPE
6246 || TREE_CODE (*node) == METHOD_TYPE)
6248 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6250 warning (OPT_Wattributes, "%qE attribute ignored",
6251 name);
6252 *no_add_attrs = true;
6255 else if (TREE_CODE (*node) == POINTER_TYPE
6256 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6257 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6258 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6260 *node = build_variant_type_copy (*node);
6261 TREE_TYPE (*node) = build_type_attribute_variant
6262 (TREE_TYPE (*node),
6263 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6264 *no_add_attrs = true;
6266 else
6268 /* Possibly pass this attribute on from the type to a decl. */
6269 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6270 | (int) ATTR_FLAG_FUNCTION_NEXT
6271 | (int) ATTR_FLAG_ARRAY_NEXT))
6273 *no_add_attrs = true;
6274 return tree_cons (name, args, NULL_TREE);
6276 else
6278 warning (OPT_Wattributes, "%qE attribute ignored",
6279 name);
6284 return NULL_TREE;
6287 /* Handle a "pcs" attribute; arguments as in struct
6288 attribute_spec.handler. */
6289 static tree
6290 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6291 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6293 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6295 warning (OPT_Wattributes, "%qE attribute ignored", name);
6296 *no_add_attrs = true;
6298 return NULL_TREE;
6301 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6302 /* Handle the "notshared" attribute. This attribute is another way of
6303 requesting hidden visibility. ARM's compiler supports
6304 "__declspec(notshared)"; we support the same thing via an
6305 attribute. */
6307 static tree
6308 arm_handle_notshared_attribute (tree *node,
6309 tree name ATTRIBUTE_UNUSED,
6310 tree args ATTRIBUTE_UNUSED,
6311 int flags ATTRIBUTE_UNUSED,
6312 bool *no_add_attrs)
6314 tree decl = TYPE_NAME (*node);
6316 if (decl)
6318 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6319 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6320 *no_add_attrs = false;
6322 return NULL_TREE;
6324 #endif
6326 /* Return 0 if the attributes for two types are incompatible, 1 if they
6327 are compatible, and 2 if they are nearly compatible (which causes a
6328 warning to be generated). */
6329 static int
6330 arm_comp_type_attributes (const_tree type1, const_tree type2)
6332 int l1, l2, s1, s2;
6334 /* Check for mismatch of non-default calling convention. */
6335 if (TREE_CODE (type1) != FUNCTION_TYPE)
6336 return 1;
6338 /* Check for mismatched call attributes. */
6339 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6340 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6341 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6342 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6344 /* Only bother to check if an attribute is defined. */
6345 if (l1 | l2 | s1 | s2)
6347 /* If one type has an attribute, the other must have the same attribute. */
6348 if ((l1 != l2) || (s1 != s2))
6349 return 0;
6351 /* Disallow mixed attributes. */
6352 if ((l1 & s2) || (l2 & s1))
6353 return 0;
6356 /* Check for mismatched ISR attribute. */
6357 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6358 if (! l1)
6359 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6360 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6361 if (! l2)
6362 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6363 if (l1 != l2)
6364 return 0;
6366 return 1;
6369 /* Assign default attributes to a newly defined type. This is used to
6370 set short_call/long_call attributes for function types of
6371 functions defined inside corresponding #pragma scopes. */
6372 static void
6373 arm_set_default_type_attributes (tree type)
6375 /* Add __attribute__ ((long_call)) to all functions when
6376 inside #pragma long_calls, or __attribute__ ((short_call))
6377 when inside #pragma no_long_calls. */
6378 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6380 tree type_attr_list, attr_name;
6381 type_attr_list = TYPE_ATTRIBUTES (type);
6383 if (arm_pragma_long_calls == LONG)
6384 attr_name = get_identifier ("long_call");
6385 else if (arm_pragma_long_calls == SHORT)
6386 attr_name = get_identifier ("short_call");
6387 else
6388 return;
6390 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6391 TYPE_ATTRIBUTES (type) = type_attr_list;
6395 /* Return true if DECL is known to be linked into section SECTION. */
6397 static bool
6398 arm_function_in_section_p (tree decl, section *section)
6400 /* We can only be certain about the prevailing symbol definition. */
6401 if (!decl_binds_to_current_def_p (decl))
6402 return false;
6404 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6405 if (!DECL_SECTION_NAME (decl))
6407 /* Make sure that we will not create a unique section for DECL. */
6408 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6409 return false;
6412 return function_section (decl) == section;
6415 /* Return nonzero if a 32-bit "long_call" should be generated for
6416 a call from the current function to DECL. We generate a long_call
6417 if the function:
6419 a. has an __attribute__ ((long_call))
6420 or b. is within the scope of a #pragma long_calls
6421 or c. the -mlong-calls command line switch has been specified
6423 However we do not generate a long call if the function:
6425 d. has an __attribute__ ((short_call))
6426 or e. is inside the scope of a #pragma no_long_calls
6427 or f. is defined in the same section as the current function. */
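/* Usage sketch (illustrative declarations, not from this file):

     void far_func (void) __attribute__ ((long_call));
     #pragma long_calls
     void also_far (void);
     #pragma long_calls_off

   Calls to both functions are then made by loading the full 32-bit
   address into a register rather than relying on the limited range of
   a BL instruction.  */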
6429 bool
6430 arm_is_long_call_p (tree decl)
6432 tree attrs;
6434 if (!decl)
6435 return TARGET_LONG_CALLS;
6437 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6438 if (lookup_attribute ("short_call", attrs))
6439 return false;
6441 /* For "f", be conservative, and only cater for cases in which the
6442 whole of the current function is placed in the same section. */
6443 if (!flag_reorder_blocks_and_partition
6444 && TREE_CODE (decl) == FUNCTION_DECL
6445 && arm_function_in_section_p (decl, current_function_section ()))
6446 return false;
6448 if (lookup_attribute ("long_call", attrs))
6449 return true;
6451 return TARGET_LONG_CALLS;
6454 /* Return nonzero if it is ok to make a tail-call to DECL. */
6455 static bool
6456 arm_function_ok_for_sibcall (tree decl, tree exp)
6458 unsigned long func_type;
6460 if (cfun->machine->sibcall_blocked)
6461 return false;
6463 /* Never tailcall something if we are generating code for Thumb-1. */
6464 if (TARGET_THUMB1)
6465 return false;
6467 /* The PIC register is live on entry to VxWorks PLT entries, so we
6468 must make the call before restoring the PIC register. */
6469 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6470 return false;
6472 /* If we are interworking and the function is not declared static
6473 then we can't tail-call it unless we know that it exists in this
6474 compilation unit (since it might be a Thumb routine). */
6475 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6476 && !TREE_ASM_WRITTEN (decl))
6477 return false;
6479 func_type = arm_current_func_type ();
6480 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6481 if (IS_INTERRUPT (func_type))
6482 return false;
6484 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6486 /* Check that the return value locations are the same. For
6487 example that we aren't returning a value from the sibling in
6488 a VFP register but then need to transfer it to a core
6489 register. */
6490 rtx a, b;
6492 a = arm_function_value (TREE_TYPE (exp), decl, false);
6493 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6494 cfun->decl, false);
6495 if (!rtx_equal_p (a, b))
6496 return false;
6499 /* Never tailcall if function may be called with a misaligned SP. */
6500 if (IS_STACKALIGN (func_type))
6501 return false;
6503 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6504 references should become a NOP. Don't convert such calls into
6505 sibling calls. */
6506 if (TARGET_AAPCS_BASED
6507 && arm_abi == ARM_ABI_AAPCS
6508 && decl
6509 && DECL_WEAK (decl))
6510 return false;
6512 /* Everything else is ok. */
6513 return true;
6517 /* Addressing mode support functions. */
6519 /* Return nonzero if X is a legitimate immediate operand when compiling
6520 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6521 int
6522 legitimate_pic_operand_p (rtx x)
6524 if (GET_CODE (x) == SYMBOL_REF
6525 || (GET_CODE (x) == CONST
6526 && GET_CODE (XEXP (x, 0)) == PLUS
6527 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6528 return 0;
6530 return 1;
6533 /* Record that the current function needs a PIC register. Initialize
6534 cfun->machine->pic_reg if we have not already done so. */
6536 static void
6537 require_pic_register (void)
6539 /* A lot of the logic here is made obscure by the fact that this
6540 routine gets called as part of the rtx cost estimation process.
6541 We don't want those calls to affect any assumptions about the real
6542 function; and further, we can't call entry_of_function() until we
6543 start the real expansion process. */
6544 if (!crtl->uses_pic_offset_table)
6546 gcc_assert (can_create_pseudo_p ());
6547 if (arm_pic_register != INVALID_REGNUM
6548 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6550 if (!cfun->machine->pic_reg)
6551 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6553 /* Play games to avoid marking the function as needing pic
6554 if we are being called as part of the cost-estimation
6555 process. */
6556 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6557 crtl->uses_pic_offset_table = 1;
6559 else
6561 rtx_insn *seq, *insn;
6563 if (!cfun->machine->pic_reg)
6564 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6566 /* Play games to avoid marking the function as needing pic
6567 if we are being called as part of the cost-estimation
6568 process. */
6569 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6571 crtl->uses_pic_offset_table = 1;
6572 start_sequence ();
6574 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6575 && arm_pic_register > LAST_LO_REGNUM)
6576 emit_move_insn (cfun->machine->pic_reg,
6577 gen_rtx_REG (Pmode, arm_pic_register));
6578 else
6579 arm_load_pic_register (0UL);
6581 seq = get_insns ();
6582 end_sequence ();
6584 for (insn = seq; insn; insn = NEXT_INSN (insn))
6585 if (INSN_P (insn))
6586 INSN_LOCATION (insn) = prologue_location;
6588 /* We can be called during expansion of PHI nodes, where
6589 we can't yet emit instructions directly in the final
6590 insn stream. Queue the insns on the entry edge, they will
6591 be committed after everything else is expanded. */
6592 insert_insn_on_edge (seq,
6593 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6599 rtx
6600 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6602 if (GET_CODE (orig) == SYMBOL_REF
6603 || GET_CODE (orig) == LABEL_REF)
6605 rtx insn;
6607 if (reg == 0)
6609 gcc_assert (can_create_pseudo_p ());
6610 reg = gen_reg_rtx (Pmode);
6613 /* VxWorks does not impose a fixed gap between segments; the run-time
6614 gap can be different from the object-file gap. We therefore can't
6615 use GOTOFF unless we are absolutely sure that the symbol is in the
6616 same segment as the GOT. Unfortunately, the flexibility of linker
6617 scripts means that we can't be sure of that in general, so assume
6618 that GOTOFF is never valid on VxWorks. */
6619 if ((GET_CODE (orig) == LABEL_REF
6620 || (GET_CODE (orig) == SYMBOL_REF &&
6621 SYMBOL_REF_LOCAL_P (orig)))
6622 && NEED_GOT_RELOC
6623 && arm_pic_data_is_text_relative)
6624 insn = arm_pic_static_addr (orig, reg);
6625 else
6627 rtx pat;
6628 rtx mem;
6630 /* If this function doesn't have a pic register, create one now. */
6631 require_pic_register ();
6633 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6635 /* Make the MEM as close to a constant as possible. */
6636 mem = SET_SRC (pat);
6637 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6638 MEM_READONLY_P (mem) = 1;
6639 MEM_NOTRAP_P (mem) = 1;
6641 insn = emit_insn (pat);
6644 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6645 by loop. */
6646 set_unique_reg_note (insn, REG_EQUAL, orig);
6648 return reg;
6650 else if (GET_CODE (orig) == CONST)
6652 rtx base, offset;
6654 if (GET_CODE (XEXP (orig, 0)) == PLUS
6655 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6656 return orig;
6658 /* Handle the case where we have: const (UNSPEC_TLS). */
6659 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6660 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6661 return orig;
6663 /* Handle the case where we have:
6664 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6665 CONST_INT. */
6666 if (GET_CODE (XEXP (orig, 0)) == PLUS
6667 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6668 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6670 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6671 return orig;
6674 if (reg == 0)
6676 gcc_assert (can_create_pseudo_p ());
6677 reg = gen_reg_rtx (Pmode);
6680 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6682 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6683 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6684 base == reg ? 0 : reg);
6686 if (CONST_INT_P (offset))
6688 /* The base register doesn't really matter, we only want to
6689 test the index for the appropriate mode. */
6690 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6692 gcc_assert (can_create_pseudo_p ());
6693 offset = force_reg (Pmode, offset);
6696 if (CONST_INT_P (offset))
6697 return plus_constant (Pmode, base, INTVAL (offset));
6700 if (GET_MODE_SIZE (mode) > 4
6701 && (GET_MODE_CLASS (mode) == MODE_INT
6702 || TARGET_SOFT_FLOAT))
6704 emit_insn (gen_addsi3 (reg, base, offset));
6705 return reg;
6708 return gen_rtx_PLUS (Pmode, base, offset);
6711 return orig;
6715 /* Find a spare register to use during the prolog of a function. */
6717 static int
6718 thumb_find_work_register (unsigned long pushed_regs_mask)
6720 int reg;
6722 /* Check the argument registers first as these are call-used. The
6723 register allocation order means that sometimes r3 might be used
6724 but earlier argument registers might not, so check them all. */
6725 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6726 if (!df_regs_ever_live_p (reg))
6727 return reg;
6729 /* Before going on to check the call-saved registers we can try a couple
6730 more ways of deducing that r3 is available. The first is when we are
6731 pushing anonymous arguments onto the stack and we have less than 4
6732 registers worth of fixed arguments(*). In this case r3 will be part of
6733 the variable argument list and so we can be sure that it will be
6734 pushed right at the start of the function. Hence it will be available
6735 for the rest of the prologue.
6736 (*): ie crtl->args.pretend_args_size is greater than 0. */
6737 if (cfun->machine->uses_anonymous_args
6738 && crtl->args.pretend_args_size > 0)
6739 return LAST_ARG_REGNUM;
6741 /* The other case is when we have fixed arguments but less than 4 registers
6742 worth. In this case r3 might be used in the body of the function, but
6743 it is not being used to convey an argument into the function. In theory
6744 we could just check crtl->args.size to see how many bytes are
6745 being passed in argument registers, but it seems that it is unreliable.
6746 Sometimes it will have the value 0 when in fact arguments are being
6747 passed. (See testcase execute/20021111-1.c for an example). So we also
6748 check the args_info.nregs field as well. The problem with this field is
6749 that it makes no allowances for arguments that are passed to the
6750 function but which are not used. Hence we could miss an opportunity
6751 when a function has an unused argument in r3. But it is better to be
6752 safe than to be sorry. */
6753 if (! cfun->machine->uses_anonymous_args
6754 && crtl->args.size >= 0
6755 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6756 && (TARGET_AAPCS_BASED
6757 ? crtl->args.info.aapcs_ncrn < 4
6758 : crtl->args.info.nregs < 4))
6759 return LAST_ARG_REGNUM;
6761 /* Otherwise look for a call-saved register that is going to be pushed. */
6762 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6763 if (pushed_regs_mask & (1 << reg))
6764 return reg;
6766 if (TARGET_THUMB2)
6768 /* Thumb-2 can use high regs. */
6769 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6770 if (pushed_regs_mask & (1 << reg))
6771 return reg;
6773 /* Something went wrong - thumb_compute_save_reg_mask()
6774 should have arranged for a suitable register to be pushed. */
6775 gcc_unreachable ();
6778 static GTY(()) int pic_labelno;
6780 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6781 low register. */
6783 void
6784 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6786 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6788 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6789 return;
6791 gcc_assert (flag_pic);
6793 pic_reg = cfun->machine->pic_reg;
6794 if (TARGET_VXWORKS_RTP)
6796 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6797 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6798 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6800 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6802 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6803 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6805 else
6807 /* We use an UNSPEC rather than a LABEL_REF because this label
6808 never appears in the code stream. */
6810 labelno = GEN_INT (pic_labelno++);
6811 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6812 l1 = gen_rtx_CONST (VOIDmode, l1);
6814 /* On the ARM the PC register contains 'dot + 8' at the time of the
6815 addition, on the Thumb it is 'dot + 4'. */
6816 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6817 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6818 UNSPEC_GOTSYM_OFF);
6819 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6821 if (TARGET_32BIT)
6823 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6825 else /* TARGET_THUMB1 */
6827 if (arm_pic_register != INVALID_REGNUM
6828 && REGNO (pic_reg) > LAST_LO_REGNUM)
6830 /* We will have pushed the pic register, so we should always be
6831 able to find a work register. */
6832 pic_tmp = gen_rtx_REG (SImode,
6833 thumb_find_work_register (saved_regs));
6834 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6835 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6836 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6838 else if (arm_pic_register != INVALID_REGNUM
6839 && arm_pic_register > LAST_LO_REGNUM
6840 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6842 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6843 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6844 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6846 else
6847 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6851 /* Need to emit this whether or not we obey regdecls,
6852 since setjmp/longjmp can cause life info to screw up. */
6853 emit_use (pic_reg);
6856 /* Generate code to load the address of a static var when flag_pic is set. */
6857 static rtx
6858 arm_pic_static_addr (rtx orig, rtx reg)
6860 rtx l1, labelno, offset_rtx, insn;
6862 gcc_assert (flag_pic);
6864 /* We use an UNSPEC rather than a LABEL_REF because this label
6865 never appears in the code stream. */
6866 labelno = GEN_INT (pic_labelno++);
6867 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6868 l1 = gen_rtx_CONST (VOIDmode, l1);
6870 /* On the ARM the PC register contains 'dot + 8' at the time of the
6871 addition, on the Thumb it is 'dot + 4'. */
6872 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6873 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6874 UNSPEC_SYMBOL_OFFSET);
6875 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6877 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6878 return insn;
6881 /* Return nonzero if X is valid as an ARM state addressing register. */
6882 static int
6883 arm_address_register_rtx_p (rtx x, int strict_p)
6885 int regno;
6887 if (!REG_P (x))
6888 return 0;
6890 regno = REGNO (x);
6892 if (strict_p)
6893 return ARM_REGNO_OK_FOR_BASE_P (regno);
6895 return (regno <= LAST_ARM_REGNUM
6896 || regno >= FIRST_PSEUDO_REGISTER
6897 || regno == FRAME_POINTER_REGNUM
6898 || regno == ARG_POINTER_REGNUM);
6901 /* Return TRUE if this rtx is the difference of a symbol and a label,
6902 and will reduce to a PC-relative relocation in the object file.
6903 Expressions like this can be left alone when generating PIC, rather
6904 than forced through the GOT. */
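/* The typical accepted shape is  (minus (symbol_ref "sym") (label_ref ...)),
   i.e. a symbol-minus-label difference.  */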
6905 static int
6906 pcrel_constant_p (rtx x)
6908 if (GET_CODE (x) == MINUS)
6909 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6911 return FALSE;
6914 /* Return true if X will surely end up in an index register after next
6915 splitting pass. */
6916 static bool
6917 will_be_in_index_register (const_rtx x)
6919 /* arm.md: calculate_pic_address will split this into a register. */
6920 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6923 /* Return nonzero if X is a valid ARM state address operand. */
6924 int
6925 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6926 int strict_p)
6928 bool use_ldrd;
6929 enum rtx_code code = GET_CODE (x);
6931 if (arm_address_register_rtx_p (x, strict_p))
6932 return 1;
6934 use_ldrd = (TARGET_LDRD
6935 && (mode == DImode
6936 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6938 if (code == POST_INC || code == PRE_DEC
6939 || ((code == PRE_INC || code == POST_DEC)
6940 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6941 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6943 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6944 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6945 && GET_CODE (XEXP (x, 1)) == PLUS
6946 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6948 rtx addend = XEXP (XEXP (x, 1), 1);
6950 /* Don't allow ldrd post increment by register because it's hard
6951 to fix up invalid register choices. */
6952 if (use_ldrd
6953 && GET_CODE (x) == POST_MODIFY
6954 && REG_P (addend))
6955 return 0;
6957 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6958 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6961 /* After reload constants split into minipools will have addresses
6962 from a LABEL_REF. */
6963 else if (reload_completed
6964 && (code == LABEL_REF
6965 || (code == CONST
6966 && GET_CODE (XEXP (x, 0)) == PLUS
6967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6968 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6969 return 1;
6971 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6972 return 0;
6974 else if (code == PLUS)
6976 rtx xop0 = XEXP (x, 0);
6977 rtx xop1 = XEXP (x, 1);
6979 return ((arm_address_register_rtx_p (xop0, strict_p)
6980 && ((CONST_INT_P (xop1)
6981 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6982 || (!strict_p && will_be_in_index_register (xop1))))
6983 || (arm_address_register_rtx_p (xop1, strict_p)
6984 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6987 #if 0
6988 /* Reload currently can't handle MINUS, so disable this for now */
6989 else if (GET_CODE (x) == MINUS)
6991 rtx xop0 = XEXP (x, 0);
6992 rtx xop1 = XEXP (x, 1);
6994 return (arm_address_register_rtx_p (xop0, strict_p)
6995 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6997 #endif
6999 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7000 && code == SYMBOL_REF
7001 && CONSTANT_POOL_ADDRESS_P (x)
7002 && ! (flag_pic
7003 && symbol_mentioned_p (get_pool_constant (x))
7004 && ! pcrel_constant_p (get_pool_constant (x))))
7005 return 1;
7007 return 0;
7010 /* Return nonzero if X is a valid Thumb-2 address operand. */
7011 static int
7012 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7014 bool use_ldrd;
7015 enum rtx_code code = GET_CODE (x);
7017 if (arm_address_register_rtx_p (x, strict_p))
7018 return 1;
7020 use_ldrd = (TARGET_LDRD
7021 && (mode == DImode
7022 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7024 if (code == POST_INC || code == PRE_DEC
7025 || ((code == PRE_INC || code == POST_DEC)
7026 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7027 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7029 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7030 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7031 && GET_CODE (XEXP (x, 1)) == PLUS
7032 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7034 /* Thumb-2 only has autoincrement by constant. */
7035 rtx addend = XEXP (XEXP (x, 1), 1);
7036 HOST_WIDE_INT offset;
7038 if (!CONST_INT_P (addend))
7039 return 0;
7041 offset = INTVAL(addend);
7042 if (GET_MODE_SIZE (mode) <= 4)
7043 return (offset > -256 && offset < 256);
7045 return (use_ldrd && offset > -1024 && offset < 1024
7046 && (offset & 3) == 0);
7049 /* After reload constants split into minipools will have addresses
7050 from a LABEL_REF. */
7051 else if (reload_completed
7052 && (code == LABEL_REF
7053 || (code == CONST
7054 && GET_CODE (XEXP (x, 0)) == PLUS
7055 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7056 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7057 return 1;
7059 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7060 return 0;
7062 else if (code == PLUS)
7064 rtx xop0 = XEXP (x, 0);
7065 rtx xop1 = XEXP (x, 1);
7067 return ((arm_address_register_rtx_p (xop0, strict_p)
7068 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7069 || (!strict_p && will_be_in_index_register (xop1))))
7070 || (arm_address_register_rtx_p (xop1, strict_p)
7071 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7074 /* Normally we can assign constant values to target registers without
7075 the help of the constant pool. But there are cases where we have to use
7076 the constant pool, for example:
7077 1) assigning a label to a register;
7078 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7080 A constant pool access of the form:
7081 (set (reg r0) (mem (symbol_ref (".LC0"))))
7082 will cause the use of the literal pool (later, in arm_reorg).
7083 So here we mark such a form as invalid, and the compiler will then
7084 adjust it into:
7085 (set (reg r0) (symbol_ref (".LC0")))
7086 (set (reg r0) (mem (reg r0))).
7087 No extra register is required, and (mem (reg r0)) won't cause the use
7088 of the literal pool. */
7089 else if (arm_disable_literal_pool && code == SYMBOL_REF
7090 && CONSTANT_POOL_ADDRESS_P (x))
7091 return 0;
7093 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7094 && code == SYMBOL_REF
7095 && CONSTANT_POOL_ADDRESS_P (x)
7096 && ! (flag_pic
7097 && symbol_mentioned_p (get_pool_constant (x))
7098 && ! pcrel_constant_p (get_pool_constant (x))))
7099 return 1;
7101 return 0;
7104 /* Return nonzero if INDEX is valid for an address index operand in
7105 ARM state. */
7106 static int
7107 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7108 int strict_p)
7110 HOST_WIDE_INT range;
7111 enum rtx_code code = GET_CODE (index);
7113 /* Standard coprocessor addressing modes. */
7114 if (TARGET_HARD_FLOAT
7115 && TARGET_VFP
7116 && (mode == SFmode || mode == DFmode))
7117 return (code == CONST_INT && INTVAL (index) < 1024
7118 && INTVAL (index) > -1024
7119 && (INTVAL (index) & 3) == 0);
7121 /* For quad modes, we restrict the constant offset to be slightly less
7122 than what the instruction format permits. We do this because for
7123 quad mode moves, we will actually decompose them into two separate
7124 double-mode reads or writes. INDEX must therefore be a valid
7125 (double-mode) offset and so should INDEX+8. */
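/* E.g. an offset of 1016 is rejected below, because the second
   double-mode access would then need offset 1024, which is outside the
   permitted range.  */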
7126 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7127 return (code == CONST_INT
7128 && INTVAL (index) < 1016
7129 && INTVAL (index) > -1024
7130 && (INTVAL (index) & 3) == 0);
7132 /* We have no such constraint on double mode offsets, so we permit the
7133 full range of the instruction format. */
7134 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7135 return (code == CONST_INT
7136 && INTVAL (index) < 1024
7137 && INTVAL (index) > -1024
7138 && (INTVAL (index) & 3) == 0);
7140 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7141 return (code == CONST_INT
7142 && INTVAL (index) < 1024
7143 && INTVAL (index) > -1024
7144 && (INTVAL (index) & 3) == 0);
7146 if (arm_address_register_rtx_p (index, strict_p)
7147 && (GET_MODE_SIZE (mode) <= 4))
7148 return 1;
7150 if (mode == DImode || mode == DFmode)
7152 if (code == CONST_INT)
7154 HOST_WIDE_INT val = INTVAL (index);
7156 if (TARGET_LDRD)
7157 return val > -256 && val < 256;
7158 else
7159 return val > -4096 && val < 4092;
7162 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7165 if (GET_MODE_SIZE (mode) <= 4
7166 && ! (arm_arch4
7167 && (mode == HImode
7168 || mode == HFmode
7169 || (mode == QImode && outer == SIGN_EXTEND))))
7171 if (code == MULT)
7173 rtx xiop0 = XEXP (index, 0);
7174 rtx xiop1 = XEXP (index, 1);
7176 return ((arm_address_register_rtx_p (xiop0, strict_p)
7177 && power_of_two_operand (xiop1, SImode))
7178 || (arm_address_register_rtx_p (xiop1, strict_p)
7179 && power_of_two_operand (xiop0, SImode)));
7181 else if (code == LSHIFTRT || code == ASHIFTRT
7182 || code == ASHIFT || code == ROTATERT)
7184 rtx op = XEXP (index, 1);
7186 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7187 && CONST_INT_P (op)
7188 && INTVAL (op) > 0
7189 && INTVAL (op) <= 31);
7193 /* For ARM v4 we may be doing a sign-extend operation during the
7194 load. */
7195 if (arm_arch4)
7197 if (mode == HImode
7198 || mode == HFmode
7199 || (outer == SIGN_EXTEND && mode == QImode))
7200 range = 256;
7201 else
7202 range = 4096;
7204 else
7205 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7207 return (code == CONST_INT
7208 && INTVAL (index) < range
7209 && INTVAL (index) > -range);
7212 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7213 index operand. i.e. 1, 2, 4 or 8. */
7214 static bool
7215 thumb2_index_mul_operand (rtx op)
7217 HOST_WIDE_INT val;
7219 if (!CONST_INT_P (op))
7220 return false;
7222 val = INTVAL(op);
7223 return (val == 1 || val == 2 || val == 4 || val == 8);
7226 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7227 static int
7228 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7230 enum rtx_code code = GET_CODE (index);
7232 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7233 /* Standard coprocessor addressing modes. */
7234 if (TARGET_HARD_FLOAT
7235 && TARGET_VFP
7236 && (mode == SFmode || mode == DFmode))
7237 return (code == CONST_INT && INTVAL (index) < 1024
7238 /* Thumb-2 allows only > -256 index range for its core register
7239 load/stores. Since we allow SF/DF in core registers, we have
7240 to use the intersection between -256~4096 (core) and -1024~1024
7241 (coprocessor). */
7242 && INTVAL (index) > -256
7243 && (INTVAL (index) & 3) == 0);
7245 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7247 /* For DImode assume values will usually live in core regs
7248 and only allow LDRD addressing modes. */
7249 if (!TARGET_LDRD || mode != DImode)
7250 return (code == CONST_INT
7251 && INTVAL (index) < 1024
7252 && INTVAL (index) > -1024
7253 && (INTVAL (index) & 3) == 0);
7256 /* For quad modes, we restrict the constant offset to be slightly less
7257 than what the instruction format permits. We do this because for
7258 quad mode moves, we will actually decompose them into two separate
7259 double-mode reads or writes. INDEX must therefore be a valid
7260 (double-mode) offset and so should INDEX+8. */
7261 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7262 return (code == CONST_INT
7263 && INTVAL (index) < 1016
7264 && INTVAL (index) > -1024
7265 && (INTVAL (index) & 3) == 0);
7267 /* We have no such constraint on double mode offsets, so we permit the
7268 full range of the instruction format. */
7269 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7270 return (code == CONST_INT
7271 && INTVAL (index) < 1024
7272 && INTVAL (index) > -1024
7273 && (INTVAL (index) & 3) == 0);
7275 if (arm_address_register_rtx_p (index, strict_p)
7276 && (GET_MODE_SIZE (mode) <= 4))
7277 return 1;
7279 if (mode == DImode || mode == DFmode)
7281 if (code == CONST_INT)
7283 HOST_WIDE_INT val = INTVAL (index);
7284 /* ??? Can we assume ldrd for thumb2? */
7285 /* Thumb-2 ldrd only has reg+const addressing modes. */
7286 /* ldrd supports offsets of +-1020.
7287 However the ldr fallback does not. */
7288 return val > -256 && val < 256 && (val & 3) == 0;
7290 else
7291 return 0;
7294 if (code == MULT)
7296 rtx xiop0 = XEXP (index, 0);
7297 rtx xiop1 = XEXP (index, 1);
7299 return ((arm_address_register_rtx_p (xiop0, strict_p)
7300 && thumb2_index_mul_operand (xiop1))
7301 || (arm_address_register_rtx_p (xiop1, strict_p)
7302 && thumb2_index_mul_operand (xiop0)));
7304 else if (code == ASHIFT)
7306 rtx op = XEXP (index, 1);
7308 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7309 && CONST_INT_P (op)
7310 && INTVAL (op) > 0
7311 && INTVAL (op) <= 3);
7314 return (code == CONST_INT
7315 && INTVAL (index) < 4096
7316 && INTVAL (index) > -256);
7319 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7320 static int
7321 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7323 int regno;
7325 if (!REG_P (x))
7326 return 0;
7328 regno = REGNO (x);
7330 if (strict_p)
7331 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7333 return (regno <= LAST_LO_REGNUM
7334 || regno > LAST_VIRTUAL_REGISTER
7335 || regno == FRAME_POINTER_REGNUM
7336 || (GET_MODE_SIZE (mode) >= 4
7337 && (regno == STACK_POINTER_REGNUM
7338 || regno >= FIRST_PSEUDO_REGISTER
7339 || x == hard_frame_pointer_rtx
7340 || x == arg_pointer_rtx)));
7343 /* Return nonzero if x is a legitimate index register. This is the case
7344 for any base register that can access a QImode object. */
7345 inline static int
7346 thumb1_index_register_rtx_p (rtx x, int strict_p)
7348 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7351 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7353 The AP may be eliminated to either the SP or the FP, so we use the
7354 least common denominator, e.g. SImode, and offsets from 0 to 64.
7356 ??? Verify whether the above is the right approach.
7358 ??? Also, the FP may be eliminated to the SP, so perhaps that
7359 needs special handling also.
7361 ??? Look at how the mips16 port solves this problem. It probably uses
7362 better ways to solve some of these problems.
7364 Although it is not incorrect, we don't accept QImode and HImode
7365 addresses based on the frame pointer or arg pointer until the
7366 reload pass starts. This is so that eliminating such addresses
7367 into stack based ones won't produce impossible code. */
7369 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7371 /* ??? Not clear if this is right. Experiment. */
7372 if (GET_MODE_SIZE (mode) < 4
7373 && !(reload_in_progress || reload_completed)
7374 && (reg_mentioned_p (frame_pointer_rtx, x)
7375 || reg_mentioned_p (arg_pointer_rtx, x)
7376 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7377 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7378 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7379 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7380 return 0;
7382 /* Accept any base register. SP only in SImode or larger. */
7383 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7384 return 1;
7386 /* This is PC relative data before arm_reorg runs. */
7387 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7388 && GET_CODE (x) == SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7390 return 1;
7392 /* This is PC relative data after arm_reorg runs. */
7393 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7394 && reload_completed
7395 && (GET_CODE (x) == LABEL_REF
7396 || (GET_CODE (x) == CONST
7397 && GET_CODE (XEXP (x, 0)) == PLUS
7398 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7399 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7400 return 1;
7402 /* Post-inc indexing only supported for SImode and larger. */
7403 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7404 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7405 return 1;
7407 else if (GET_CODE (x) == PLUS)
7409 /* REG+REG address can be any two index registers. */
7410 /* We disallow FRAME+REG addressing since we know that FRAME
7411 will be replaced with STACK, and SP relative addressing only
7412 permits SP+OFFSET. */
7413 if (GET_MODE_SIZE (mode) <= 4
7414 && XEXP (x, 0) != frame_pointer_rtx
7415 && XEXP (x, 1) != frame_pointer_rtx
7416 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7417 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7418 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7419 return 1;
7421 /* REG+const has 5-7 bit offset for non-SP registers. */
7422 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7423 || XEXP (x, 0) == arg_pointer_rtx)
7424 && CONST_INT_P (XEXP (x, 1))
7425 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7426 return 1;
7428 /* REG+const has a 10-bit offset for SP, but only SImode and
7429 larger are supported. */
7430 /* ??? Should probably check for DI/DFmode overflow here
7431 just like GO_IF_LEGITIMATE_OFFSET does. */
7432 else if (REG_P (XEXP (x, 0))
7433 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7434 && GET_MODE_SIZE (mode) >= 4
7435 && CONST_INT_P (XEXP (x, 1))
7436 && INTVAL (XEXP (x, 1)) >= 0
7437 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7438 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7439 return 1;
7441 else if (REG_P (XEXP (x, 0))
7442 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7443 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7444 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7445 && REGNO (XEXP (x, 0))
7446 <= LAST_VIRTUAL_POINTER_REGISTER))
7447 && GET_MODE_SIZE (mode) >= 4
7448 && CONST_INT_P (XEXP (x, 1))
7449 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7450 return 1;
7453 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7454 && GET_MODE_SIZE (mode) == 4
7455 && GET_CODE (x) == SYMBOL_REF
7456 && CONSTANT_POOL_ADDRESS_P (x)
7457 && ! (flag_pic
7458 && symbol_mentioned_p (get_pool_constant (x))
7459 && ! pcrel_constant_p (get_pool_constant (x))))
7460 return 1;
7462 return 0;
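/* Illustrative sketch (not from the original sources) of typical SImode
   addresses the predicate above accepts; the register numbers are
   arbitrary examples.
     (reg r3)                          -- any base register
     (plus (reg r2) (reg r5))          -- REG+REG, accesses of 4 bytes or less
     (plus (reg r4) (const_int 20))    -- REG+imm, within thumb_legitimate_offset_p
     (plus (reg sp) (const_int 1020))  -- SP-relative, 10-bit word-aligned offset
   FRAME+REG forms are rejected, since frame-pointer elimination would
   otherwise create an unsupported SP+REG address.  */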
7465 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7466 instruction of mode MODE. */
7468 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7470 switch (GET_MODE_SIZE (mode))
7472 case 1:
7473 return val >= 0 && val < 32;
7475 case 2:
7476 return val >= 0 && val < 64 && (val & 1) == 0;
7478 default:
7479 return (val >= 0
7480 && (val + GET_MODE_SIZE (mode)) <= 128
7481 && (val & 3) == 0);
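/* A concrete reading of the ranges above (illustrative only; the asserts
   are not compiled).  */
#if 0
  gcc_assert (thumb_legitimate_offset_p (QImode, 31));    /* byte: 0..31 */
  gcc_assert (!thumb_legitimate_offset_p (QImode, 32));
  gcc_assert (thumb_legitimate_offset_p (HImode, 62));    /* half: 0..62, even */
  gcc_assert (!thumb_legitimate_offset_p (HImode, 63));
  gcc_assert (thumb_legitimate_offset_p (SImode, 124));   /* word: 0..124, multiple of 4 */
  gcc_assert (!thumb_legitimate_offset_p (SImode, 128));
#endif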
7485 bool
7486 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7488 if (TARGET_ARM)
7489 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7490 else if (TARGET_THUMB2)
7491 return thumb2_legitimate_address_p (mode, x, strict_p);
7492 else /* if (TARGET_THUMB1) */
7493 return thumb1_legitimate_address_p (mode, x, strict_p);
7496 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7498 Given an rtx X being reloaded into a reg required to be
7499 in class CLASS, return the class of reg to actually use.
7500 In general this is just CLASS, but for the Thumb core registers and
7501 immediate constants we prefer a LO_REGS class or a subset. */
7503 static reg_class_t
7504 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7506 if (TARGET_32BIT)
7507 return rclass;
7508 else
7510 if (rclass == GENERAL_REGS)
7511 return LO_REGS;
7512 else
7513 return rclass;
7517 /* Build the SYMBOL_REF for __tls_get_addr. */
7519 static GTY(()) rtx tls_get_addr_libfunc;
7521 static rtx
7522 get_tls_get_addr (void)
7524 if (!tls_get_addr_libfunc)
7525 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7526 return tls_get_addr_libfunc;
7530 arm_load_tp (rtx target)
7532 if (!target)
7533 target = gen_reg_rtx (SImode);
7535 if (TARGET_HARD_TP)
7537 /* Can return in any reg. */
7538 emit_insn (gen_load_tp_hard (target));
7540 else
7542 /* Always returned in r0. Immediately copy the result into a pseudo;
7543 otherwise other uses of r0 (e.g. setting up function arguments) may
7544 clobber the value. */
7546 rtx tmp;
7548 emit_insn (gen_load_tp_soft ());
7550 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7551 emit_move_insn (target, tmp);
7553 return target;
7556 static rtx
7557 load_tls_operand (rtx x, rtx reg)
7559 rtx tmp;
7561 if (reg == NULL_RTX)
7562 reg = gen_reg_rtx (SImode);
7564 tmp = gen_rtx_CONST (SImode, x);
7566 emit_move_insn (reg, tmp);
7568 return reg;
7571 static rtx
7572 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7574 rtx insns, label, labelno, sum;
7576 gcc_assert (reloc != TLS_DESCSEQ);
7577 start_sequence ();
7579 labelno = GEN_INT (pic_labelno++);
7580 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7581 label = gen_rtx_CONST (VOIDmode, label);
7583 sum = gen_rtx_UNSPEC (Pmode,
7584 gen_rtvec (4, x, GEN_INT (reloc), label,
7585 GEN_INT (TARGET_ARM ? 8 : 4)),
7586 UNSPEC_TLS);
7587 reg = load_tls_operand (sum, reg);
7589 if (TARGET_ARM)
7590 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7591 else
7592 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7594 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7595 LCT_PURE, /* LCT_CONST? */
7596 Pmode, 1, reg, Pmode);
7598 insns = get_insns ();
7599 end_sequence ();
7601 return insns;
7604 static rtx
7605 arm_tls_descseq_addr (rtx x, rtx reg)
7607 rtx labelno = GEN_INT (pic_labelno++);
7608 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7609 rtx sum = gen_rtx_UNSPEC (Pmode,
7610 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7611 gen_rtx_CONST (VOIDmode, label),
7612 GEN_INT (!TARGET_ARM)),
7613 UNSPEC_TLS);
7614 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7616 emit_insn (gen_tlscall (x, labelno));
7617 if (!reg)
7618 reg = gen_reg_rtx (SImode);
7619 else
7620 gcc_assert (REGNO (reg) != R0_REGNUM);
7622 emit_move_insn (reg, reg0);
7624 return reg;
7628 legitimize_tls_address (rtx x, rtx reg)
7630 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7631 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7633 switch (model)
7635 case TLS_MODEL_GLOBAL_DYNAMIC:
7636 if (TARGET_GNU2_TLS)
7638 reg = arm_tls_descseq_addr (x, reg);
7640 tp = arm_load_tp (NULL_RTX);
7642 dest = gen_rtx_PLUS (Pmode, tp, reg);
7644 else
7646 /* Original scheme */
7647 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7648 dest = gen_reg_rtx (Pmode);
7649 emit_libcall_block (insns, dest, ret, x);
7651 return dest;
7653 case TLS_MODEL_LOCAL_DYNAMIC:
7654 if (TARGET_GNU2_TLS)
7656 reg = arm_tls_descseq_addr (x, reg);
7658 tp = arm_load_tp (NULL_RTX);
7660 dest = gen_rtx_PLUS (Pmode, tp, reg);
7662 else
7664 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7666 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7667 share the LDM result with other LD model accesses. */
7668 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7669 UNSPEC_TLS);
7670 dest = gen_reg_rtx (Pmode);
7671 emit_libcall_block (insns, dest, ret, eqv);
7673 /* Load the addend. */
7674 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7675 GEN_INT (TLS_LDO32)),
7676 UNSPEC_TLS);
7677 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7678 dest = gen_rtx_PLUS (Pmode, dest, addend);
7680 return dest;
7682 case TLS_MODEL_INITIAL_EXEC:
7683 labelno = GEN_INT (pic_labelno++);
7684 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7685 label = gen_rtx_CONST (VOIDmode, label);
7686 sum = gen_rtx_UNSPEC (Pmode,
7687 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7688 GEN_INT (TARGET_ARM ? 8 : 4)),
7689 UNSPEC_TLS);
7690 reg = load_tls_operand (sum, reg);
7692 if (TARGET_ARM)
7693 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7694 else if (TARGET_THUMB2)
7695 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7696 else
7698 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7699 emit_move_insn (reg, gen_const_mem (SImode, reg));
7702 tp = arm_load_tp (NULL_RTX);
7704 return gen_rtx_PLUS (Pmode, tp, reg);
7706 case TLS_MODEL_LOCAL_EXEC:
7707 tp = arm_load_tp (NULL_RTX);
7709 reg = gen_rtx_UNSPEC (Pmode,
7710 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7711 UNSPEC_TLS);
7712 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7714 return gen_rtx_PLUS (Pmode, tp, reg);
7716 default:
7717 abort ();
7721 /* Try machine-dependent ways of modifying an illegitimate address
7722 to be legitimate. If we find one, return the new, valid address. */
7724 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7726 if (arm_tls_referenced_p (x))
7728 rtx addend = NULL;
7730 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7732 addend = XEXP (XEXP (x, 0), 1);
7733 x = XEXP (XEXP (x, 0), 0);
7736 if (GET_CODE (x) != SYMBOL_REF)
7737 return x;
7739 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7741 x = legitimize_tls_address (x, NULL_RTX);
7743 if (addend)
7745 x = gen_rtx_PLUS (SImode, x, addend);
7746 orig_x = x;
7748 else
7749 return x;
7752 if (!TARGET_ARM)
7754 /* TODO: legitimize_address for Thumb2. */
7755 if (TARGET_THUMB2)
7756 return x;
7757 return thumb_legitimize_address (x, orig_x, mode);
7760 if (GET_CODE (x) == PLUS)
7762 rtx xop0 = XEXP (x, 0);
7763 rtx xop1 = XEXP (x, 1);
7765 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7766 xop0 = force_reg (SImode, xop0);
7768 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7769 && !symbol_mentioned_p (xop1))
7770 xop1 = force_reg (SImode, xop1);
7772 if (ARM_BASE_REGISTER_RTX_P (xop0)
7773 && CONST_INT_P (xop1))
7775 HOST_WIDE_INT n, low_n;
7776 rtx base_reg, val;
7777 n = INTVAL (xop1);
7779 /* VFP addressing modes actually allow greater offsets, but for
7780 now we just stick with the lowest common denominator. */
7781 if (mode == DImode
7782 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7784 low_n = n & 0x0f;
7785 n &= ~0x0f;
7786 if (low_n > 4)
7788 n += 16;
7789 low_n -= 16;
7792 else
7794 low_n = ((mode) == TImode ? 0
7795 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7796 n -= low_n;
7799 base_reg = gen_reg_rtx (SImode);
7800 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7801 emit_move_insn (base_reg, val);
7802 x = plus_constant (Pmode, base_reg, low_n);
7804 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7805 x = gen_rtx_PLUS (SImode, xop0, xop1);
7808 /* XXX We don't allow MINUS any more -- see comment in
7809 arm_legitimate_address_outer_p (). */
7810 else if (GET_CODE (x) == MINUS)
7812 rtx xop0 = XEXP (x, 0);
7813 rtx xop1 = XEXP (x, 1);
7815 if (CONSTANT_P (xop0))
7816 xop0 = force_reg (SImode, xop0);
7818 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7819 xop1 = force_reg (SImode, xop1);
7821 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7822 x = gen_rtx_MINUS (SImode, xop0, xop1);
7825 /* Make sure to take full advantage of the pre-indexed addressing mode
7826 with absolute addresses, which often allows the base register to
7827 be factorized for multiple adjacent memory references, and it might
7828 even allow the mini pool to be avoided entirely. */
7829 else if (CONST_INT_P (x) && optimize > 0)
7831 unsigned int bits;
7832 HOST_WIDE_INT mask, base, index;
7833 rtx base_reg;
7835 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7836 use an 8-bit index. So let's use a 12-bit index for SImode only and
7837 hope that arm_gen_constant will enable ldrb to use more bits. */
7838 bits = (mode == SImode) ? 12 : 8;
7839 mask = (1 << bits) - 1;
7840 base = INTVAL (x) & ~mask;
7841 index = INTVAL (x) & mask;
7842 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7844 /* It'll most probably be more efficient to generate the base
7845 with more bits set and use a negative index instead. */
7846 base |= mask;
7847 index -= mask;
7849 base_reg = force_reg (SImode, GEN_INT (base));
7850 x = plus_constant (Pmode, base_reg, index);
7853 if (flag_pic)
7855 /* We need to find and carefully transform any SYMBOL and LABEL
7856 references; so go back to the original address expression. */
7857 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7859 if (new_x != orig_x)
7860 x = new_x;
7863 return x;
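/* A sketch (illustrative values, not from the original sources) of the
   absolute-address splitting above for a QImode access, mirroring the
   bits/mask/base/index computation in the CONST_INT branch.  */
#if 0
  unsigned int bits = 8;                        /* QImode: 8-bit index */
  HOST_WIDE_INT mask = (1 << bits) - 1;         /* 0xff */
  HOST_WIDE_INT base = 0x00fffff4 & ~mask;      /* 0x00ffff00 */
  HOST_WIDE_INT index = 0x00fffff4 & mask;      /* 0xf4 */
  /* bit_count (base) == 16 > (32 - bits) / 2 == 12, so flip to a
     negative index: base becomes 0x00ffffff (a single MVN) and
     index becomes 0xf4 - 0xff == -11.  */
  base |= mask;
  index -= mask;
#endif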
7867 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7868 to be legitimate. If we find one, return the new, valid address. */
7870 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7872 if (GET_CODE (x) == PLUS
7873 && CONST_INT_P (XEXP (x, 1))
7874 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7875 || INTVAL (XEXP (x, 1)) < 0))
7877 rtx xop0 = XEXP (x, 0);
7878 rtx xop1 = XEXP (x, 1);
7879 HOST_WIDE_INT offset = INTVAL (xop1);
7881 /* Try and fold the offset into a biasing of the base register and
7882 then offsetting that. Don't do this when optimizing for space
7883 since it can cause too many CSEs. */
7884 if (optimize_size && offset >= 0
7885 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7887 HOST_WIDE_INT delta;
7889 if (offset >= 256)
7890 delta = offset - (256 - GET_MODE_SIZE (mode));
7891 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7892 delta = 31 * GET_MODE_SIZE (mode);
7893 else
7894 delta = offset & (~31 * GET_MODE_SIZE (mode));
7896 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7897 NULL_RTX);
7898 x = plus_constant (Pmode, xop0, delta);
7900 else if (offset < 0 && offset > -256)
7901 /* Small negative offsets are best done with a subtract before the
7902 dereference; forcing these into a register normally takes two
7903 instructions. */
7904 x = force_operand (x, NULL_RTX);
7905 else
7907 /* For the remaining cases, force the constant into a register. */
7908 xop1 = force_reg (SImode, xop1);
7909 x = gen_rtx_PLUS (SImode, xop0, xop1);
7912 else if (GET_CODE (x) == PLUS
7913 && s_register_operand (XEXP (x, 1), SImode)
7914 && !s_register_operand (XEXP (x, 0), SImode))
7916 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7918 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7921 if (flag_pic)
7923 /* We need to find and carefully transform any SYMBOL and LABEL
7924 references; so go back to the original address expression. */
7925 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7927 if (new_x != orig_x)
7928 x = new_x;
7931 return x;
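/* A sketch (assumed values, not from the original sources) of the offset
   folding above for an SImode access at base + 300 when optimizing for
   size: 300 is too large for the scaled REG+imm form, so the code biases
   the base register instead.  */
#if 0
  HOST_WIDE_INT offset = 300;
  HOST_WIDE_INT delta = offset - (256 - 4);   /* 48: a legal word offset */
  /* xop0 becomes base + 252 (one add), and the final address is
     (base + 252) + 48, which thumb_legitimate_offset_p accepts.  */
#endif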
7934 bool
7935 arm_legitimize_reload_address (rtx *p,
7936 machine_mode mode,
7937 int opnum, int type,
7938 int ind_levels ATTRIBUTE_UNUSED)
7940 /* We must recognize output that we have already generated ourselves. */
7941 if (GET_CODE (*p) == PLUS
7942 && GET_CODE (XEXP (*p, 0)) == PLUS
7943 && REG_P (XEXP (XEXP (*p, 0), 0))
7944 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7945 && CONST_INT_P (XEXP (*p, 1)))
7947 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7948 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7949 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7950 return true;
7953 if (GET_CODE (*p) == PLUS
7954 && REG_P (XEXP (*p, 0))
7955 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7956 /* If the base register is equivalent to a constant, let the generic
7957 code handle it. Otherwise we will run into problems if a future
7958 reload pass decides to rematerialize the constant. */
7959 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7960 && CONST_INT_P (XEXP (*p, 1)))
7962 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7963 HOST_WIDE_INT low, high;
7965 /* Detect coprocessor load/stores. */
7966 bool coproc_p = ((TARGET_HARD_FLOAT
7967 && TARGET_VFP
7968 && (mode == SFmode || mode == DFmode))
7969 || (TARGET_REALLY_IWMMXT
7970 && VALID_IWMMXT_REG_MODE (mode))
7971 || (TARGET_NEON
7972 && (VALID_NEON_DREG_MODE (mode)
7973 || VALID_NEON_QREG_MODE (mode))));
7975 /* For some conditions, bail out when lower two bits are unaligned. */
7976 if ((val & 0x3) != 0
7977 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7978 && (coproc_p
7979 /* For DI, and DF under soft-float: */
7980 || ((mode == DImode || mode == DFmode)
7981 /* Without ldrd, we use stm/ldm, which does not
7982 fare well with unaligned bits. */
7983 && (! TARGET_LDRD
7984 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7985 || TARGET_THUMB2))))
7986 return false;
7988 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7989 where the (reg+high) part gets turned into a reload add insn,
7990 we try to decompose the index into high/low values that can often
7991 also lead to better reload CSE.
7992 For example:
7993 ldr r0, [r2, #4100] // Offset too large
7994 ldr r1, [r2, #4104] // Offset too large
7996 is best reloaded as:
7997 add t1, r2, #4096
7998 ldr r0, [t1, #4]
7999 add t2, r2, #4096
8000 ldr r1, [t2, #8]
8002 which post-reload CSE can simplify in most cases to eliminate the
8003 second add instruction:
8004 add t1, r2, #4096
8005 ldr r0, [t1, #4]
8006 ldr r1, [t1, #8]
8008 The idea here is that we want to split out the bits of the constant
8009 as a mask, rather than subtracting the maximum offset that the
8010 respective type of load/store can handle.
8012 A negative low part can still be used even when the overall offset
8013 is positive; sometimes this may lead to an immediate
8014 that can be constructed with fewer instructions.
8015 For example:
8016 ldr r0, [r2, #0x3FFFFC]
8018 This is best reloaded as:
8019 add t1, r2, #0x400000
8020 ldr r0, [t1, #-4]
8022 The trick for spotting this for a load insn with N bits of offset
8023 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
8024 negative offset that is going to make bit N and all the bits below
8025 it become zero in the remainder part.
8027 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
8028 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
8029 used in most cases of ARM load/store instructions. */
8031 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
8032 (((VAL) & ((1 << (N)) - 1)) \
8033 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
8034 : 0)
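/* Worked examples (illustrative only) of the split performed by the macro
   above; the values follow directly from its definition.  */
#if 0
  /* Bit 12 clear: keep a small positive low part.  */
  gcc_assert (SIGN_MAG_LOW_ADDR_BITS (0x2004, 12) == 4);    /* high = 0x2000 */
  /* Bit 10 set (the 0x3FFFFC case from the comment above): choose a
     negative low part so that the high part is a single set bit.  */
  gcc_assert (SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10) == -4); /* high = 0x400000 */
#endif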
8036 if (coproc_p)
8038 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
8040 /* NEON quad-word load/stores are made of two double-word accesses,
8041 so the valid index range is reduced by 8. Treat it as a 9-bit range
8042 if we go over. */
8043 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
8044 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
8046 else if (GET_MODE_SIZE (mode) == 8)
8048 if (TARGET_LDRD)
8049 low = (TARGET_THUMB2
8050 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8051 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8052 else
8053 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8054 to access doublewords. The supported load/store offsets are
8055 -8, -4, and 4, which we try to produce here. */
8056 low = ((val & 0xf) ^ 0x8) - 0x8;
8058 else if (GET_MODE_SIZE (mode) < 8)
8060 /* NEON element load/stores do not have an offset. */
8061 if (TARGET_NEON_FP16 && mode == HFmode)
8062 return false;
8064 if (TARGET_THUMB2)
8066 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8067 Try the wider 12-bit range first, and re-try if the result
8068 is out of range. */
8069 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8070 if (low < -255)
8071 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8073 else
8075 if (mode == HImode || mode == HFmode)
8077 if (arm_arch4)
8078 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8079 else
8081 /* The storehi/movhi_bytes fallbacks can use only
8082 [-4094,+4094] of the full ldrb/strb index range. */
8083 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8084 if (low == 4095 || low == -4095)
8085 return false;
8088 else
8089 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8092 else
8093 return false;
8095 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8096 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8097 - (unsigned HOST_WIDE_INT) 0x80000000);
8098 /* Check for overflow or zero. */
8099 if (low == 0 || high == 0 || (high + low != val))
8100 return false;
8102 /* Reload the high part into a base reg; leave the low part
8103 in the mem.
8104 Note that replacing this gen_rtx_PLUS with plus_constant is
8105 wrong in this case because we rely on the
8106 (plus (plus reg c1) c2) structure being preserved so that
8107 XEXP (*p, 0) in push_reload below uses the correct term. */
8108 *p = gen_rtx_PLUS (GET_MODE (*p),
8109 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8110 GEN_INT (high)),
8111 GEN_INT (low));
8112 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8113 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8114 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8115 return true;
8118 return false;
8122 thumb_legitimize_reload_address (rtx *x_p,
8123 machine_mode mode,
8124 int opnum, int type,
8125 int ind_levels ATTRIBUTE_UNUSED)
8127 rtx x = *x_p;
8129 if (GET_CODE (x) == PLUS
8130 && GET_MODE_SIZE (mode) < 4
8131 && REG_P (XEXP (x, 0))
8132 && XEXP (x, 0) == stack_pointer_rtx
8133 && CONST_INT_P (XEXP (x, 1))
8134 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8136 rtx orig_x = x;
8138 x = copy_rtx (x);
8139 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8140 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8141 return x;
8144 /* If both registers are hi-regs, then it's better to reload the
8145 entire expression rather than each register individually. That
8146 only requires one reload register rather than two. */
8147 if (GET_CODE (x) == PLUS
8148 && REG_P (XEXP (x, 0))
8149 && REG_P (XEXP (x, 1))
8150 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8151 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8153 rtx orig_x = x;
8155 x = copy_rtx (x);
8156 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8157 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8158 return x;
8161 return NULL;
8164 /* Return TRUE if X contains any TLS symbol references. */
8166 bool
8167 arm_tls_referenced_p (rtx x)
8169 if (! TARGET_HAVE_TLS)
8170 return false;
8172 subrtx_iterator::array_type array;
8173 FOR_EACH_SUBRTX (iter, array, x, ALL)
8175 const_rtx x = *iter;
8176 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8177 return true;
8179 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8180 TLS offsets, not real symbol references. */
8181 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8182 iter.skip_subrtxes ();
8184 return false;
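/* Illustration (not from the original sources): the walk above returns
   true for an address such as
     (const (plus (symbol_ref "x") (const_int 4)))
   when "x" has a non-zero SYMBOL_REF_TLS_MODEL, but it skips over
   (unspec [...] UNSPEC_TLS) operands, since those carry TLS offsets
   rather than real symbol references.  */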
8187 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8189 On the ARM, allow any integer (invalid ones are removed later by insn
8190 patterns), nice doubles and symbol_refs which refer to the function's
8191 constant pool XXX.
8193 When generating PIC, allow anything. */
8195 static bool
8196 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8198 return flag_pic || !label_mentioned_p (x);
8201 static bool
8202 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8204 return (CONST_INT_P (x)
8205 || CONST_DOUBLE_P (x)
8206 || CONSTANT_ADDRESS_P (x)
8207 || flag_pic);
8210 static bool
8211 arm_legitimate_constant_p (machine_mode mode, rtx x)
8213 return (!arm_cannot_force_const_mem (mode, x)
8214 && (TARGET_32BIT
8215 ? arm_legitimate_constant_p_1 (mode, x)
8216 : thumb_legitimate_constant_p (mode, x)));
8219 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8221 static bool
8222 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8224 rtx base, offset;
8226 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8228 split_const (x, &base, &offset);
8229 if (GET_CODE (base) == SYMBOL_REF
8230 && !offset_within_block_p (base, INTVAL (offset)))
8231 return true;
8233 return arm_tls_referenced_p (x);
8236 #define REG_OR_SUBREG_REG(X) \
8237 (REG_P (X) \
8238 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8240 #define REG_OR_SUBREG_RTX(X) \
8241 (REG_P (X) ? (X) : SUBREG_REG (X))
8243 static inline int
8244 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8246 machine_mode mode = GET_MODE (x);
8247 int total, words;
8249 switch (code)
8251 case ASHIFT:
8252 case ASHIFTRT:
8253 case LSHIFTRT:
8254 case ROTATERT:
8255 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8257 case PLUS:
8258 case MINUS:
8259 case COMPARE:
8260 case NEG:
8261 case NOT:
8262 return COSTS_N_INSNS (1);
8264 case MULT:
8265 if (CONST_INT_P (XEXP (x, 1)))
8267 int cycles = 0;
8268 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8270 while (i)
8272 i >>= 2;
8273 cycles++;
8275 return COSTS_N_INSNS (2) + cycles;
8277 return COSTS_N_INSNS (1) + 16;
8279 case SET:
8280 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8281 the mode. */
8282 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8283 return (COSTS_N_INSNS (words)
8284 + 4 * ((MEM_P (SET_SRC (x)))
8285 + MEM_P (SET_DEST (x))));
8287 case CONST_INT:
8288 if (outer == SET)
8290 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8291 return 0;
8292 if (thumb_shiftable_const (INTVAL (x)))
8293 return COSTS_N_INSNS (2);
8294 return COSTS_N_INSNS (3);
8296 else if ((outer == PLUS || outer == COMPARE)
8297 && INTVAL (x) < 256 && INTVAL (x) > -256)
8298 return 0;
8299 else if ((outer == IOR || outer == XOR || outer == AND)
8300 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8301 return COSTS_N_INSNS (1);
8302 else if (outer == AND)
8304 int i;
8305 /* This duplicates the tests in the andsi3 expander. */
8306 for (i = 9; i <= 31; i++)
8307 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8308 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8309 return COSTS_N_INSNS (2);
8311 else if (outer == ASHIFT || outer == ASHIFTRT
8312 || outer == LSHIFTRT)
8313 return 0;
8314 return COSTS_N_INSNS (2);
8316 case CONST:
8317 case CONST_DOUBLE:
8318 case LABEL_REF:
8319 case SYMBOL_REF:
8320 return COSTS_N_INSNS (3);
8322 case UDIV:
8323 case UMOD:
8324 case DIV:
8325 case MOD:
8326 return 100;
8328 case TRUNCATE:
8329 return 99;
8331 case AND:
8332 case XOR:
8333 case IOR:
8334 /* XXX guess. */
8335 return 8;
8337 case MEM:
8338 /* XXX another guess. */
8339 /* Memory costs quite a lot for the first word, but subsequent words
8340 load at the equivalent of a single insn each. */
8341 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8342 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8343 ? 4 : 0));
8345 case IF_THEN_ELSE:
8346 /* XXX a guess. */
8347 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8348 return 14;
8349 return 2;
8351 case SIGN_EXTEND:
8352 case ZERO_EXTEND:
8353 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8354 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8356 if (mode == SImode)
8357 return total;
8359 if (arm_arch6)
8360 return total + COSTS_N_INSNS (1);
8362 /* Assume a two-shift sequence. Increase the cost slightly so
8363 we prefer actual shifts over an extend operation. */
8364 return total + 1 + COSTS_N_INSNS (2);
8366 default:
8367 return 99;
8371 static inline bool
8372 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8374 machine_mode mode = GET_MODE (x);
8375 enum rtx_code subcode;
8376 rtx operand;
8377 enum rtx_code code = GET_CODE (x);
8378 *total = 0;
8380 switch (code)
8382 case MEM:
8383 /* Memory costs quite a lot for the first word, but subsequent words
8384 load at the equivalent of a single insn each. */
8385 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8386 return true;
8388 case DIV:
8389 case MOD:
8390 case UDIV:
8391 case UMOD:
8392 if (TARGET_HARD_FLOAT && mode == SFmode)
8393 *total = COSTS_N_INSNS (2);
8394 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8395 *total = COSTS_N_INSNS (4);
8396 else
8397 *total = COSTS_N_INSNS (20);
8398 return false;
8400 case ROTATE:
8401 if (REG_P (XEXP (x, 1)))
8402 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8403 else if (!CONST_INT_P (XEXP (x, 1)))
8404 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8406 /* Fall through */
8407 case ROTATERT:
8408 if (mode != SImode)
8410 *total += COSTS_N_INSNS (4);
8411 return true;
8414 /* Fall through */
8415 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8416 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8417 if (mode == DImode)
8419 *total += COSTS_N_INSNS (3);
8420 return true;
8423 *total += COSTS_N_INSNS (1);
8424 /* Increase the cost of complex shifts because they aren't any faster,
8425 and they reduce dual-issue opportunities. */
8426 if (arm_tune_cortex_a9
8427 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8428 ++*total;
8430 return true;
8432 case MINUS:
8433 if (mode == DImode)
8435 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8436 if (CONST_INT_P (XEXP (x, 0))
8437 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8439 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8440 return true;
8443 if (CONST_INT_P (XEXP (x, 1))
8444 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8446 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8447 return true;
8450 return false;
8453 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8455 if (TARGET_HARD_FLOAT
8456 && (mode == SFmode
8457 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8459 *total = COSTS_N_INSNS (1);
8460 if (CONST_DOUBLE_P (XEXP (x, 0))
8461 && arm_const_double_rtx (XEXP (x, 0)))
8463 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8464 return true;
8467 if (CONST_DOUBLE_P (XEXP (x, 1))
8468 && arm_const_double_rtx (XEXP (x, 1)))
8470 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8471 return true;
8474 return false;
8476 *total = COSTS_N_INSNS (20);
8477 return false;
8480 *total = COSTS_N_INSNS (1);
8481 if (CONST_INT_P (XEXP (x, 0))
8482 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8484 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8485 return true;
8488 subcode = GET_CODE (XEXP (x, 1));
8489 if (subcode == ASHIFT || subcode == ASHIFTRT
8490 || subcode == LSHIFTRT
8491 || subcode == ROTATE || subcode == ROTATERT)
8493 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8494 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8495 return true;
8498 /* A shift as a part of RSB costs no more than RSB itself. */
8499 if (GET_CODE (XEXP (x, 0)) == MULT
8500 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8502 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8503 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8504 return true;
8507 if (subcode == MULT
8508 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8510 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8511 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8512 return true;
8515 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8516 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8518 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8519 if (REG_P (XEXP (XEXP (x, 1), 0))
8520 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8521 *total += COSTS_N_INSNS (1);
8523 return true;
8526 /* Fall through */
8528 case PLUS:
8529 if (code == PLUS && arm_arch6 && mode == SImode
8530 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8531 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8533 *total = COSTS_N_INSNS (1);
8534 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8535 0, speed);
8536 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8537 return true;
8540 /* MLA: All arguments must be registers. We filter out
8541 multiplication by a power of two, so that we fall through to
8542 the code below. */
8543 if (GET_CODE (XEXP (x, 0)) == MULT
8544 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8546 /* The cost comes from the cost of the multiply. */
8547 return false;
8550 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8552 if (TARGET_HARD_FLOAT
8553 && (mode == SFmode
8554 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8556 *total = COSTS_N_INSNS (1);
8557 if (CONST_DOUBLE_P (XEXP (x, 1))
8558 && arm_const_double_rtx (XEXP (x, 1)))
8560 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8561 return true;
8564 return false;
8567 *total = COSTS_N_INSNS (20);
8568 return false;
8571 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8572 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8574 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8575 if (REG_P (XEXP (XEXP (x, 0), 0))
8576 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8577 *total += COSTS_N_INSNS (1);
8578 return true;
8581 /* Fall through */
8583 case AND: case XOR: case IOR:
8585 /* Normally the frame registers will be split into reg+const during
8586 reload, so it is a bad idea to combine them with other instructions,
8587 since then they might not be moved outside of loops. As a compromise
8588 we allow integration with ops that have a constant as their second
8589 operand. */
8590 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8591 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8592 && !CONST_INT_P (XEXP (x, 1)))
8593 *total = COSTS_N_INSNS (1);
8595 if (mode == DImode)
8597 *total += COSTS_N_INSNS (2);
8598 if (CONST_INT_P (XEXP (x, 1))
8599 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8601 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8602 return true;
8605 return false;
8608 *total += COSTS_N_INSNS (1);
8609 if (CONST_INT_P (XEXP (x, 1))
8610 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8612 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8613 return true;
8615 subcode = GET_CODE (XEXP (x, 0));
8616 if (subcode == ASHIFT || subcode == ASHIFTRT
8617 || subcode == LSHIFTRT
8618 || subcode == ROTATE || subcode == ROTATERT)
8620 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8621 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8622 return true;
8625 if (subcode == MULT
8626 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8628 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8629 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8630 return true;
8633 if (subcode == UMIN || subcode == UMAX
8634 || subcode == SMIN || subcode == SMAX)
8636 *total = COSTS_N_INSNS (3);
8637 return true;
8640 return false;
8642 case MULT:
8643 /* This should have been handled by the CPU specific routines. */
8644 gcc_unreachable ();
8646 case TRUNCATE:
8647 if (arm_arch3m && mode == SImode
8648 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8649 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8650 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8651 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8652 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8653 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8655 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8656 return true;
8658 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8659 return false;
8661 case NEG:
8662 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8664 if (TARGET_HARD_FLOAT
8665 && (mode == SFmode
8666 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8668 *total = COSTS_N_INSNS (1);
8669 return false;
8671 *total = COSTS_N_INSNS (2);
8672 return false;
8675 /* Fall through */
8676 case NOT:
8677 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8678 if (mode == SImode && code == NOT)
8680 subcode = GET_CODE (XEXP (x, 0));
8681 if (subcode == ASHIFT || subcode == ASHIFTRT
8682 || subcode == LSHIFTRT
8683 || subcode == ROTATE || subcode == ROTATERT
8684 || (subcode == MULT
8685 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8687 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8688 /* Register shifts cost an extra cycle. */
8689 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8690 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8691 subcode, 1, speed);
8692 return true;
8696 return false;
8698 case IF_THEN_ELSE:
8699 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8701 *total = COSTS_N_INSNS (4);
8702 return true;
8705 operand = XEXP (x, 0);
8707 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8708 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8709 && REG_P (XEXP (operand, 0))
8710 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8711 *total += COSTS_N_INSNS (1);
8712 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8713 + rtx_cost (XEXP (x, 2), code, 2, speed));
8714 return true;
8716 case NE:
8717 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8719 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8720 return true;
8722 goto scc_insn;
8724 case GE:
8725 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8726 && mode == SImode && XEXP (x, 1) == const0_rtx)
8728 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8729 return true;
8731 goto scc_insn;
8733 case LT:
8734 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8735 && mode == SImode && XEXP (x, 1) == const0_rtx)
8737 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8738 return true;
8740 goto scc_insn;
8742 case EQ:
8743 case GT:
8744 case LE:
8745 case GEU:
8746 case LTU:
8747 case GTU:
8748 case LEU:
8749 case UNORDERED:
8750 case ORDERED:
8751 case UNEQ:
8752 case UNGE:
8753 case UNLT:
8754 case UNGT:
8755 case UNLE:
8756 scc_insn:
8757 /* SCC insns. If the comparison has already been
8758 performed, they cost 2 instructions. Otherwise they need
8759 an additional comparison before them. */
8760 *total = COSTS_N_INSNS (2);
8761 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8763 return true;
8766 /* Fall through */
8767 case COMPARE:
8768 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8770 *total = 0;
8771 return true;
8774 *total += COSTS_N_INSNS (1);
8775 if (CONST_INT_P (XEXP (x, 1))
8776 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8778 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8779 return true;
8782 subcode = GET_CODE (XEXP (x, 0));
8783 if (subcode == ASHIFT || subcode == ASHIFTRT
8784 || subcode == LSHIFTRT
8785 || subcode == ROTATE || subcode == ROTATERT)
8787 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8788 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8789 return true;
8792 if (subcode == MULT
8793 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8795 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8796 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8797 return true;
8800 return false;
8802 case UMIN:
8803 case UMAX:
8804 case SMIN:
8805 case SMAX:
8806 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8807 if (!CONST_INT_P (XEXP (x, 1))
8808 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8809 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8810 return true;
8812 case ABS:
8813 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8815 if (TARGET_HARD_FLOAT
8816 && (mode == SFmode
8817 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8819 *total = COSTS_N_INSNS (1);
8820 return false;
8822 *total = COSTS_N_INSNS (20);
8823 return false;
8825 *total = COSTS_N_INSNS (1);
8826 if (mode == DImode)
8827 *total += COSTS_N_INSNS (3);
8828 return false;
8830 case SIGN_EXTEND:
8831 case ZERO_EXTEND:
8832 *total = 0;
8833 if (GET_MODE_CLASS (mode) == MODE_INT)
8835 rtx op = XEXP (x, 0);
8836 machine_mode opmode = GET_MODE (op);
8838 if (mode == DImode)
8839 *total += COSTS_N_INSNS (1);
8841 if (opmode != SImode)
8843 if (MEM_P (op))
8845 /* If !arm_arch4, we use one of the extendhisi2_mem
8846 or movhi_bytes patterns for HImode. For a QImode
8847 sign extension, we first zero-extend from memory
8848 and then perform a shift sequence. */
8849 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8850 *total += COSTS_N_INSNS (2);
8852 else if (arm_arch6)
8853 *total += COSTS_N_INSNS (1);
8855 /* We don't have the necessary insn, so we need to perform some
8856 other operation. */
8857 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8858 /* An and with constant 255. */
8859 *total += COSTS_N_INSNS (1);
8860 else
8861 /* A shift sequence. Increase costs slightly to avoid
8862 combining two shifts into an extend operation. */
8863 *total += COSTS_N_INSNS (2) + 1;
8866 return false;
8869 switch (GET_MODE (XEXP (x, 0)))
8871 case V8QImode:
8872 case V4HImode:
8873 case V2SImode:
8874 case V4QImode:
8875 case V2HImode:
8876 *total = COSTS_N_INSNS (1);
8877 return false;
8879 default:
8880 gcc_unreachable ();
8882 gcc_unreachable ();
8884 case ZERO_EXTRACT:
8885 case SIGN_EXTRACT:
8886 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8887 return true;
8889 case CONST_INT:
8890 if (const_ok_for_arm (INTVAL (x))
8891 || const_ok_for_arm (~INTVAL (x)))
8892 *total = COSTS_N_INSNS (1);
8893 else
8894 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8895 INTVAL (x), NULL_RTX,
8896 NULL_RTX, 0, 0));
8897 return true;
8899 case CONST:
8900 case LABEL_REF:
8901 case SYMBOL_REF:
8902 *total = COSTS_N_INSNS (3);
8903 return true;
8905 case HIGH:
8906 *total = COSTS_N_INSNS (1);
8907 return true;
8909 case LO_SUM:
8910 *total = COSTS_N_INSNS (1);
8911 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8912 return true;
8914 case CONST_DOUBLE:
8915 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8916 && (mode == SFmode || !TARGET_VFP_SINGLE))
8917 *total = COSTS_N_INSNS (1);
8918 else
8919 *total = COSTS_N_INSNS (4);
8920 return true;
8922 case SET:
8923 /* The vec_extract patterns accept memory operands that require an
8924 address reload. Account for the cost of that reload to give the
8925 auto-inc-dec pass an incentive to try to replace them. */
8926 if (TARGET_NEON && MEM_P (SET_DEST (x))
8927 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8929 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8930 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8931 *total += COSTS_N_INSNS (1);
8932 return true;
8934 /* Likewise for the vec_set patterns. */
8935 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8936 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8937 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8939 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8940 *total = rtx_cost (mem, code, 0, speed);
8941 if (!neon_vector_mem_operand (mem, 2, true))
8942 *total += COSTS_N_INSNS (1);
8943 return true;
8945 return false;
8947 case UNSPEC:
8948 /* We cost this as highly as a memory access so that it can
8949 be hoisted out of loops. */
8950 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8952 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8954 return true;
8956 case CONST_VECTOR:
8957 if (TARGET_NEON
8958 && TARGET_HARD_FLOAT
8959 && outer == SET
8960 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8961 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8962 *total = COSTS_N_INSNS (1);
8963 else
8964 *total = COSTS_N_INSNS (4);
8965 return true;
8967 default:
8968 *total = COSTS_N_INSNS (4);
8969 return false;
8973 /* Estimates the size cost of thumb1 instructions.
8974 For now most of the code is copied from thumb1_rtx_costs. We need more
8975 fine-grained tuning when we have more related test cases. */
8976 static inline int
8977 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8979 machine_mode mode = GET_MODE (x);
8980 int words;
8982 switch (code)
8984 case ASHIFT:
8985 case ASHIFTRT:
8986 case LSHIFTRT:
8987 case ROTATERT:
8988 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8990 case PLUS:
8991 case MINUS:
8992 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8993 operations defined by RTL expansion, especially for the expansion of
8994 multiplication. */
8995 if ((GET_CODE (XEXP (x, 0)) == MULT
8996 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8997 || (GET_CODE (XEXP (x, 1)) == MULT
8998 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8999 return COSTS_N_INSNS (2);
9000 /* Deliberately fall through for a normal RTX. */
9001 case COMPARE:
9002 case NEG:
9003 case NOT:
9004 return COSTS_N_INSNS (1);
9006 case MULT:
9007 if (CONST_INT_P (XEXP (x, 1)))
9009 /* The Thumb-1 mul instruction can't operate on a constant; we must
9010 load it into a register first. */
9011 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9012 /* For targets that have a very small and high-latency multiply
9013 unit, we prefer to synthesize the mult with up to 5 instructions,
9014 giving a good balance between size and performance. */
9015 if (arm_arch6m && arm_m_profile_small_mul)
9016 return COSTS_N_INSNS (5);
9017 else
9018 return COSTS_N_INSNS (1) + const_size;
9020 return COSTS_N_INSNS (1);
9022 case SET:
9023 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9024 the mode. */
9025 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9026 return COSTS_N_INSNS (words)
9027 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9028 || satisfies_constraint_K (SET_SRC (x))
9029 /* thumb1_movdi_insn. */
9030 || ((words > 1) && MEM_P (SET_SRC (x))));
9032 case CONST_INT:
9033 if (outer == SET)
9035 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9036 return COSTS_N_INSNS (1);
9037 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9038 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9039 return COSTS_N_INSNS (2);
9040 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9041 if (thumb_shiftable_const (INTVAL (x)))
9042 return COSTS_N_INSNS (2);
9043 return COSTS_N_INSNS (3);
9045 else if ((outer == PLUS || outer == COMPARE)
9046 && INTVAL (x) < 256 && INTVAL (x) > -256)
9047 return 0;
9048 else if ((outer == IOR || outer == XOR || outer == AND)
9049 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9050 return COSTS_N_INSNS (1);
9051 else if (outer == AND)
9053 int i;
9054 /* This duplicates the tests in the andsi3 expander. */
9055 for (i = 9; i <= 31; i++)
9056 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9057 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9058 return COSTS_N_INSNS (2);
9060 else if (outer == ASHIFT || outer == ASHIFTRT
9061 || outer == LSHIFTRT)
9062 return 0;
9063 return COSTS_N_INSNS (2);
9065 case CONST:
9066 case CONST_DOUBLE:
9067 case LABEL_REF:
9068 case SYMBOL_REF:
9069 return COSTS_N_INSNS (3);
9071 case UDIV:
9072 case UMOD:
9073 case DIV:
9074 case MOD:
9075 return 100;
9077 case TRUNCATE:
9078 return 99;
9080 case AND:
9081 case XOR:
9082 case IOR:
9083 return COSTS_N_INSNS (1);
9085 case MEM:
9086 return (COSTS_N_INSNS (1)
9087 + COSTS_N_INSNS (1)
9088 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9089 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9090 ? COSTS_N_INSNS (1) : 0));
9092 case IF_THEN_ELSE:
9093 /* XXX a guess. */
9094 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9095 return 14;
9096 return 2;
9098 case ZERO_EXTEND:
9099 /* XXX still guessing. */
9100 switch (GET_MODE (XEXP (x, 0)))
9102 case QImode:
9103 return (1 + (mode == DImode ? 4 : 0)
9104 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9106 case HImode:
9107 return (4 + (mode == DImode ? 4 : 0)
9108 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9110 case SImode:
9111 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9113 default:
9114 return 99;
9117 default:
9118 return 99;
9122 /* RTX costs when optimizing for size. */
9123 static bool
9124 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9125 int *total)
9127 machine_mode mode = GET_MODE (x);
9128 if (TARGET_THUMB1)
9130 *total = thumb1_size_rtx_costs (x, code, outer_code);
9131 return true;
9134 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9135 switch (code)
9137 case MEM:
9138 /* A memory access costs 1 insn if the mode is small, or the address is
9139 a single register; otherwise it costs one insn per word.
9140 if (REG_P (XEXP (x, 0)))
9141 *total = COSTS_N_INSNS (1);
9142 else if (flag_pic
9143 && GET_CODE (XEXP (x, 0)) == PLUS
9144 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9145 /* This will be split into two instructions.
9146 See arm.md:calculate_pic_address. */
9147 *total = COSTS_N_INSNS (2);
9148 else
9149 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9150 return true;
9152 case DIV:
9153 case MOD:
9154 case UDIV:
9155 case UMOD:
9156 /* Needs a libcall, so it costs about this. */
9157 *total = COSTS_N_INSNS (2);
9158 return false;
9160 case ROTATE:
9161 if (mode == SImode && REG_P (XEXP (x, 1)))
9163 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9164 return true;
9166 /* Fall through */
9167 case ROTATERT:
9168 case ASHIFT:
9169 case LSHIFTRT:
9170 case ASHIFTRT:
9171 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9173 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9174 return true;
9176 else if (mode == SImode)
9178 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9179 /* Slightly disparage register shifts, but not by much. */
9180 if (!CONST_INT_P (XEXP (x, 1)))
9181 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9182 return true;
9185 /* Needs a libcall. */
9186 *total = COSTS_N_INSNS (2);
9187 return false;
9189 case MINUS:
9190 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9191 && (mode == SFmode || !TARGET_VFP_SINGLE))
9193 *total = COSTS_N_INSNS (1);
9194 return false;
9197 if (mode == SImode)
9199 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9200 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9202 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9203 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9204 || subcode1 == ROTATE || subcode1 == ROTATERT
9205 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9206 || subcode1 == ASHIFTRT)
9208 /* It's just the cost of the two operands. */
9209 *total = 0;
9210 return false;
9213 *total = COSTS_N_INSNS (1);
9214 return false;
9217 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9218 return false;
9220 case PLUS:
9221 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9222 && (mode == SFmode || !TARGET_VFP_SINGLE))
9224 *total = COSTS_N_INSNS (1);
9225 return false;
9228 /* A shift as a part of ADD costs nothing. */
9229 if (GET_CODE (XEXP (x, 0)) == MULT
9230 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9232 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9233 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9234 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9235 return true;
9238 /* Fall through */
9239 case AND: case XOR: case IOR:
9240 if (mode == SImode)
9242 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9244 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9245 || subcode == LSHIFTRT || subcode == ASHIFTRT
9246 || (code == AND && subcode == NOT))
9248 /* It's just the cost of the two operands. */
9249 *total = 0;
9250 return false;
9254 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9255 return false;
9257 case MULT:
9258 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9259 return false;
9261 case NEG:
9262 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9263 && (mode == SFmode || !TARGET_VFP_SINGLE))
9265 *total = COSTS_N_INSNS (1);
9266 return false;
9269 /* Fall through */
9270 case NOT:
9271 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9273 return false;
9275 case IF_THEN_ELSE:
9276 *total = 0;
9277 return false;
9279 case COMPARE:
9280 if (cc_register (XEXP (x, 0), VOIDmode))
9281 *total = 0;
9282 else
9283 *total = COSTS_N_INSNS (1);
9284 return false;
9286 case ABS:
9287 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9288 && (mode == SFmode || !TARGET_VFP_SINGLE))
9289 *total = COSTS_N_INSNS (1);
9290 else
9291 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9292 return false;
9294 case SIGN_EXTEND:
9295 case ZERO_EXTEND:
9296 return arm_rtx_costs_1 (x, outer_code, total, 0);
9298 case CONST_INT:
9299 if (const_ok_for_arm (INTVAL (x)))
9300 /* A multiplication by a constant requires another instruction
9301 to load the constant to a register. */
9302 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9303 ? 1 : 0);
9304 else if (const_ok_for_arm (~INTVAL (x)))
9305 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9306 else if (const_ok_for_arm (-INTVAL (x)))
9308 if (outer_code == COMPARE || outer_code == PLUS
9309 || outer_code == MINUS)
9310 *total = 0;
9311 else
9312 *total = COSTS_N_INSNS (1);
9314 else
9315 *total = COSTS_N_INSNS (2);
9316 return true;
9318 case CONST:
9319 case LABEL_REF:
9320 case SYMBOL_REF:
9321 *total = COSTS_N_INSNS (2);
9322 return true;
9324 case CONST_DOUBLE:
9325 *total = COSTS_N_INSNS (4);
9326 return true;
9328 case CONST_VECTOR:
9329 if (TARGET_NEON
9330 && TARGET_HARD_FLOAT
9331 && outer_code == SET
9332 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9333 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9334 *total = COSTS_N_INSNS (1);
9335 else
9336 *total = COSTS_N_INSNS (4);
9337 return true;
9339 case HIGH:
9340 case LO_SUM:
9341 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9342 cost of these slightly. */
9343 *total = COSTS_N_INSNS (1) + 1;
9344 return true;
9346 case SET:
9347 return false;
9349 default:
9350 if (mode != VOIDmode)
9351 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9352 else
9353 *total = COSTS_N_INSNS (4); /* Who knows? */
9354 return false;
9358 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9359 operand, then return the operand that is being shifted. If the shift
9360 is not by a constant, then set SHIFT_REG to point to the operand.
9361 Return NULL if OP is not a shifter operand. */
9362 static rtx
9363 shifter_op_p (rtx op, rtx *shift_reg)
9365 enum rtx_code code = GET_CODE (op);
9367 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9368 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9369 return XEXP (op, 0);
9370 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9371 return XEXP (op, 0);
9372 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9373 || code == ASHIFTRT)
9375 if (!CONST_INT_P (XEXP (op, 1)))
9376 *shift_reg = XEXP (op, 1);
9377 return XEXP (op, 0);
9380 return NULL;
9383 static bool
9384 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9386 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9387 gcc_assert (GET_CODE (x) == UNSPEC);
9389 switch (XINT (x, 1))
9391 case UNSPEC_UNALIGNED_LOAD:
9392 /* We can only do unaligned loads into the integer unit, and we can't
9393 use LDM or LDRD. */
9394 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9395 if (speed_p)
9396 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9397 + extra_cost->ldst.load_unaligned);
9399 #ifdef NOT_YET
9400 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9401 ADDR_SPACE_GENERIC, speed_p);
9402 #endif
9403 return true;
9405 case UNSPEC_UNALIGNED_STORE:
9406 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9407 if (speed_p)
9408 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9409 + extra_cost->ldst.store_unaligned);
9411 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9412 #ifdef NOT_YET
9413 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9414 ADDR_SPACE_GENERIC, speed_p);
9415 #endif
9416 return true;
9418 case UNSPEC_VRINTZ:
9419 case UNSPEC_VRINTP:
9420 case UNSPEC_VRINTM:
9421 case UNSPEC_VRINTR:
9422 case UNSPEC_VRINTX:
9423 case UNSPEC_VRINTA:
9424 *cost = COSTS_N_INSNS (1);
9425 if (speed_p)
9426 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9428 return true;
9429 default:
9430 *cost = COSTS_N_INSNS (2);
9431 break;
9433 return false;
9436 /* Cost of a libcall. We assume one insn per argument, an amount for the
9437 call (one insn for -Os) and then one for processing the result. */
9438 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
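/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size,
   i.e. two insns for the arguments plus the per-call overhead above.  */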
9440 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9441 do \
9443 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9444 if (shift_op != NULL \
9445 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9447 if (shift_reg) \
9449 if (speed_p) \
9450 *cost += extra_cost->alu.arith_shift_reg; \
9451 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9453 else if (speed_p) \
9454 *cost += extra_cost->alu.arith_shift; \
9456 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9457 + rtx_cost (XEXP (x, 1 - IDX), \
9458 OP, 1, speed_p)); \
9459 return true; \
9462 while (0);
9464 /* RTX costs. Make an estimate of the cost of executing the operation
9465 X, which is contained within an operation with code OUTER_CODE.
9466 SPEED_P indicates whether the cost desired is the performance cost,
9467 or the size cost. The estimate is stored in COST and the return
9468 value is TRUE if the cost calculation is final, or FALSE if the
9469 caller should recurse through the operands of X to add additional
9470 costs.
9472 We currently make no attempt to model the size savings of Thumb-2
9473 16-bit instructions. At the normal points in compilation where
9474 this code is called we have no measure of whether the condition
9475 flags are live or not, and thus no realistic way to determine what
9476 the size will eventually be. */
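/* As one illustration of the contract: for a register-to-register SImode
   addition the PLUS case below sets *COST to COSTS_N_INSNS (1) (plus the
   tuning's alu.arith when SPEED_P) and returns false, letting the caller
   recurse into the two operands and add their costs.  */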
9477 static bool
9478 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9479 const struct cpu_cost_table *extra_cost,
9480 int *cost, bool speed_p)
9482 machine_mode mode = GET_MODE (x);
9484 if (TARGET_THUMB1)
9486 if (speed_p)
9487 *cost = thumb1_rtx_costs (x, code, outer_code);
9488 else
9489 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9490 return true;
9493 switch (code)
9495 case SET:
9496 *cost = 0;
9497 /* SET RTXs don't have a mode so we get it from the destination. */
9498 mode = GET_MODE (SET_DEST (x));
9500 if (REG_P (SET_SRC (x))
9501 && REG_P (SET_DEST (x)))
9503 /* Assume that most copies can be done with a single insn,
9504 unless we don't have HW FP, in which case everything
9505 larger than word mode will require two insns. */
9506 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9507 && GET_MODE_SIZE (mode) > 4)
9508 || mode == DImode)
9509 ? 2 : 1);
9510 /* Conditional register moves can be encoded
9511 in 16 bits in Thumb mode. */
9512 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9513 *cost >>= 1;
9515 return true;
9518 if (CONST_INT_P (SET_SRC (x)))
9520 /* Handle CONST_INT here, since the value doesn't have a mode
9521 and we would otherwise be unable to work out the true cost. */
9522 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9523 outer_code = SET;
9524 /* Slightly lower the cost of setting a core reg to a constant.
9525 This helps break up chains and allows for better scheduling. */
9526 if (REG_P (SET_DEST (x))
9527 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9528 *cost -= 1;
9529 x = SET_SRC (x);
9530 /* Immediate moves with an immediate in the range [0, 255] can be
9531 encoded in 16 bits in Thumb mode. */
9532 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9533 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9534 *cost >>= 1;
9535 goto const_int_cost;
9538 return false;
9540 case MEM:
9541 /* A memory access costs one insn if the mode is small or the address is
9542 a single register; otherwise it costs one insn per word. */
9543 if (REG_P (XEXP (x, 0)))
9544 *cost = COSTS_N_INSNS (1);
9545 else if (flag_pic
9546 && GET_CODE (XEXP (x, 0)) == PLUS
9547 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9548 /* This will be split into two instructions.
9549 See arm.md:calculate_pic_address. */
9550 *cost = COSTS_N_INSNS (2);
9551 else
9552 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9554 /* For speed optimizations, add the costs of the address and
9555 accessing memory. */
9556 if (speed_p)
9557 #ifdef NOT_YET
9558 *cost += (extra_cost->ldst.load
9559 + arm_address_cost (XEXP (x, 0), mode,
9560 ADDR_SPACE_GENERIC, speed_p));
9561 #else
9562 *cost += extra_cost->ldst.load;
9563 #endif
9564 return true;
9566 case PARALLEL:
9568 /* Calculations of LDM costs are complex. We assume an initial cost
9569 (ldm_1st) which will load the number of registers mentioned in
9570 ldm_regs_per_insn_1st registers; then each additional
9571 ldm_regs_per_insn_subsequent registers cost one more insn. The
9572 formula for N regs is thus:
9574 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9575 + ldm_regs_per_insn_subsequent - 1)
9576 / ldm_regs_per_insn_subsequent).
9578 Additional costs may also be added for addressing. A similar
9579 formula is used for STM. */
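        /* Purely illustrative example with made-up tuning numbers: loading
           six registers with ldm_regs_per_insn_1st == 4 and
           ldm_regs_per_insn_subsequent == 2 costs
           ldm_1st + COSTS_N_INSNS ((MAX (6 - 4, 0) + 2 - 1) / 2)
           = ldm_1st + COSTS_N_INSNS (1).  */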
9581 bool is_ldm = load_multiple_operation (x, SImode);
9582 bool is_stm = store_multiple_operation (x, SImode);
9584 *cost = COSTS_N_INSNS (1);
9586 if (is_ldm || is_stm)
9588 if (speed_p)
9590 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9591 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9592 ? extra_cost->ldst.ldm_regs_per_insn_1st
9593 : extra_cost->ldst.stm_regs_per_insn_1st;
9594 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9595 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9596 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9598 *cost += regs_per_insn_1st
9599 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9600 + regs_per_insn_sub - 1)
9601 / regs_per_insn_sub);
9602 return true;
9606 return false;
9608 case DIV:
9609 case UDIV:
9610 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9611 && (mode == SFmode || !TARGET_VFP_SINGLE))
9612 *cost = COSTS_N_INSNS (speed_p
9613 ? extra_cost->fp[mode != SFmode].div : 1);
9614 else if (mode == SImode && TARGET_IDIV)
9615 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9616 else
9617 *cost = LIBCALL_COST (2);
9618 return false; /* All arguments must be in registers. */
9620 case MOD:
9621 case UMOD:
9622 *cost = LIBCALL_COST (2);
9623 return false; /* All arguments must be in registers. */
9625 case ROTATE:
9626 if (mode == SImode && REG_P (XEXP (x, 1)))
9628 *cost = (COSTS_N_INSNS (2)
9629 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9630 if (speed_p)
9631 *cost += extra_cost->alu.shift_reg;
9632 return true;
9634 /* Fall through. */
9635 case ROTATERT:
9636 case ASHIFT:
9637 case LSHIFTRT:
9638 case ASHIFTRT:
9639 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9641 *cost = (COSTS_N_INSNS (3)
9642 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9643 if (speed_p)
9644 *cost += 2 * extra_cost->alu.shift;
9645 return true;
9647 else if (mode == SImode)
9649 *cost = (COSTS_N_INSNS (1)
9650 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9651 /* Slightly disparage register shifts at -Os, but not by much. */
9652 if (!CONST_INT_P (XEXP (x, 1)))
9653 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9654 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9655 return true;
9657 else if (GET_MODE_CLASS (mode) == MODE_INT
9658 && GET_MODE_SIZE (mode) < 4)
9660 if (code == ASHIFT)
9662 *cost = (COSTS_N_INSNS (1)
9663 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9664 /* Slightly disparage register shifts at -Os, but not by
9665 much. */
9666 if (!CONST_INT_P (XEXP (x, 1)))
9667 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9668 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9670 else if (code == LSHIFTRT || code == ASHIFTRT)
9672 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9674 /* Can use SBFX/UBFX. */
9675 *cost = COSTS_N_INSNS (1);
9676 if (speed_p)
9677 *cost += extra_cost->alu.bfx;
9678 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9680 else
9682 *cost = COSTS_N_INSNS (2);
9683 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9684 if (speed_p)
9686 if (CONST_INT_P (XEXP (x, 1)))
9687 *cost += 2 * extra_cost->alu.shift;
9688 else
9689 *cost += (extra_cost->alu.shift
9690 + extra_cost->alu.shift_reg);
9692 else
9693 /* Slightly disparage register shifts. */
9694 *cost += !CONST_INT_P (XEXP (x, 1));
9697 else /* Rotates. */
9699 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9700 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9701 if (speed_p)
9703 if (CONST_INT_P (XEXP (x, 1)))
9704 *cost += (2 * extra_cost->alu.shift
9705 + extra_cost->alu.log_shift);
9706 else
9707 *cost += (extra_cost->alu.shift
9708 + extra_cost->alu.shift_reg
9709 + extra_cost->alu.log_shift_reg);
9712 return true;
9715 *cost = LIBCALL_COST (2);
9716 return false;
9718 case BSWAP:
9719 if (arm_arch6)
9721 if (mode == SImode)
9723 *cost = COSTS_N_INSNS (1);
9724 if (speed_p)
9725 *cost += extra_cost->alu.rev;
9727 return false;
9730 else
9732 /* No rev instruction available. Look at arm_legacy_rev
9733 and thumb_legacy_rev for the form of RTL used then. */
9734 if (TARGET_THUMB)
9736 *cost = COSTS_N_INSNS (10);
9738 if (speed_p)
9740 *cost += 6 * extra_cost->alu.shift;
9741 *cost += 3 * extra_cost->alu.logical;
9744 else
9746 *cost = COSTS_N_INSNS (5);
9748 if (speed_p)
9750 *cost += 2 * extra_cost->alu.shift;
9751 *cost += extra_cost->alu.arith_shift;
9752 *cost += 2 * extra_cost->alu.logical;
9755 return true;
9757 return false;
9759 case MINUS:
9760 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9761 && (mode == SFmode || !TARGET_VFP_SINGLE))
9763 *cost = COSTS_N_INSNS (1);
9764 if (GET_CODE (XEXP (x, 0)) == MULT
9765 || GET_CODE (XEXP (x, 1)) == MULT)
9767 rtx mul_op0, mul_op1, sub_op;
9769 if (speed_p)
9770 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9772 if (GET_CODE (XEXP (x, 0)) == MULT)
9774 mul_op0 = XEXP (XEXP (x, 0), 0);
9775 mul_op1 = XEXP (XEXP (x, 0), 1);
9776 sub_op = XEXP (x, 1);
9778 else
9780 mul_op0 = XEXP (XEXP (x, 1), 0);
9781 mul_op1 = XEXP (XEXP (x, 1), 1);
9782 sub_op = XEXP (x, 0);
9785 /* The first operand of the multiply may be optionally
9786 negated. */
9787 if (GET_CODE (mul_op0) == NEG)
9788 mul_op0 = XEXP (mul_op0, 0);
9790 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9791 + rtx_cost (mul_op1, code, 0, speed_p)
9792 + rtx_cost (sub_op, code, 0, speed_p));
9794 return true;
9797 if (speed_p)
9798 *cost += extra_cost->fp[mode != SFmode].addsub;
9799 return false;
9802 if (mode == SImode)
9804 rtx shift_by_reg = NULL;
9805 rtx shift_op;
9806 rtx non_shift_op;
9808 *cost = COSTS_N_INSNS (1);
9810 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9811 if (shift_op == NULL)
9813 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9814 non_shift_op = XEXP (x, 0);
9816 else
9817 non_shift_op = XEXP (x, 1);
9819 if (shift_op != NULL)
9821 if (shift_by_reg != NULL)
9823 if (speed_p)
9824 *cost += extra_cost->alu.arith_shift_reg;
9825 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9827 else if (speed_p)
9828 *cost += extra_cost->alu.arith_shift;
9830 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9831 + rtx_cost (non_shift_op, code, 0, speed_p));
9832 return true;
9835 if (arm_arch_thumb2
9836 && GET_CODE (XEXP (x, 1)) == MULT)
9838 /* MLS. */
9839 if (speed_p)
9840 *cost += extra_cost->mult[0].add;
9841 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9842 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9843 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9844 return true;
9847 if (CONST_INT_P (XEXP (x, 0)))
9849 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9850 INTVAL (XEXP (x, 0)), NULL_RTX,
9851 NULL_RTX, 1, 0);
9852 *cost = COSTS_N_INSNS (insns);
9853 if (speed_p)
9854 *cost += insns * extra_cost->alu.arith;
9855 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9856 return true;
9858 else if (speed_p)
9859 *cost += extra_cost->alu.arith;
9861 return false;
9864 if (GET_MODE_CLASS (mode) == MODE_INT
9865 && GET_MODE_SIZE (mode) < 4)
9867 rtx shift_op, shift_reg;
9868 shift_reg = NULL;
9870 /* We check both sides of the MINUS for shifter operands since,
9871 unlike PLUS, it's not commutative. */
9873 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9874 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9876 /* Slightly disparage, as we might need to widen the result. */
9877 *cost = 1 + COSTS_N_INSNS (1);
9878 if (speed_p)
9879 *cost += extra_cost->alu.arith;
9881 if (CONST_INT_P (XEXP (x, 0)))
9883 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9884 return true;
9887 return false;
9890 if (mode == DImode)
9892 *cost = COSTS_N_INSNS (2);
9894 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9896 rtx op1 = XEXP (x, 1);
9898 if (speed_p)
9899 *cost += 2 * extra_cost->alu.arith;
9901 if (GET_CODE (op1) == ZERO_EXTEND)
9902 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9903 else
9904 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9905 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9906 0, speed_p);
9907 return true;
9909 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9911 if (speed_p)
9912 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9913 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9914 0, speed_p)
9915 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9916 return true;
9918 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9919 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9921 if (speed_p)
9922 *cost += (extra_cost->alu.arith
9923 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9924 ? extra_cost->alu.arith
9925 : extra_cost->alu.arith_shift));
9926 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9927 + rtx_cost (XEXP (XEXP (x, 1), 0),
9928 GET_CODE (XEXP (x, 1)), 0, speed_p));
9929 return true;
9932 if (speed_p)
9933 *cost += 2 * extra_cost->alu.arith;
9934 return false;
9937 /* Vector mode? */
9939 *cost = LIBCALL_COST (2);
9940 return false;
9942 case PLUS:
9943 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9944 && (mode == SFmode || !TARGET_VFP_SINGLE))
9946 *cost = COSTS_N_INSNS (1);
9947 if (GET_CODE (XEXP (x, 0)) == MULT)
9949 rtx mul_op0, mul_op1, add_op;
9951 if (speed_p)
9952 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9954 mul_op0 = XEXP (XEXP (x, 0), 0);
9955 mul_op1 = XEXP (XEXP (x, 0), 1);
9956 add_op = XEXP (x, 1);
9958 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9959 + rtx_cost (mul_op1, code, 0, speed_p)
9960 + rtx_cost (add_op, code, 0, speed_p));
9962 return true;
9965 if (speed_p)
9966 *cost += extra_cost->fp[mode != SFmode].addsub;
9967 return false;
9969 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9971 *cost = LIBCALL_COST (2);
9972 return false;
9975 /* Narrow modes can be synthesized in SImode, but the range
9976 of useful sub-operations is limited. Check for shift operations
9977 on one of the operands. Only left shifts can be used in the
9978 narrow modes. */
9979 if (GET_MODE_CLASS (mode) == MODE_INT
9980 && GET_MODE_SIZE (mode) < 4)
9982 rtx shift_op, shift_reg;
9983 shift_reg = NULL;
9985 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9987 if (CONST_INT_P (XEXP (x, 1)))
9989 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9990 INTVAL (XEXP (x, 1)), NULL_RTX,
9991 NULL_RTX, 1, 0);
9992 *cost = COSTS_N_INSNS (insns);
9993 if (speed_p)
9994 *cost += insns * extra_cost->alu.arith;
9995 /* Slightly penalize a narrow operation as the result may
9996 need widening. */
9997 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9998 return true;
10001 /* Slightly penalize a narrow operation as the result may
10002 need widening. */
10003 *cost = 1 + COSTS_N_INSNS (1);
10004 if (speed_p)
10005 *cost += extra_cost->alu.arith;
10007 return false;
10010 if (mode == SImode)
10012 rtx shift_op, shift_reg;
10014 *cost = COSTS_N_INSNS (1);
10015 if (TARGET_INT_SIMD
10016 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10017 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10019 /* UXTA[BH] or SXTA[BH]. */
10020 if (speed_p)
10021 *cost += extra_cost->alu.extend_arith;
10022 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10023 speed_p)
10024 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
10025 return true;
10028 shift_reg = NULL;
10029 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10030 if (shift_op != NULL)
10032 if (shift_reg)
10034 if (speed_p)
10035 *cost += extra_cost->alu.arith_shift_reg;
10036 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10038 else if (speed_p)
10039 *cost += extra_cost->alu.arith_shift;
10041 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10042 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10043 return true;
10045 if (GET_CODE (XEXP (x, 0)) == MULT)
10047 rtx mul_op = XEXP (x, 0);
10049 *cost = COSTS_N_INSNS (1);
10051 if (TARGET_DSP_MULTIPLY
10052 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10053 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10054 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10055 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10056 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10057 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10058 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10059 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10060 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10061 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10062 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10063 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10064 == 16))))))
10066 /* SMLA[BT][BT]. */
10067 if (speed_p)
10068 *cost += extra_cost->mult[0].extend_add;
10069 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10070 SIGN_EXTEND, 0, speed_p)
10071 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10072 SIGN_EXTEND, 0, speed_p)
10073 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10074 return true;
10077 if (speed_p)
10078 *cost += extra_cost->mult[0].add;
10079 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10080 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10081 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10082 return true;
10084 if (CONST_INT_P (XEXP (x, 1)))
10086 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10087 INTVAL (XEXP (x, 1)), NULL_RTX,
10088 NULL_RTX, 1, 0);
10089 *cost = COSTS_N_INSNS (insns);
10090 if (speed_p)
10091 *cost += insns * extra_cost->alu.arith;
10092 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10093 return true;
10095 else if (speed_p)
10096 *cost += extra_cost->alu.arith;
10098 return false;
10101 if (mode == DImode)
10103 if (arm_arch3m
10104 && GET_CODE (XEXP (x, 0)) == MULT
10105 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10106 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10107 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10108 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10110 *cost = COSTS_N_INSNS (1);
10111 if (speed_p)
10112 *cost += extra_cost->mult[1].extend_add;
10113 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10114 ZERO_EXTEND, 0, speed_p)
10115 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10116 ZERO_EXTEND, 0, speed_p)
10117 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10118 return true;
10121 *cost = COSTS_N_INSNS (2);
10123 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10124 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10126 if (speed_p)
10127 *cost += (extra_cost->alu.arith
10128 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10129 ? extra_cost->alu.arith
10130 : extra_cost->alu.arith_shift));
10132 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10133 speed_p)
10134 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10135 return true;
10138 if (speed_p)
10139 *cost += 2 * extra_cost->alu.arith;
10140 return false;
10143 /* Vector mode? */
10144 *cost = LIBCALL_COST (2);
10145 return false;
10146 case IOR:
10147 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10149 *cost = COSTS_N_INSNS (1);
10150 if (speed_p)
10151 *cost += extra_cost->alu.rev;
10153 return true;
10155 /* Fall through. */
10156 case AND: case XOR:
10157 if (mode == SImode)
10159 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10160 rtx op0 = XEXP (x, 0);
10161 rtx shift_op, shift_reg;
10163 *cost = COSTS_N_INSNS (1);
10165 if (subcode == NOT
10166 && (code == AND
10167 || (code == IOR && TARGET_THUMB2)))
10168 op0 = XEXP (op0, 0);
10170 shift_reg = NULL;
10171 shift_op = shifter_op_p (op0, &shift_reg);
10172 if (shift_op != NULL)
10174 if (shift_reg)
10176 if (speed_p)
10177 *cost += extra_cost->alu.log_shift_reg;
10178 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10180 else if (speed_p)
10181 *cost += extra_cost->alu.log_shift;
10183 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10184 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10185 return true;
10188 if (CONST_INT_P (XEXP (x, 1)))
10190 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10191 INTVAL (XEXP (x, 1)), NULL_RTX,
10192 NULL_RTX, 1, 0);
10194 *cost = COSTS_N_INSNS (insns);
10195 if (speed_p)
10196 *cost += insns * extra_cost->alu.logical;
10197 *cost += rtx_cost (op0, code, 0, speed_p);
10198 return true;
10201 if (speed_p)
10202 *cost += extra_cost->alu.logical;
10203 *cost += (rtx_cost (op0, code, 0, speed_p)
10204 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10205 return true;
10208 if (mode == DImode)
10210 rtx op0 = XEXP (x, 0);
10211 enum rtx_code subcode = GET_CODE (op0);
10213 *cost = COSTS_N_INSNS (2);
10215 if (subcode == NOT
10216 && (code == AND
10217 || (code == IOR && TARGET_THUMB2)))
10218 op0 = XEXP (op0, 0);
10220 if (GET_CODE (op0) == ZERO_EXTEND)
10222 if (speed_p)
10223 *cost += 2 * extra_cost->alu.logical;
10225 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10226 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10227 return true;
10229 else if (GET_CODE (op0) == SIGN_EXTEND)
10231 if (speed_p)
10232 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10234 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10235 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10236 return true;
10239 if (speed_p)
10240 *cost += 2 * extra_cost->alu.logical;
10242 return true;
10244 /* Vector mode? */
10246 *cost = LIBCALL_COST (2);
10247 return false;
10249 case MULT:
10250 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10251 && (mode == SFmode || !TARGET_VFP_SINGLE))
10253 rtx op0 = XEXP (x, 0);
10255 *cost = COSTS_N_INSNS (1);
10257 if (GET_CODE (op0) == NEG)
10258 op0 = XEXP (op0, 0);
10260 if (speed_p)
10261 *cost += extra_cost->fp[mode != SFmode].mult;
10263 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10264 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10265 return true;
10267 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10269 *cost = LIBCALL_COST (2);
10270 return false;
10273 if (mode == SImode)
10275 *cost = COSTS_N_INSNS (1);
10276 if (TARGET_DSP_MULTIPLY
10277 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10278 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10279 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10280 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10281 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10282 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10283 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10284 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10285 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10286 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10287 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10288 && (INTVAL (XEXP (XEXP (x, 1), 1))
10289 == 16))))))
10291 /* SMUL[TB][TB]. */
10292 if (speed_p)
10293 *cost += extra_cost->mult[0].extend;
10294 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10295 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10296 return true;
10298 if (speed_p)
10299 *cost += extra_cost->mult[0].simple;
10300 return false;
10303 if (mode == DImode)
10305 if (arm_arch3m
10306 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10307 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10308 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10309 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10311 *cost = COSTS_N_INSNS (1);
10312 if (speed_p)
10313 *cost += extra_cost->mult[1].extend;
10314 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10315 ZERO_EXTEND, 0, speed_p)
10316 + rtx_cost (XEXP (XEXP (x, 1), 0),
10317 ZERO_EXTEND, 0, speed_p));
10318 return true;
10321 *cost = LIBCALL_COST (2);
10322 return false;
10325 /* Vector mode? */
10326 *cost = LIBCALL_COST (2);
10327 return false;
10329 case NEG:
10330 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10331 && (mode == SFmode || !TARGET_VFP_SINGLE))
10333 *cost = COSTS_N_INSNS (1);
10334 if (speed_p)
10335 *cost += extra_cost->fp[mode != SFmode].neg;
10337 return false;
10339 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10341 *cost = LIBCALL_COST (1);
10342 return false;
10345 if (mode == SImode)
10347 if (GET_CODE (XEXP (x, 0)) == ABS)
10349 *cost = COSTS_N_INSNS (2);
10350 /* Assume the non-flag-changing variant. */
10351 if (speed_p)
10352 *cost += (extra_cost->alu.log_shift
10353 + extra_cost->alu.arith_shift);
10354 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10355 return true;
10358 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10359 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10361 *cost = COSTS_N_INSNS (2);
10362 /* No extra cost for MOV imm and MVN imm. */
10363 /* If the comparison op is using the flags, there's no further
10364 cost, otherwise we need to add the cost of the comparison. */
10365 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10366 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10367 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10369 *cost += (COSTS_N_INSNS (1)
10370 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10371 speed_p)
10372 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10373 speed_p));
10374 if (speed_p)
10375 *cost += extra_cost->alu.arith;
10377 return true;
10379 *cost = COSTS_N_INSNS (1);
10380 if (speed_p)
10381 *cost += extra_cost->alu.arith;
10382 return false;
10385 if (GET_MODE_CLASS (mode) == MODE_INT
10386 && GET_MODE_SIZE (mode) < 4)
10388 /* Slightly disparage, as we might need an extend operation. */
10389 *cost = 1 + COSTS_N_INSNS (1);
10390 if (speed_p)
10391 *cost += extra_cost->alu.arith;
10392 return false;
10395 if (mode == DImode)
10397 *cost = COSTS_N_INSNS (2);
10398 if (speed_p)
10399 *cost += 2 * extra_cost->alu.arith;
10400 return false;
10403 /* Vector mode? */
10404 *cost = LIBCALL_COST (1);
10405 return false;
10407 case NOT:
10408 if (mode == SImode)
10410 rtx shift_op;
10411 rtx shift_reg = NULL;
10413 *cost = COSTS_N_INSNS (1);
10414 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10416 if (shift_op)
10418 if (shift_reg != NULL)
10420 if (speed_p)
10421 *cost += extra_cost->alu.log_shift_reg;
10422 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10424 else if (speed_p)
10425 *cost += extra_cost->alu.log_shift;
10426 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10427 return true;
10430 if (speed_p)
10431 *cost += extra_cost->alu.logical;
10432 return false;
10434 if (mode == DImode)
10436 *cost = COSTS_N_INSNS (2);
10437 return false;
10440 /* Vector mode? */
10442 *cost += LIBCALL_COST (1);
10443 return false;
10445 case IF_THEN_ELSE:
10447 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10449 *cost = COSTS_N_INSNS (4);
10450 return true;
10452 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10453 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10455 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10456 /* Assume that if one arm of the if_then_else is a register,
10457 it will be tied with the result, eliminating the
10458 conditional insn. */
10459 if (REG_P (XEXP (x, 1)))
10460 *cost += op2cost;
10461 else if (REG_P (XEXP (x, 2)))
10462 *cost += op1cost;
10463 else
10465 if (speed_p)
10467 if (extra_cost->alu.non_exec_costs_exec)
10468 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10469 else
10470 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10472 else
10473 *cost += op1cost + op2cost;
10476 return true;
10478 case COMPARE:
10479 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10480 *cost = 0;
10481 else
10483 machine_mode op0mode;
10484 /* We'll mostly assume that the cost of a compare is the cost of the
10485 LHS. However, there are some notable exceptions. */
10487 /* Floating point compares are never done as side-effects. */
10488 op0mode = GET_MODE (XEXP (x, 0));
10489 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10490 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10492 *cost = COSTS_N_INSNS (1);
10493 if (speed_p)
10494 *cost += extra_cost->fp[op0mode != SFmode].compare;
10496 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10498 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10499 return true;
10502 return false;
10504 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10506 *cost = LIBCALL_COST (2);
10507 return false;
10510 /* DImode compares normally take two insns. */
10511 if (op0mode == DImode)
10513 *cost = COSTS_N_INSNS (2);
10514 if (speed_p)
10515 *cost += 2 * extra_cost->alu.arith;
10516 return false;
10519 if (op0mode == SImode)
10521 rtx shift_op;
10522 rtx shift_reg;
10524 if (XEXP (x, 1) == const0_rtx
10525 && !(REG_P (XEXP (x, 0))
10526 || (GET_CODE (XEXP (x, 0)) == SUBREG
10527 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10529 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10531 /* Multiply operations that set the flags are often
10532 significantly more expensive. */
10533 if (speed_p
10534 && GET_CODE (XEXP (x, 0)) == MULT
10535 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10536 *cost += extra_cost->mult[0].flag_setting;
10538 if (speed_p
10539 && GET_CODE (XEXP (x, 0)) == PLUS
10540 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10541 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10542 0), 1), mode))
10543 *cost += extra_cost->mult[0].flag_setting;
10544 return true;
10547 shift_reg = NULL;
10548 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10549 if (shift_op != NULL)
10551 *cost = COSTS_N_INSNS (1);
10552 if (shift_reg != NULL)
10554 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10555 if (speed_p)
10556 *cost += extra_cost->alu.arith_shift_reg;
10558 else if (speed_p)
10559 *cost += extra_cost->alu.arith_shift;
10560 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10561 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10562 return true;
10565 *cost = COSTS_N_INSNS (1);
10566 if (speed_p)
10567 *cost += extra_cost->alu.arith;
10568 if (CONST_INT_P (XEXP (x, 1))
10569 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10571 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10572 return true;
10574 return false;
10577 /* Vector mode? */
10579 *cost = LIBCALL_COST (2);
10580 return false;
10582 return true;
10584 case EQ:
10585 case NE:
10586 case LT:
10587 case LE:
10588 case GT:
10589 case GE:
10590 case LTU:
10591 case LEU:
10592 case GEU:
10593 case GTU:
10594 case ORDERED:
10595 case UNORDERED:
10596 case UNEQ:
10597 case UNLE:
10598 case UNLT:
10599 case UNGE:
10600 case UNGT:
10601 case LTGT:
10602 if (outer_code == SET)
10604 /* Is it a store-flag operation? */
10605 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10606 && XEXP (x, 1) == const0_rtx)
10608 /* Thumb also needs an IT insn. */
10609 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10610 return true;
10612 if (XEXP (x, 1) == const0_rtx)
10614 switch (code)
10616 case LT:
10617 /* LSR Rd, Rn, #31. */
10618 *cost = COSTS_N_INSNS (1);
10619 if (speed_p)
10620 *cost += extra_cost->alu.shift;
10621 break;
10623 case EQ:
10624 /* RSBS T1, Rn, #0
10625 ADC Rd, Rn, T1. */
10627 case NE:
10628 /* SUBS T1, Rn, #1
10629 SBC Rd, Rn, T1. */
10630 *cost = COSTS_N_INSNS (2);
10631 break;
10633 case LE:
10634 /* RSBS T1, Rn, Rn, LSR #31
10635 ADC Rd, Rn, T1. */
10636 *cost = COSTS_N_INSNS (2);
10637 if (speed_p)
10638 *cost += extra_cost->alu.arith_shift;
10639 break;
10641 case GT:
10642 /* RSB Rd, Rn, Rn, ASR #1
10643 LSR Rd, Rd, #31. */
10644 *cost = COSTS_N_INSNS (2);
10645 if (speed_p)
10646 *cost += (extra_cost->alu.arith_shift
10647 + extra_cost->alu.shift);
10648 break;
10650 case GE:
10651 /* ASR Rd, Rn, #31
10652 ADD Rd, Rn, #1. */
10653 *cost = COSTS_N_INSNS (2);
10654 if (speed_p)
10655 *cost += extra_cost->alu.shift;
10656 break;
10658 default:
10659 /* Remaining cases are either meaningless or would take
10660 three insns anyway. */
10661 *cost = COSTS_N_INSNS (3);
10662 break;
10664 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10665 return true;
10667 else
10669 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10670 if (CONST_INT_P (XEXP (x, 1))
10671 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10673 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10674 return true;
10677 return false;
10680 /* Not directly inside a set. If it involves the condition code
10681 register it must be the condition for a branch, cond_exec or
10682 I_T_E operation. Since the comparison is performed elsewhere
10683 this is just the control part which has no additional
10684 cost. */
10685 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10686 && XEXP (x, 1) == const0_rtx)
10688 *cost = 0;
10689 return true;
10691 return false;
10693 case ABS:
10694 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10695 && (mode == SFmode || !TARGET_VFP_SINGLE))
10697 *cost = COSTS_N_INSNS (1);
10698 if (speed_p)
10699 *cost += extra_cost->fp[mode != SFmode].neg;
10701 return false;
10703 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10705 *cost = LIBCALL_COST (1);
10706 return false;
10709 if (mode == SImode)
10711 *cost = COSTS_N_INSNS (1);
10712 if (speed_p)
10713 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10714 return false;
10716 /* Vector mode? */
10717 *cost = LIBCALL_COST (1);
10718 return false;
10720 case SIGN_EXTEND:
10721 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10722 && MEM_P (XEXP (x, 0)))
10724 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10726 if (mode == DImode)
10727 *cost += COSTS_N_INSNS (1);
10729 if (!speed_p)
10730 return true;
10732 if (GET_MODE (XEXP (x, 0)) == SImode)
10733 *cost += extra_cost->ldst.load;
10734 else
10735 *cost += extra_cost->ldst.load_sign_extend;
10737 if (mode == DImode)
10738 *cost += extra_cost->alu.shift;
10740 return true;
10743 /* Widening from less than 32 bits requires an extend operation. */
10744 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10746 /* We have SXTB/SXTH. */
10747 *cost = COSTS_N_INSNS (1);
10748 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10749 if (speed_p)
10750 *cost += extra_cost->alu.extend;
10752 else if (GET_MODE (XEXP (x, 0)) != SImode)
10754 /* Needs two shifts. */
10755 *cost = COSTS_N_INSNS (2);
10756 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10757 if (speed_p)
10758 *cost += 2 * extra_cost->alu.shift;
10761 /* Widening beyond 32 bits requires one more insn. */
10762 if (mode == DImode)
10764 *cost += COSTS_N_INSNS (1);
10765 if (speed_p)
10766 *cost += extra_cost->alu.shift;
10769 return true;
10771 case ZERO_EXTEND:
10772 if ((arm_arch4
10773 || GET_MODE (XEXP (x, 0)) == SImode
10774 || GET_MODE (XEXP (x, 0)) == QImode)
10775 && MEM_P (XEXP (x, 0)))
10777 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10779 if (mode == DImode)
10780 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10782 return true;
10785 /* Widening from less than 32 bits requires an extend operation. */
10786 if (GET_MODE (XEXP (x, 0)) == QImode)
10788 /* UXTB can be a shorter instruction in Thumb2, but it might
10789 be slower than the AND Rd, Rn, #255 alternative. When
10790 optimizing for speed it should never be slower to use
10791 AND, and we don't really model 16-bit vs 32-bit insns
10792 here. */
10793 *cost = COSTS_N_INSNS (1);
10794 if (speed_p)
10795 *cost += extra_cost->alu.logical;
10797 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10799 /* We have UXTB/UXTH. */
10800 *cost = COSTS_N_INSNS (1);
10801 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10802 if (speed_p)
10803 *cost += extra_cost->alu.extend;
10805 else if (GET_MODE (XEXP (x, 0)) != SImode)
10807 /* Needs two shifts. It's marginally preferable to use
10808 shifts rather than two BIC instructions as the second
10809 shift may merge with a subsequent insn as a shifter
10810 op. */
10811 *cost = COSTS_N_INSNS (2);
10812 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10813 if (speed_p)
10814 *cost += 2 * extra_cost->alu.shift;
10816 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10817 *cost = COSTS_N_INSNS (1);
10819 /* Widening beyond 32 bits requires one more insn. */
10820 if (mode == DImode)
10822 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10825 return true;
10827 case CONST_INT:
10828 *cost = 0;
10829 /* CONST_INT has no mode, so we cannot tell for sure how many
10830 insns are really going to be needed. The best we can do is
10831 look at the value passed. If it fits in SImode, then assume
10832 that's the mode it will be used for. Otherwise assume it
10833 will be used in DImode. */
10834 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10835 mode = SImode;
10836 else
10837 mode = DImode;
10839 /* Avoid blowing up in arm_gen_constant (). */
10840 if (!(outer_code == PLUS
10841 || outer_code == AND
10842 || outer_code == IOR
10843 || outer_code == XOR
10844 || outer_code == MINUS))
10845 outer_code = SET;
10847 const_int_cost:
10848 if (mode == SImode)
10850 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10851 INTVAL (x), NULL, NULL,
10852 0, 0));
10853 /* Extra costs? */
10855 else
10857 *cost += COSTS_N_INSNS (arm_gen_constant
10858 (outer_code, SImode, NULL,
10859 trunc_int_for_mode (INTVAL (x), SImode),
10860 NULL, NULL, 0, 0)
10861 + arm_gen_constant (outer_code, SImode, NULL,
10862 INTVAL (x) >> 32, NULL,
10863 NULL, 0, 0));
10864 /* Extra costs? */
10867 return true;
10869 case CONST:
10870 case LABEL_REF:
10871 case SYMBOL_REF:
10872 if (speed_p)
10874 if (arm_arch_thumb2 && !flag_pic)
10875 *cost = COSTS_N_INSNS (2);
10876 else
10877 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10879 else
10880 *cost = COSTS_N_INSNS (2);
10882 if (flag_pic)
10884 *cost += COSTS_N_INSNS (1);
10885 if (speed_p)
10886 *cost += extra_cost->alu.arith;
10889 return true;
10891 case CONST_FIXED:
10892 *cost = COSTS_N_INSNS (4);
10893 /* Fixme. */
10894 return true;
10896 case CONST_DOUBLE:
10897 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10898 && (mode == SFmode || !TARGET_VFP_SINGLE))
10900 if (vfp3_const_double_rtx (x))
10902 *cost = COSTS_N_INSNS (1);
10903 if (speed_p)
10904 *cost += extra_cost->fp[mode == DFmode].fpconst;
10905 return true;
10908 if (speed_p)
10910 *cost = COSTS_N_INSNS (1);
10911 if (mode == DFmode)
10912 *cost += extra_cost->ldst.loadd;
10913 else
10914 *cost += extra_cost->ldst.loadf;
10916 else
10917 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10919 return true;
10921 *cost = COSTS_N_INSNS (4);
10922 return true;
10924 case CONST_VECTOR:
10925 /* Fixme. */
10926 if (TARGET_NEON
10927 && TARGET_HARD_FLOAT
10928 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10929 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10930 *cost = COSTS_N_INSNS (1);
10931 else
10932 *cost = COSTS_N_INSNS (4);
10933 return true;
10935 case HIGH:
10936 case LO_SUM:
10937 *cost = COSTS_N_INSNS (1);
10938 /* When optimizing for size, we prefer constant pool entries to
10939 MOVW/MOVT pairs, so bump the cost of these slightly. */
10940 if (!speed_p)
10941 *cost += 1;
10942 return true;
10944 case CLZ:
10945 *cost = COSTS_N_INSNS (1);
10946 if (speed_p)
10947 *cost += extra_cost->alu.clz;
10948 return false;
10950 case SMIN:
10951 if (XEXP (x, 1) == const0_rtx)
10953 *cost = COSTS_N_INSNS (1);
10954 if (speed_p)
10955 *cost += extra_cost->alu.log_shift;
10956 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10957 return true;
10959 /* Fall through. */
10960 case SMAX:
10961 case UMIN:
10962 case UMAX:
10963 *cost = COSTS_N_INSNS (2);
10964 return false;
10966 case TRUNCATE:
10967 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10968 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10969 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10970 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10971 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10973 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10974 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10975 == ZERO_EXTEND))))
10977 *cost = COSTS_N_INSNS (1);
10978 if (speed_p)
10979 *cost += extra_cost->mult[1].extend;
10980 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10981 speed_p)
10982 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10983 0, speed_p));
10984 return true;
10986 *cost = LIBCALL_COST (1);
10987 return false;
10989 case UNSPEC:
10990 return arm_unspec_cost (x, outer_code, speed_p, cost);
10992 case PC:
10993 /* Reading the PC is like reading any other register. Writing it
10994 is more expensive, but we take that into account elsewhere. */
10995 *cost = 0;
10996 return true;
10998 case ZERO_EXTRACT:
10999 /* TODO: Simple zero_extract of bottom bits using AND. */
11000 /* Fall through. */
11001 case SIGN_EXTRACT:
11002 if (arm_arch6
11003 && mode == SImode
11004 && CONST_INT_P (XEXP (x, 1))
11005 && CONST_INT_P (XEXP (x, 2)))
11007 *cost = COSTS_N_INSNS (1);
11008 if (speed_p)
11009 *cost += extra_cost->alu.bfx;
11010 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11011 return true;
11013 /* Without UBFX/SBFX, need to resort to shift operations. */
11014 *cost = COSTS_N_INSNS (2);
11015 if (speed_p)
11016 *cost += 2 * extra_cost->alu.shift;
11017 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
11018 return true;
11020 case FLOAT_EXTEND:
11021 if (TARGET_HARD_FLOAT)
11023 *cost = COSTS_N_INSNS (1);
11024 if (speed_p)
11025 *cost += extra_cost->fp[mode == DFmode].widen;
11026 if (!TARGET_FPU_ARMV8
11027 && GET_MODE (XEXP (x, 0)) == HFmode)
11029 /* Pre v8, widening HF->DF is a two-step process, first
11030 widening to SFmode. */
11031 *cost += COSTS_N_INSNS (1);
11032 if (speed_p)
11033 *cost += extra_cost->fp[0].widen;
11035 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11036 return true;
11039 *cost = LIBCALL_COST (1);
11040 return false;
11042 case FLOAT_TRUNCATE:
11043 if (TARGET_HARD_FLOAT)
11045 *cost = COSTS_N_INSNS (1);
11046 if (speed_p)
11047 *cost += extra_cost->fp[mode == DFmode].narrow;
11048 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11049 return true;
11050 /* Vector modes? */
11052 *cost = LIBCALL_COST (1);
11053 return false;
11055 case FMA:
11056 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11058 rtx op0 = XEXP (x, 0);
11059 rtx op1 = XEXP (x, 1);
11060 rtx op2 = XEXP (x, 2);
11062 *cost = COSTS_N_INSNS (1);
11064 /* vfms or vfnma. */
11065 if (GET_CODE (op0) == NEG)
11066 op0 = XEXP (op0, 0);
11068 /* vfnms or vfnma. */
11069 if (GET_CODE (op2) == NEG)
11070 op2 = XEXP (op2, 0);
11072 *cost += rtx_cost (op0, FMA, 0, speed_p);
11073 *cost += rtx_cost (op1, FMA, 1, speed_p);
11074 *cost += rtx_cost (op2, FMA, 2, speed_p);
11076 if (speed_p)
11077 *cost += extra_cost->fp[mode == DFmode].fma;
11079 return true;
11082 *cost = LIBCALL_COST (3);
11083 return false;
11085 case FIX:
11086 case UNSIGNED_FIX:
11087 if (TARGET_HARD_FLOAT)
11089 if (GET_MODE_CLASS (mode) == MODE_INT)
11091 *cost = COSTS_N_INSNS (1);
11092 if (speed_p)
11093 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11094 /* Strip off the 'cost' of rounding towards zero. */
11095 if (GET_CODE (XEXP (x, 0)) == FIX)
11096 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11097 else
11098 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11099 /* ??? Increase the cost to deal with transferring from
11100 FP -> CORE registers? */
11101 return true;
11103 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11104 && TARGET_FPU_ARMV8)
11106 *cost = COSTS_N_INSNS (1);
11107 if (speed_p)
11108 *cost += extra_cost->fp[mode == DFmode].roundint;
11109 return false;
11111 /* Vector costs? */
11113 *cost = LIBCALL_COST (1);
11114 return false;
11116 case FLOAT:
11117 case UNSIGNED_FLOAT:
11118 if (TARGET_HARD_FLOAT)
11120 /* ??? Increase the cost to deal with transferring from CORE
11121 -> FP registers? */
11122 *cost = COSTS_N_INSNS (1);
11123 if (speed_p)
11124 *cost += extra_cost->fp[mode == DFmode].fromint;
11125 return false;
11127 *cost = LIBCALL_COST (1);
11128 return false;
11130 case CALL:
11131 *cost = COSTS_N_INSNS (1);
11132 return true;
11134 case ASM_OPERANDS:
11136 /* Just a guess. Guess number of instructions in the asm
11137 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11138 though (see PR60663). */
11139 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11140 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11142 *cost = COSTS_N_INSNS (asm_length + num_operands);
11143 return true;
11145 default:
11146 if (mode != VOIDmode)
11147 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11148 else
11149 *cost = COSTS_N_INSNS (4); /* Who knows? */
11150 return false;
11154 #undef HANDLE_NARROW_SHIFT_ARITH
11156 /* Dispatch RTX cost computation to the size or speed variants as appropriate. */
11157 static bool
11158 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11159 int *total, bool speed)
11161 bool result;
11163 if (TARGET_OLD_RTX_COSTS
11164 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11166 /* Old way. (Deprecated.) */
11167 if (!speed)
11168 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11169 (enum rtx_code) outer_code, total);
11170 else
11171 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11172 (enum rtx_code) outer_code, total,
11173 speed);
11175 else
11177 /* New way. */
11178 if (current_tune->insn_extra_cost)
11179 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11180 (enum rtx_code) outer_code,
11181 current_tune->insn_extra_cost,
11182 total, speed);
11183 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11184 && current_tune->insn_extra_cost == NULL */
11185 else
11186 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11187 (enum rtx_code) outer_code,
11188 &generic_extra_costs, total, speed);
11191 if (dump_file && (dump_flags & TDF_DETAILS))
11193 print_rtl_single (dump_file, x);
11194 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11195 *total, result ? "final" : "partial");
11197 return result;
11200 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11201 supported on any "slowmul" cores, so it can be ignored. */
11203 static bool
11204 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11205 int *total, bool speed)
11207 machine_mode mode = GET_MODE (x);
11209 if (TARGET_THUMB)
11211 *total = thumb1_rtx_costs (x, code, outer_code);
11212 return true;
11215 switch (code)
11217 case MULT:
11218 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11219 || mode == DImode)
11221 *total = COSTS_N_INSNS (20);
11222 return false;
11225 if (CONST_INT_P (XEXP (x, 1)))
11227 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11228 & (unsigned HOST_WIDE_INT) 0xffffffff);
11229 int cost, const_ok = const_ok_for_arm (i);
11230 int j, booth_unit_size;
11232 /* Tune as appropriate. */
11233 cost = const_ok ? 4 : 8;
11234 booth_unit_size = 2;
11235 for (j = 0; i && j < 32; j += booth_unit_size)
11237 i >>= booth_unit_size;
11238 cost++;
11241 *total = COSTS_N_INSNS (cost);
11242 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11243 return true;
11246 *total = COSTS_N_INSNS (20);
11247 return false;
11249 default:
11250 return arm_rtx_costs_1 (x, outer_code, total, speed);
11255 /* RTX cost for cores with a fast multiply unit (M variants). */
11257 static bool
11258 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11259 int *total, bool speed)
11261 machine_mode mode = GET_MODE (x);
11263 if (TARGET_THUMB1)
11265 *total = thumb1_rtx_costs (x, code, outer_code);
11266 return true;
11269 /* ??? should thumb2 use different costs? */
11270 switch (code)
11272 case MULT:
11273 /* There is no point basing this on the tuning, since it is always the
11274 fast variant if it exists at all. */
11275 if (mode == DImode
11276 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11277 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11278 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11280 *total = COSTS_N_INSNS (2);
11281 return false;
11285 if (mode == DImode)
11287 *total = COSTS_N_INSNS (5);
11288 return false;
11291 if (CONST_INT_P (XEXP (x, 1)))
11293 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11294 & (unsigned HOST_WIDE_INT) 0xffffffff);
11295 int cost, const_ok = const_ok_for_arm (i);
11296 int j, booth_unit_size;
11298 /* Tune as appropriate. */
11299 cost = const_ok ? 4 : 8;
11300 booth_unit_size = 8;
11301 for (j = 0; i && j < 32; j += booth_unit_size)
11303 i >>= booth_unit_size;
11304 cost++;
11307 *total = COSTS_N_INSNS (cost);
11308 return false;
11311 if (mode == SImode)
11313 *total = COSTS_N_INSNS (4);
11314 return false;
11317 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11319 if (TARGET_HARD_FLOAT
11320 && (mode == SFmode
11321 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11323 *total = COSTS_N_INSNS (1);
11324 return false;
11328 /* Requires a lib call */
11329 *total = COSTS_N_INSNS (20);
11330 return false;
11332 default:
11333 return arm_rtx_costs_1 (x, outer_code, total, speed);
11338 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11339 so it can be ignored. */
11341 static bool
11342 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11343 int *total, bool speed)
11345 machine_mode mode = GET_MODE (x);
11347 if (TARGET_THUMB)
11349 *total = thumb1_rtx_costs (x, code, outer_code);
11350 return true;
11353 switch (code)
11355 case COMPARE:
11356 if (GET_CODE (XEXP (x, 0)) != MULT)
11357 return arm_rtx_costs_1 (x, outer_code, total, speed);
11359 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11360 will stall until the multiplication is complete. */
11361 *total = COSTS_N_INSNS (3);
11362 return false;
11364 case MULT:
11365 /* There is no point basing this on the tuning, since it is always the
11366 fast variant if it exists at all. */
11367 if (mode == DImode
11368 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11369 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11370 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11372 *total = COSTS_N_INSNS (2);
11373 return false;
11377 if (mode == DImode)
11379 *total = COSTS_N_INSNS (5);
11380 return false;
11383 if (CONST_INT_P (XEXP (x, 1)))
11385 /* If operand 1 is a constant we can more accurately
11386 calculate the cost of the multiply. The multiplier can
11387 retire 15 bits on the first cycle and a further 12 on the
11388 second. We do, of course, have to load the constant into
11389 a register first. */
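            /* Purely illustrative walk-through of the computation below:
               for INTVAL == 0x12345678 both (i & 0xffff8000) and
               (i & 0xf8000000) are non-zero, so the local COST ends up
               as 3 and *TOTAL becomes COSTS_N_INSNS (3).  */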
11390 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11391 /* There's a general overhead of one cycle. */
11392 int cost = 1;
11393 unsigned HOST_WIDE_INT masked_const;
11395 if (i & 0x80000000)
11396 i = ~i;
11398 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11400 masked_const = i & 0xffff8000;
11401 if (masked_const != 0)
11403 cost++;
11404 masked_const = i & 0xf8000000;
11405 if (masked_const != 0)
11406 cost++;
11408 *total = COSTS_N_INSNS (cost);
11409 return false;
11412 if (mode == SImode)
11414 *total = COSTS_N_INSNS (3);
11415 return false;
11418 /* Requires a lib call */
11419 *total = COSTS_N_INSNS (20);
11420 return false;
11422 default:
11423 return arm_rtx_costs_1 (x, outer_code, total, speed);
11428 /* RTX costs for 9e (and later) cores. */
11430 static bool
11431 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11432 int *total, bool speed)
11434 machine_mode mode = GET_MODE (x);
11436 if (TARGET_THUMB1)
11438 switch (code)
11440 case MULT:
11441 /* Small multiply: 32 cycles for an integer multiply inst. */
11442 if (arm_arch6m && arm_m_profile_small_mul)
11443 *total = COSTS_N_INSNS (32);
11444 else
11445 *total = COSTS_N_INSNS (3);
11446 return true;
11448 default:
11449 *total = thumb1_rtx_costs (x, code, outer_code);
11450 return true;
11454 switch (code)
11456 case MULT:
11457 /* There is no point basing this on the tuning, since it is always the
11458 fast variant if it exists at all. */
11459 if (mode == DImode
11460 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11461 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11462 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11464 *total = COSTS_N_INSNS (2);
11465 return false;
11469 if (mode == DImode)
11471 *total = COSTS_N_INSNS (5);
11472 return false;
11475 if (mode == SImode)
11477 *total = COSTS_N_INSNS (2);
11478 return false;
11481 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11483 if (TARGET_HARD_FLOAT
11484 && (mode == SFmode
11485 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11487 *total = COSTS_N_INSNS (1);
11488 return false;
11492 *total = COSTS_N_INSNS (20);
11493 return false;
11495 default:
11496 return arm_rtx_costs_1 (x, outer_code, total, speed);
11499 /* All address computations that can be done are essentially free, but
11500 rtx_cost returns the same value for practically all of them. So we weight
11501 the different types of address here in preference order (most preferred
11502 first): PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
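/* Concretely, the function below returns 0 for auto-increment/decrement
   addresses, 2 for reg+constant, 3 for a sum involving another arithmetic
   term, 4 for other sums, 6 for anything else (e.g. a plain register) and
   10 for a MEM, LABEL_REF or SYMBOL_REF address.  */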
11503 static inline int
11504 arm_arm_address_cost (rtx x)
11506 enum rtx_code c = GET_CODE (x);
11508 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11509 return 0;
11510 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11511 return 10;
11513 if (c == PLUS)
11515 if (CONST_INT_P (XEXP (x, 1)))
11516 return 2;
11518 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11519 return 3;
11521 return 4;
11524 return 6;
11527 static inline int
11528 arm_thumb_address_cost (rtx x)
11530 enum rtx_code c = GET_CODE (x);
11532 if (c == REG)
11533 return 1;
11534 if (c == PLUS
11535 && REG_P (XEXP (x, 0))
11536 && CONST_INT_P (XEXP (x, 1)))
11537 return 1;
11539 return 2;
11542 static int
11543 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11544 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11546 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11549 /* Adjust cost hook for XScale. */
11550 static bool
11551 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11553 /* Some true dependencies can have a higher cost depending
11554 on precisely how certain input operands are used. */
11555 if (REG_NOTE_KIND(link) == 0
11556 && recog_memoized (insn) >= 0
11557 && recog_memoized (dep) >= 0)
11559 int shift_opnum = get_attr_shift (insn);
11560 enum attr_type attr_type = get_attr_type (dep);
11562 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11563 operand for INSN. If we have a shifted input operand and the
11564 instruction we depend on is another ALU instruction, then we may
11565 have to account for an additional stall. */
11566 if (shift_opnum != 0
11567 && (attr_type == TYPE_ALU_SHIFT_IMM
11568 || attr_type == TYPE_ALUS_SHIFT_IMM
11569 || attr_type == TYPE_LOGIC_SHIFT_IMM
11570 || attr_type == TYPE_LOGICS_SHIFT_IMM
11571 || attr_type == TYPE_ALU_SHIFT_REG
11572 || attr_type == TYPE_ALUS_SHIFT_REG
11573 || attr_type == TYPE_LOGIC_SHIFT_REG
11574 || attr_type == TYPE_LOGICS_SHIFT_REG
11575 || attr_type == TYPE_MOV_SHIFT
11576 || attr_type == TYPE_MVN_SHIFT
11577 || attr_type == TYPE_MOV_SHIFT_REG
11578 || attr_type == TYPE_MVN_SHIFT_REG))
11580 rtx shifted_operand;
11581 int opno;
11583 /* Get the shifted operand. */
11584 extract_insn (insn);
11585 shifted_operand = recog_data.operand[shift_opnum];
11587 /* Iterate over all the operands in DEP. If we write an operand
11588 that overlaps with SHIFTED_OPERAND, then we have to increase the
11589 cost of this dependency. */
11590 extract_insn (dep);
11591 preprocess_constraints (dep);
11592 for (opno = 0; opno < recog_data.n_operands; opno++)
11594 /* We can ignore strict inputs. */
11595 if (recog_data.operand_type[opno] == OP_IN)
11596 continue;
11598 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11599 shifted_operand))
11601 *cost = 2;
11602 return false;
11607 return true;
11610 /* Adjust cost hook for Cortex A9. */
11611 static bool
11612 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11614 switch (REG_NOTE_KIND (link))
11616 case REG_DEP_ANTI:
11617 *cost = 0;
11618 return false;
11620 case REG_DEP_TRUE:
11621 case REG_DEP_OUTPUT:
11622 if (recog_memoized (insn) >= 0
11623 && recog_memoized (dep) >= 0)
11625 if (GET_CODE (PATTERN (insn)) == SET)
11627 if (GET_MODE_CLASS
11628 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11629 || GET_MODE_CLASS
11630 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11632 enum attr_type attr_type_insn = get_attr_type (insn);
11633 enum attr_type attr_type_dep = get_attr_type (dep);
11635 /* By default all dependencies of the form
11636 s0 = s0 <op> s1
11637 s0 = s0 <op> s2
11638 have an extra latency of 1 cycle because
11639 of the input and output dependency in this
11640 case. However this gets modeled as a true
11641 dependency and hence all these checks. */
11642 if (REG_P (SET_DEST (PATTERN (insn)))
11643 && REG_P (SET_DEST (PATTERN (dep)))
11644 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11645 SET_DEST (PATTERN (dep))))
11647 /* FMACS is a special case where the dependent
11648 instruction can be issued 3 cycles before
11649 the normal latency in case of an output
11650 dependency. */
11651 if ((attr_type_insn == TYPE_FMACS
11652 || attr_type_insn == TYPE_FMACD)
11653 && (attr_type_dep == TYPE_FMACS
11654 || attr_type_dep == TYPE_FMACD))
11656 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11657 *cost = insn_default_latency (dep) - 3;
11658 else
11659 *cost = insn_default_latency (dep);
11660 return false;
11662 else
11664 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11665 *cost = insn_default_latency (dep) + 1;
11666 else
11667 *cost = insn_default_latency (dep);
11669 return false;
11674 break;
11676 default:
11677 gcc_unreachable ();
11680 return true;
11683 /* Adjust cost hook for FA726TE. */
11684 static bool
11685 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11687 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11688 by a predicated one) has a penalty of 3. */
11689 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11690 && recog_memoized (insn) >= 0
11691 && recog_memoized (dep) >= 0
11692 && get_attr_conds (dep) == CONDS_SET)
11694 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11695 if (get_attr_conds (insn) == CONDS_USE
11696 && get_attr_type (insn) != TYPE_BRANCH)
11698 *cost = 3;
11699 return false;
11702 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11703 || get_attr_conds (insn) == CONDS_USE)
11705 *cost = 0;
11706 return false;
11710 return true;
11713 /* Implement TARGET_REGISTER_MOVE_COST.
11715 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11716 such a move is typically more expensive than a single memory access. We set
11717 the cost to less than two memory accesses so that floating
11718 point to integer conversion does not go through memory. */
11721 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11722 reg_class_t from, reg_class_t to)
11724 if (TARGET_32BIT)
11726 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11727 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11728 return 15;
11729 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11730 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11731 return 4;
11732 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11733 return 20;
11734 else
11735 return 2;
11737 else
11739 if (from == HI_REGS || to == HI_REGS)
11740 return 4;
11741 else
11742 return 2;
11746 /* Implement TARGET_MEMORY_MOVE_COST. */
11749 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11750 bool in ATTRIBUTE_UNUSED)
11752 if (TARGET_32BIT)
11753 return 10;
11754 else
11756 if (GET_MODE_SIZE (mode) < 4)
11757 return 8;
11758 else
11759 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11763 /* Vectorizer cost model implementation. */
11765 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11766 static int
11767 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11768 tree vectype,
11769 int misalign ATTRIBUTE_UNUSED)
11771 unsigned elements;
11773 switch (type_of_cost)
11775 case scalar_stmt:
11776 return current_tune->vec_costs->scalar_stmt_cost;
11778 case scalar_load:
11779 return current_tune->vec_costs->scalar_load_cost;
11781 case scalar_store:
11782 return current_tune->vec_costs->scalar_store_cost;
11784 case vector_stmt:
11785 return current_tune->vec_costs->vec_stmt_cost;
11787 case vector_load:
11788 return current_tune->vec_costs->vec_align_load_cost;
11790 case vector_store:
11791 return current_tune->vec_costs->vec_store_cost;
11793 case vec_to_scalar:
11794 return current_tune->vec_costs->vec_to_scalar_cost;
11796 case scalar_to_vec:
11797 return current_tune->vec_costs->scalar_to_vec_cost;
11799 case unaligned_load:
11800 return current_tune->vec_costs->vec_unalign_load_cost;
11802 case unaligned_store:
11803 return current_tune->vec_costs->vec_unalign_store_cost;
11805 case cond_branch_taken:
11806 return current_tune->vec_costs->cond_taken_branch_cost;
11808 case cond_branch_not_taken:
11809 return current_tune->vec_costs->cond_not_taken_branch_cost;
11811 case vec_perm:
11812 case vec_promote_demote:
11813 return current_tune->vec_costs->vec_stmt_cost;
11815 case vec_construct:
11816 elements = TYPE_VECTOR_SUBPARTS (vectype);
11817 return elements / 2 + 1;
11819 default:
11820 gcc_unreachable ();
11824 /* Implement targetm.vectorize.add_stmt_cost. */
11826 static unsigned
11827 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11828 struct _stmt_vec_info *stmt_info, int misalign,
11829 enum vect_cost_model_location where)
11831 unsigned *cost = (unsigned *) data;
11832 unsigned retval = 0;
11834 if (flag_vect_cost_model)
11836 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11837 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11839 /* Statements in an inner loop relative to the loop being
11840 vectorized are weighted more heavily. The value here is
11841 arbitrary and could potentially be improved with analysis. */
11842 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11843 count *= 50; /* FIXME. */
11845 retval = (unsigned) (count * stmt_cost);
11846 cost[where] += retval;
11849 return retval;
11852 /* Return true if and only if this insn can dual-issue only as older. */
11853 static bool
11854 cortexa7_older_only (rtx_insn *insn)
11856 if (recog_memoized (insn) < 0)
11857 return false;
11859 switch (get_attr_type (insn))
11861 case TYPE_ALU_DSP_REG:
11862 case TYPE_ALU_SREG:
11863 case TYPE_ALUS_SREG:
11864 case TYPE_LOGIC_REG:
11865 case TYPE_LOGICS_REG:
11866 case TYPE_ADC_REG:
11867 case TYPE_ADCS_REG:
11868 case TYPE_ADR:
11869 case TYPE_BFM:
11870 case TYPE_REV:
11871 case TYPE_MVN_REG:
11872 case TYPE_SHIFT_IMM:
11873 case TYPE_SHIFT_REG:
11874 case TYPE_LOAD_BYTE:
11875 case TYPE_LOAD1:
11876 case TYPE_STORE1:
11877 case TYPE_FFARITHS:
11878 case TYPE_FADDS:
11879 case TYPE_FFARITHD:
11880 case TYPE_FADDD:
11881 case TYPE_FMOV:
11882 case TYPE_F_CVT:
11883 case TYPE_FCMPS:
11884 case TYPE_FCMPD:
11885 case TYPE_FCONSTS:
11886 case TYPE_FCONSTD:
11887 case TYPE_FMULS:
11888 case TYPE_FMACS:
11889 case TYPE_FMULD:
11890 case TYPE_FMACD:
11891 case TYPE_FDIVS:
11892 case TYPE_FDIVD:
11893 case TYPE_F_MRC:
11894 case TYPE_F_MRRC:
11895 case TYPE_F_FLAG:
11896 case TYPE_F_LOADS:
11897 case TYPE_F_STORES:
11898 return true;
11899 default:
11900 return false;
11904 /* Return true if and only if this insn can dual-issue as younger. */
11905 static bool
11906 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11908 if (recog_memoized (insn) < 0)
11910 if (verbose > 5)
11911 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11912 return false;
11915 switch (get_attr_type (insn))
11917 case TYPE_ALU_IMM:
11918 case TYPE_ALUS_IMM:
11919 case TYPE_LOGIC_IMM:
11920 case TYPE_LOGICS_IMM:
11921 case TYPE_EXTEND:
11922 case TYPE_MVN_IMM:
11923 case TYPE_MOV_IMM:
11924 case TYPE_MOV_REG:
11925 case TYPE_MOV_SHIFT:
11926 case TYPE_MOV_SHIFT_REG:
11927 case TYPE_BRANCH:
11928 case TYPE_CALL:
11929 return true;
11930 default:
11931 return false;
11936 /* Look for an instruction that can dual issue only as an older
11937 instruction, and move it in front of any instructions that can
11938 dual-issue as younger, while preserving the relative order of all
11939 other instructions in the ready list. This is a heuristic to help
11940 dual-issue in later cycles, by postponing issue of more flexible
11941 instructions. This heuristic may affect dual issue opportunities
11942 in the current cycle. */
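/* Illustrative example (not from the source): if the ready list holds a
   MOV-immediate (which cortexa7_younger accepts) ahead of an FADDS (which
   cortexa7_older_only accepts), the FADDS is moved in front of the MOV, so
   the MOV remains available to fill the younger slot of a dual-issue pair
   in a later cycle.  */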
11943 static void
11944 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11945 int *n_readyp, int clock)
11947 int i;
11948 int first_older_only = -1, first_younger = -1;
11950 if (verbose > 5)
11951 fprintf (file,
11952 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11953 clock,
11954 *n_readyp);
11956 /* Traverse the ready list from the head (the instruction to issue
11957 first), looking for the first instruction that can issue as
11958 younger and the first instruction that can dual-issue only as
11959 older. */
11960 for (i = *n_readyp - 1; i >= 0; i--)
11962 rtx_insn *insn = ready[i];
11963 if (cortexa7_older_only (insn))
11965 first_older_only = i;
11966 if (verbose > 5)
11967 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11968 break;
11970 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11971 first_younger = i;
11974 /* Nothing to reorder because either no younger insn found or insn
11975 that can dual-issue only as older appears before any insn that
11976 can dual-issue as younger. */
11977 if (first_younger == -1)
11979 if (verbose > 5)
11980 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11981 return;
11984 /* Nothing to reorder because no older-only insn in the ready list. */
11985 if (first_older_only == -1)
11987 if (verbose > 5)
11988 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11989 return;
11992 /* Move first_older_only insn before first_younger. */
11993 if (verbose > 5)
11994 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11995 INSN_UID(ready [first_older_only]),
11996 INSN_UID(ready [first_younger]));
11997 rtx_insn *first_older_only_insn = ready [first_older_only];
11998 for (i = first_older_only; i < first_younger; i++)
12000 ready[i] = ready[i+1];
12003 ready[i] = first_older_only_insn;
12004 return;
12007 /* Implement TARGET_SCHED_REORDER. */
12008 static int
12009 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12010 int clock)
12012 switch (arm_tune)
12014 case cortexa7:
12015 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12016 break;
12017 default:
12018 /* Do nothing for other cores. */
12019 break;
12022 return arm_issue_rate ();
12025 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12026 It corrects the value of COST based on the relationship between
12027 INSN and DEP through the dependence LINK. It returns the new
12028 value. There is a per-core adjust_cost hook to adjust scheduler costs
12029 and the per-core hook can choose to completely override the generic
12030 adjust_cost function. Only put bits of code into arm_adjust_cost that
12031 are common across all cores. */
12032 static int
12033 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12035 rtx i_pat, d_pat;
12037 /* When generating Thumb-1 code, we want to place flag-setting operations
12038 close to a conditional branch which depends on them, so that we can
12039 omit the comparison. */
12040 if (TARGET_THUMB1
12041 && REG_NOTE_KIND (link) == 0
12042 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12043 && recog_memoized (dep) >= 0
12044 && get_attr_conds (dep) == CONDS_SET)
12045 return 0;
12047 if (current_tune->sched_adjust_cost != NULL)
12049 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12050 return cost;
12053 /* XXX Is this strictly true? */
12054 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12055 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12056 return 0;
12058 /* Call insns don't incur a stall, even if they follow a load. */
12059 if (REG_NOTE_KIND (link) == 0
12060 && CALL_P (insn))
12061 return 1;
12063 if ((i_pat = single_set (insn)) != NULL
12064 && MEM_P (SET_SRC (i_pat))
12065 && (d_pat = single_set (dep)) != NULL
12066 && MEM_P (SET_DEST (d_pat)))
12068 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12069 /* This is a load after a store; there is no conflict if the load reads
12070 from a cached area. Assume that loads from the stack, and from the
12071 constant pool are cached, and that others will miss. This is a
12072 hack. */
12074 if ((GET_CODE (src_mem) == SYMBOL_REF
12075 && CONSTANT_POOL_ADDRESS_P (src_mem))
12076 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12077 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12078 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12079 return 1;
12082 return cost;
12086 arm_max_conditional_execute (void)
12088 return max_insns_skipped;
12091 static int
12092 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12094 if (TARGET_32BIT)
12095 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12096 else
12097 return (optimize > 0) ? 2 : 0;
12100 static int
12101 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12103 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12106 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12107 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12108 sequences of non-executed instructions in IT blocks probably take the same
12109 amount of time as executed instructions (and the IT instruction itself takes
12110 space in icache). This function was experimentally determined to give good
12111 results on a popular embedded benchmark. */
12113 static int
12114 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12116 return (TARGET_32BIT && speed_p) ? 1
12117 : arm_default_branch_cost (speed_p, predictable_p);
12120 static int
12121 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12123 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12126 static bool fp_consts_inited = false;
12128 static REAL_VALUE_TYPE value_fp0;
12130 static void
12131 init_fp_table (void)
12133 REAL_VALUE_TYPE r;
12135 r = REAL_VALUE_ATOF ("0", DFmode);
12136 value_fp0 = r;
12137 fp_consts_inited = true;
12140 /* Return TRUE if rtx X is a valid immediate FP constant. */
12142 arm_const_double_rtx (rtx x)
12144 REAL_VALUE_TYPE r;
12146 if (!fp_consts_inited)
12147 init_fp_table ();
12149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12150 if (REAL_VALUE_MINUS_ZERO (r))
12151 return 0;
12153 if (REAL_VALUES_EQUAL (r, value_fp0))
12154 return 1;
12156 return 0;
12159 /* VFPv3 has a fairly wide range of representable immediates, formed from
12160 "quarter-precision" floating-point values. These can be evaluated using this
12161 formula (with ^ for exponentiation):
12163 -1^s * n * 2^-r
12165 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12166 16 <= n <= 31 and 0 <= r <= 7.
12168 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12170 - A (most-significant) is the sign bit.
12171 - BCD are the exponent (encoded as r XOR 3).
12172 - EFGH are the mantissa (encoded as n - 16).
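/* Worked example (illustrative): 1.0 is (-1)^0 * 16 * 2^-4, i.e. s = 0,
   n = 16, r = 4.  That encodes as A = 0, BCD = r XOR 3 = 0b111 and
   EFGH = n - 16 = 0b0000, giving the 8-bit index 0x70, which is the value
   vfp3_const_double_index below computes for CONST_DOUBLE 1.0.  */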
12175 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12176 fconst[sd] instruction, or -1 if X isn't suitable. */
12177 static int
12178 vfp3_const_double_index (rtx x)
12180 REAL_VALUE_TYPE r, m;
12181 int sign, exponent;
12182 unsigned HOST_WIDE_INT mantissa, mant_hi;
12183 unsigned HOST_WIDE_INT mask;
12184 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12185 bool fail;
12187 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12188 return -1;
12190 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12192 /* We can't represent these things, so detect them first. */
12193 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12194 return -1;
12196 /* Extract sign, exponent and mantissa. */
12197 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12198 r = real_value_abs (&r);
12199 exponent = REAL_EXP (&r);
12200 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12201 highest (sign) bit, with a fixed binary point at bit point_pos.
12202 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12203 bits for the mantissa, this may fail (low bits would be lost). */
12204 real_ldexp (&m, &r, point_pos - exponent);
12205 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12206 mantissa = w.elt (0);
12207 mant_hi = w.elt (1);
12209 /* If there are bits set in the low part of the mantissa, we can't
12210 represent this value. */
12211 if (mantissa != 0)
12212 return -1;
12214 /* Now make it so that mantissa contains the most-significant bits, and move
12215 the point_pos to indicate that the least-significant bits have been
12216 discarded. */
12217 point_pos -= HOST_BITS_PER_WIDE_INT;
12218 mantissa = mant_hi;
12220 /* We can permit four significant bits of mantissa only, plus a high bit
12221 which is always 1. */
12222 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12223 if ((mantissa & mask) != 0)
12224 return -1;
12226 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12227 mantissa >>= point_pos - 5;
12229 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12230 floating-point immediate zero with Neon using an integer-zero load, but
12231 that case is handled elsewhere.) */
12232 if (mantissa == 0)
12233 return -1;
12235 gcc_assert (mantissa >= 16 && mantissa <= 31);
12237 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12238 normalized significands are in the range [1, 2). (Our mantissa is shifted
12239 left 4 places at this point relative to normalized IEEE754 values). GCC
12240 internally uses [0.5, 1) (see real.c), so the exponent returned from
12241 REAL_EXP must be altered. */
12242 exponent = 5 - exponent;
12244 if (exponent < 0 || exponent > 7)
12245 return -1;
12247 /* Sign, mantissa and exponent are now in the correct form to plug into the
12248 formula described in the comment above. */
12249 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12252 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12254 vfp3_const_double_rtx (rtx x)
12256 if (!TARGET_VFP3)
12257 return 0;
12259 return vfp3_const_double_index (x) != -1;
12262 /* Recognize immediates which can be used in various Neon instructions. Legal
12263 immediates are described by the following table (for VMVN variants, the
12264 bitwise inverse of the constant shown is recognized. In either case, VMOV
12265 is output and the correct instruction to use for a given constant is chosen
12266 by the assembler). The constant shown is replicated across all elements of
12267 the destination vector.
12269 insn elems variant constant (binary)
12270 ---- ----- ------- -----------------
12271 vmov i32 0 00000000 00000000 00000000 abcdefgh
12272 vmov i32 1 00000000 00000000 abcdefgh 00000000
12273 vmov i32 2 00000000 abcdefgh 00000000 00000000
12274 vmov i32 3 abcdefgh 00000000 00000000 00000000
12275 vmov i16 4 00000000 abcdefgh
12276 vmov i16 5 abcdefgh 00000000
12277 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12278 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12279 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12280 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12281 vmvn i16 10 00000000 abcdefgh
12282 vmvn i16 11 abcdefgh 00000000
12283 vmov i32 12 00000000 00000000 abcdefgh 11111111
12284 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12285 vmov i32 14 00000000 abcdefgh 11111111 11111111
12286 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12287 vmov i8 16 abcdefgh
12288 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12289 eeeeeeee ffffffff gggggggg hhhhhhhh
12290 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12291 vmov f32 19 00000000 00000000 00000000 00000000
12293 For case 18, B = !b. Representable values are exactly those accepted by
12294 vfp3_const_double_index, but are output as floating-point numbers rather
12295 than indices.
12297 For case 19, we will change it to vmov.i32 when assembling.
12299 Variants 0-5 (inclusive) may also be used as immediates for the second
12300 operand of VORR/VBIC instructions.
12302 The INVERSE argument causes the bitwise inverse of the given operand to be
12303 recognized instead (used for recognizing legal immediates for the VAND/VORN
12304 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12305 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12306 output, rather than the real insns vbic/vorr).
12308 INVERSE makes no difference to the recognition of float vectors.
12310 The return value is the variant of immediate as shown in the above table, or
12311 -1 if the given value doesn't match any of the listed patterns.
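/* Illustrative example: a V4SImode CONST_VECTOR whose four elements are all
   0x000000AB matches variant 0, so this function returns 0 with
   *ELEMENTWIDTH = 32 and *MODCONST = (const_int 0xAB); the eventual output
   is "vmov.i32 q0, #0xab".  A vector whose elements are 0xFFFFFF54 (the
   bitwise inverse) matches the VMVN form, variant 6.  */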
12313 static int
12314 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12315 rtx *modconst, int *elementwidth)
12317 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12318 matches = 1; \
12319 for (i = 0; i < idx; i += (STRIDE)) \
12320 if (!(TEST)) \
12321 matches = 0; \
12322 if (matches) \
12324 immtype = (CLASS); \
12325 elsize = (ELSIZE); \
12326 break; \
12329 unsigned int i, elsize = 0, idx = 0, n_elts;
12330 unsigned int innersize;
12331 unsigned char bytes[16];
12332 int immtype = -1, matches;
12333 unsigned int invmask = inverse ? 0xff : 0;
12334 bool vector = GET_CODE (op) == CONST_VECTOR;
12336 if (vector)
12338 n_elts = CONST_VECTOR_NUNITS (op);
12339 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12341 else
12343 n_elts = 1;
12344 if (mode == VOIDmode)
12345 mode = DImode;
12346 innersize = GET_MODE_SIZE (mode);
12349 /* Vectors of float constants. */
12350 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12352 rtx el0 = CONST_VECTOR_ELT (op, 0);
12353 REAL_VALUE_TYPE r0;
12355 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12356 return -1;
12358 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12360 for (i = 1; i < n_elts; i++)
12362 rtx elt = CONST_VECTOR_ELT (op, i);
12363 REAL_VALUE_TYPE re;
12365 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12367 if (!REAL_VALUES_EQUAL (r0, re))
12368 return -1;
12371 if (modconst)
12372 *modconst = CONST_VECTOR_ELT (op, 0);
12374 if (elementwidth)
12375 *elementwidth = 0;
12377 if (el0 == CONST0_RTX (GET_MODE (el0)))
12378 return 19;
12379 else
12380 return 18;
12383 /* Splat vector constant out into a byte vector. */
12384 for (i = 0; i < n_elts; i++)
12386 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12387 unsigned HOST_WIDE_INT elpart;
12388 unsigned int part, parts;
12390 if (CONST_INT_P (el))
12392 elpart = INTVAL (el);
12393 parts = 1;
12395 else if (CONST_DOUBLE_P (el))
12397 elpart = CONST_DOUBLE_LOW (el);
12398 parts = 2;
12400 else
12401 gcc_unreachable ();
12403 for (part = 0; part < parts; part++)
12405 unsigned int byte;
12406 for (byte = 0; byte < innersize; byte++)
12408 bytes[idx++] = (elpart & 0xff) ^ invmask;
12409 elpart >>= BITS_PER_UNIT;
12411 if (CONST_DOUBLE_P (el))
12412 elpart = CONST_DOUBLE_HIGH (el);
12416 /* Sanity check. */
12417 gcc_assert (idx == GET_MODE_SIZE (mode));
12421 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12422 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12424 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12425 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12427 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12428 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12430 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12431 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12433 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12435 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12437 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12438 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12440 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12441 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12443 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12444 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12446 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12447 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12449 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12451 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12453 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12454 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12456 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12457 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12459 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12460 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12462 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12463 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12465 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12467 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12468 && bytes[i] == bytes[(i + 8) % idx]);
12470 while (0);
12472 if (immtype == -1)
12473 return -1;
12475 if (elementwidth)
12476 *elementwidth = elsize;
12478 if (modconst)
12480 unsigned HOST_WIDE_INT imm = 0;
12482 /* Un-invert bytes of recognized vector, if necessary. */
12483 if (invmask != 0)
12484 for (i = 0; i < idx; i++)
12485 bytes[i] ^= invmask;
12487 if (immtype == 17)
12489 /* FIXME: Broken on 32-bit H_W_I hosts. */
12490 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12492 for (i = 0; i < 8; i++)
12493 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12494 << (i * BITS_PER_UNIT);
12496 *modconst = GEN_INT (imm);
12498 else
12500 unsigned HOST_WIDE_INT imm = 0;
12502 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12503 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12505 *modconst = GEN_INT (imm);
12509 return immtype;
12510 #undef CHECK
12513 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12514 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12515 float elements), and a modified constant (whatever should be output for a
12516 VMOV) in *MODCONST. */
12519 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12520 rtx *modconst, int *elementwidth)
12522 rtx tmpconst;
12523 int tmpwidth;
12524 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12526 if (retval == -1)
12527 return 0;
12529 if (modconst)
12530 *modconst = tmpconst;
12532 if (elementwidth)
12533 *elementwidth = tmpwidth;
12535 return 1;
12538 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12539 the immediate is valid, write a constant suitable for using as an operand
12540 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12541 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12544 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12545 rtx *modconst, int *elementwidth)
12547 rtx tmpconst;
12548 int tmpwidth;
12549 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12551 if (retval < 0 || retval > 5)
12552 return 0;
12554 if (modconst)
12555 *modconst = tmpconst;
12557 if (elementwidth)
12558 *elementwidth = tmpwidth;
12560 return 1;
12563 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12564 the immediate is valid, write a constant suitable for using as an operand
12565 to VSHR/VSHL to *MODCONST and the corresponding element width to
12566 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether the shift is a left shift,
12567 because left and right shifts have different limitations. */
12570 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12571 rtx *modconst, int *elementwidth,
12572 bool isleftshift)
12574 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12575 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12576 unsigned HOST_WIDE_INT last_elt = 0;
12577 unsigned HOST_WIDE_INT maxshift;
12579 /* Check that all elements hold the same (CONST_INT) shift amount. */
12580 for (i = 0; i < n_elts; i++)
12582 rtx el = CONST_VECTOR_ELT (op, i);
12583 unsigned HOST_WIDE_INT elpart;
12585 if (CONST_INT_P (el))
12586 elpart = INTVAL (el);
12587 else if (CONST_DOUBLE_P (el))
12588 return 0;
12589 else
12590 gcc_unreachable ();
12592 if (i != 0 && elpart != last_elt)
12593 return 0;
12595 last_elt = elpart;
12598 /* Shift less than element size. */
12599 maxshift = innersize * 8;
12601 if (isleftshift)
12603 /* Left shift immediate value can be from 0 to <size>-1. */
12604 if (last_elt >= maxshift)
12605 return 0;
12607 else
12609 /* Right shift immediate value can be from 1 to <size>. */
12610 if (last_elt == 0 || last_elt > maxshift)
12611 return 0;
12614 if (elementwidth)
12615 *elementwidth = innersize * 8;
12617 if (modconst)
12618 *modconst = CONST_VECTOR_ELT (op, 0);
12620 return 1;
12623 /* Return a string suitable for output of Neon immediate logic operation
12624 MNEM. */
12626 char *
12627 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12628 int inverse, int quad)
12630 int width, is_valid;
12631 static char templ[40];
12633 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12635 gcc_assert (is_valid != 0);
12637 if (quad)
12638 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12639 else
12640 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12642 return templ;
12645 /* Return a string suitable for output of Neon immediate shift operation
12646 (VSHR or VSHL) MNEM. */
12648 char *
12649 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12650 machine_mode mode, int quad,
12651 bool isleftshift)
12653 int width, is_valid;
12654 static char templ[40];
12656 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12657 gcc_assert (is_valid != 0);
12659 if (quad)
12660 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12661 else
12662 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12664 return templ;
12667 /* Output a sequence of pairwise operations to implement a reduction.
12668 NOTE: We do "too much work" here, because pairwise operations work on two
12669 registers-worth of operands in one go. Unfortunately we don't think we can
12670 exploit those extra calculations to do the full operation in fewer steps.
12671 Although all vector elements of the result but the first are ignored, we
12672 actually calculate the same result in each of the elements. An alternative
12673 such as initially loading a vector with zero to use as each of the second
12674 operands would use up an additional register and take an extra instruction,
12675 for no particular gain. */
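/* Illustrative example: for a V4HImode OP1 = {a, b, c, d} and a pairwise-add
   REDUC, parts = 4, so the loop below runs twice:
     i == 2: tmpsum = {a+b, c+d, a+b, c+d}
     i == 1: op0    = {a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d}
   Element 0 of OP0 is the reduction result; as noted above, the remaining
   elements end up holding the same value.  */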
12677 void
12678 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12679 rtx (*reduc) (rtx, rtx, rtx))
12681 machine_mode inner = GET_MODE_INNER (mode);
12682 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12683 rtx tmpsum = op1;
12685 for (i = parts / 2; i >= 1; i /= 2)
12687 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12688 emit_insn (reduc (dest, tmpsum, tmpsum));
12689 tmpsum = dest;
12693 /* If VALS is a vector constant that can be loaded into a register
12694 using VDUP, generate instructions to do so and return an RTX to
12695 assign to the register. Otherwise return NULL_RTX. */
12697 static rtx
12698 neon_vdup_constant (rtx vals)
12700 machine_mode mode = GET_MODE (vals);
12701 machine_mode inner_mode = GET_MODE_INNER (mode);
12702 int n_elts = GET_MODE_NUNITS (mode);
12703 bool all_same = true;
12704 rtx x;
12705 int i;
12707 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12708 return NULL_RTX;
12710 for (i = 0; i < n_elts; ++i)
12712 x = XVECEXP (vals, 0, i);
12713 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12714 all_same = false;
12717 if (!all_same)
12718 /* The elements are not all the same. We could handle repeating
12719 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12720 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12721 vdup.i16). */
12722 return NULL_RTX;
12724 /* We can load this constant by using VDUP and a constant in a
12725 single ARM register. This will be cheaper than a vector
12726 load. */
12728 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12729 return gen_rtx_VEC_DUPLICATE (mode, x);
12732 /* Generate code to load VALS, which is a PARALLEL containing only
12733 constants (for vec_init) or CONST_VECTOR, efficiently into a
12734 register. Returns an RTX to copy into the register, or NULL_RTX
12735 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12738 neon_make_constant (rtx vals)
12740 machine_mode mode = GET_MODE (vals);
12741 rtx target;
12742 rtx const_vec = NULL_RTX;
12743 int n_elts = GET_MODE_NUNITS (mode);
12744 int n_const = 0;
12745 int i;
12747 if (GET_CODE (vals) == CONST_VECTOR)
12748 const_vec = vals;
12749 else if (GET_CODE (vals) == PARALLEL)
12751 /* A CONST_VECTOR must contain only CONST_INTs and
12752 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12753 Only store valid constants in a CONST_VECTOR. */
12754 for (i = 0; i < n_elts; ++i)
12756 rtx x = XVECEXP (vals, 0, i);
12757 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12758 n_const++;
12760 if (n_const == n_elts)
12761 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12763 else
12764 gcc_unreachable ();
12766 if (const_vec != NULL
12767 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12768 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12769 return const_vec;
12770 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12771 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12772 pipeline cycle; creating the constant takes one or two ARM
12773 pipeline cycles. */
12774 return target;
12775 else if (const_vec != NULL_RTX)
12776 /* Load from constant pool. On Cortex-A8 this takes two cycles
12777 (for either double or quad vectors). We can not take advantage
12778 of single-cycle VLD1 because we need a PC-relative addressing
12779 mode. */
12780 return const_vec;
12781 else
12782 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12783 We can not construct an initializer. */
12784 return NULL_RTX;
12787 /* Initialize vector TARGET to VALS. */
12789 void
12790 neon_expand_vector_init (rtx target, rtx vals)
12792 machine_mode mode = GET_MODE (target);
12793 machine_mode inner_mode = GET_MODE_INNER (mode);
12794 int n_elts = GET_MODE_NUNITS (mode);
12795 int n_var = 0, one_var = -1;
12796 bool all_same = true;
12797 rtx x, mem;
12798 int i;
12800 for (i = 0; i < n_elts; ++i)
12802 x = XVECEXP (vals, 0, i);
12803 if (!CONSTANT_P (x))
12804 ++n_var, one_var = i;
12806 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12807 all_same = false;
12810 if (n_var == 0)
12812 rtx constant = neon_make_constant (vals);
12813 if (constant != NULL_RTX)
12815 emit_move_insn (target, constant);
12816 return;
12820 /* Splat a single non-constant element if we can. */
12821 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12823 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12824 emit_insn (gen_rtx_SET (VOIDmode, target,
12825 gen_rtx_VEC_DUPLICATE (mode, x)));
12826 return;
12829 /* One field is non-constant. Load constant then overwrite varying
12830 field. This is more efficient than using the stack. */
12831 if (n_var == 1)
12833 rtx copy = copy_rtx (vals);
12834 rtx index = GEN_INT (one_var);
12836 /* Load constant part of vector, substitute neighboring value for
12837 varying element. */
12838 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12839 neon_expand_vector_init (target, copy);
12841 /* Insert variable. */
12842 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12843 switch (mode)
12845 case V8QImode:
12846 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12847 break;
12848 case V16QImode:
12849 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12850 break;
12851 case V4HImode:
12852 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12853 break;
12854 case V8HImode:
12855 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12856 break;
12857 case V2SImode:
12858 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12859 break;
12860 case V4SImode:
12861 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12862 break;
12863 case V2SFmode:
12864 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12865 break;
12866 case V4SFmode:
12867 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12868 break;
12869 case V2DImode:
12870 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12871 break;
12872 default:
12873 gcc_unreachable ();
12875 return;
12878 /* Construct the vector in memory one field at a time
12879 and load the whole vector. */
12880 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12881 for (i = 0; i < n_elts; i++)
12882 emit_move_insn (adjust_address_nv (mem, inner_mode,
12883 i * GET_MODE_SIZE (inner_mode)),
12884 XVECEXP (vals, 0, i));
12885 emit_move_insn (target, mem);
12888 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12889 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12890 reported source locations are bogus. */
12892 static void
12893 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12894 const char *err)
12896 HOST_WIDE_INT lane;
12898 gcc_assert (CONST_INT_P (operand));
12900 lane = INTVAL (operand);
12902 if (lane < low || lane >= high)
12903 error (err);
12906 /* Bounds-check lanes. */
12908 void
12909 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12911 bounds_check (operand, low, high, "lane out of range");
12914 /* Bounds-check constants. */
12916 void
12917 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12919 bounds_check (operand, low, high, "constant out of range");
12922 HOST_WIDE_INT
12923 neon_element_bits (machine_mode mode)
12925 if (mode == DImode)
12926 return GET_MODE_BITSIZE (mode);
12927 else
12928 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12932 /* Predicates for `match_operand' and `match_operator'. */
12934 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12935 WB is true if full writeback address modes are allowed and is false
12936 if limited writeback address modes (POST_INC and PRE_DEC) are
12937 allowed. */
12940 arm_coproc_mem_operand (rtx op, bool wb)
12942 rtx ind;
12944 /* Reject eliminable registers. */
12945 if (! (reload_in_progress || reload_completed || lra_in_progress)
12946 && ( reg_mentioned_p (frame_pointer_rtx, op)
12947 || reg_mentioned_p (arg_pointer_rtx, op)
12948 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12949 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12950 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12951 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12952 return FALSE;
12954 /* Constants are converted into offsets from labels. */
12955 if (!MEM_P (op))
12956 return FALSE;
12958 ind = XEXP (op, 0);
12960 if (reload_completed
12961 && (GET_CODE (ind) == LABEL_REF
12962 || (GET_CODE (ind) == CONST
12963 && GET_CODE (XEXP (ind, 0)) == PLUS
12964 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12965 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12966 return TRUE;
12968 /* Match: (mem (reg)). */
12969 if (REG_P (ind))
12970 return arm_address_register_rtx_p (ind, 0);
12972 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12973 acceptable in any case (subject to verification by
12974 arm_address_register_rtx_p). We need WB to be true to accept
12975 PRE_INC and POST_DEC. */
12976 if (GET_CODE (ind) == POST_INC
12977 || GET_CODE (ind) == PRE_DEC
12978 || (wb
12979 && (GET_CODE (ind) == PRE_INC
12980 || GET_CODE (ind) == POST_DEC)))
12981 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12983 if (wb
12984 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12985 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12986 && GET_CODE (XEXP (ind, 1)) == PLUS
12987 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12988 ind = XEXP (ind, 1);
12990 /* Match:
12991 (plus (reg)
12992 (const)). */
12993 if (GET_CODE (ind) == PLUS
12994 && REG_P (XEXP (ind, 0))
12995 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12996 && CONST_INT_P (XEXP (ind, 1))
12997 && INTVAL (XEXP (ind, 1)) > -1024
12998 && INTVAL (XEXP (ind, 1)) < 1024
12999 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13000 return TRUE;
13002 return FALSE;
13005 /* Return TRUE if OP is a memory operand which we can load or store a vector
13006 to/from. TYPE is one of the following values:
13007 0 - Vector load/store (vldr)
13008 1 - Core registers (ldm)
13009 2 - Element/structure loads (vld1)
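/* Illustrative examples: TYPE 0 accepts (mem (reg rN)),
   (mem (plus (reg rN) (const_int 8))) -- i.e. "vldr dD, [rN, #8]" -- and
   post-incremented forms; TYPE 2 also allows post-increment by a register
   (POST_MODIFY), as in "vld1.8 {dD}, [rN], rM", but not the constant-offset
   form; TYPE 1 accepts only a plain register base.  */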
13012 neon_vector_mem_operand (rtx op, int type, bool strict)
13014 rtx ind;
13016 /* Reject eliminable registers. */
13017 if (! (reload_in_progress || reload_completed)
13018 && ( reg_mentioned_p (frame_pointer_rtx, op)
13019 || reg_mentioned_p (arg_pointer_rtx, op)
13020 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13021 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13022 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13023 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13024 return !strict;
13026 /* Constants are converted into offsets from labels. */
13027 if (!MEM_P (op))
13028 return FALSE;
13030 ind = XEXP (op, 0);
13032 if (reload_completed
13033 && (GET_CODE (ind) == LABEL_REF
13034 || (GET_CODE (ind) == CONST
13035 && GET_CODE (XEXP (ind, 0)) == PLUS
13036 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13037 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13038 return TRUE;
13040 /* Match: (mem (reg)). */
13041 if (REG_P (ind))
13042 return arm_address_register_rtx_p (ind, 0);
13044 /* Allow post-increment with Neon registers. */
13045 if ((type != 1 && GET_CODE (ind) == POST_INC)
13046 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13047 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13049 /* Allow post-increment by register for VLDn */
13050 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13051 && GET_CODE (XEXP (ind, 1)) == PLUS
13052 && REG_P (XEXP (XEXP (ind, 1), 1)))
13053 return true;
13055 /* Match:
13056 (plus (reg)
13057 (const)). */
13058 if (type == 0
13059 && GET_CODE (ind) == PLUS
13060 && REG_P (XEXP (ind, 0))
13061 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13062 && CONST_INT_P (XEXP (ind, 1))
13063 && INTVAL (XEXP (ind, 1)) > -1024
13064 /* For quad modes, we restrict the constant offset to be slightly less
13065 than what the instruction format permits. We have no such constraint
13066 on double mode offsets. (This must match arm_legitimate_index_p.) */
13067 && (INTVAL (XEXP (ind, 1))
13068 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13069 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13070 return TRUE;
13072 return FALSE;
13075 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13076 type. */
13078 neon_struct_mem_operand (rtx op)
13080 rtx ind;
13082 /* Reject eliminable registers. */
13083 if (! (reload_in_progress || reload_completed)
13084 && ( reg_mentioned_p (frame_pointer_rtx, op)
13085 || reg_mentioned_p (arg_pointer_rtx, op)
13086 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13087 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13088 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13089 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13090 return FALSE;
13092 /* Constants are converted into offsets from labels. */
13093 if (!MEM_P (op))
13094 return FALSE;
13096 ind = XEXP (op, 0);
13098 if (reload_completed
13099 && (GET_CODE (ind) == LABEL_REF
13100 || (GET_CODE (ind) == CONST
13101 && GET_CODE (XEXP (ind, 0)) == PLUS
13102 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13103 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13104 return TRUE;
13106 /* Match: (mem (reg)). */
13107 if (REG_P (ind))
13108 return arm_address_register_rtx_p (ind, 0);
13110 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13111 if (GET_CODE (ind) == POST_INC
13112 || GET_CODE (ind) == PRE_DEC)
13113 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13115 return FALSE;
13118 /* Return true if X is a register that will be eliminated later on. */
13120 arm_eliminable_register (rtx x)
13122 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13123 || REGNO (x) == ARG_POINTER_REGNUM
13124 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13125 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13128 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13129 coprocessor registers. Otherwise return NO_REGS. */
13131 enum reg_class
13132 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13134 if (mode == HFmode)
13136 if (!TARGET_NEON_FP16)
13137 return GENERAL_REGS;
13138 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13139 return NO_REGS;
13140 return GENERAL_REGS;
13143 /* The neon move patterns handle all legitimate vector and struct
13144 addresses. */
13145 if (TARGET_NEON
13146 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13147 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13148 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13149 || VALID_NEON_STRUCT_MODE (mode)))
13150 return NO_REGS;
13152 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13153 return NO_REGS;
13155 return GENERAL_REGS;
13158 /* Values which must be returned in the most-significant end of the return
13159 register. */
13161 static bool
13162 arm_return_in_msb (const_tree valtype)
13164 return (TARGET_AAPCS_BASED
13165 && BYTES_BIG_ENDIAN
13166 && (AGGREGATE_TYPE_P (valtype)
13167 || TREE_CODE (valtype) == COMPLEX_TYPE
13168 || FIXED_POINT_TYPE_P (valtype)));
13171 /* Return TRUE if X references a SYMBOL_REF. */
13173 symbol_mentioned_p (rtx x)
13175 const char * fmt;
13176 int i;
13178 if (GET_CODE (x) == SYMBOL_REF)
13179 return 1;
13181 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13182 are constant offsets, not symbols. */
13183 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13184 return 0;
13186 fmt = GET_RTX_FORMAT (GET_CODE (x));
13188 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13190 if (fmt[i] == 'E')
13192 int j;
13194 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13195 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13196 return 1;
13198 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13199 return 1;
13202 return 0;
13205 /* Return TRUE if X references a LABEL_REF. */
13207 label_mentioned_p (rtx x)
13209 const char * fmt;
13210 int i;
13212 if (GET_CODE (x) == LABEL_REF)
13213 return 1;
13215 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13216 instruction, but they are constant offsets, not symbols. */
13217 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13218 return 0;
13220 fmt = GET_RTX_FORMAT (GET_CODE (x));
13221 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13223 if (fmt[i] == 'E')
13225 int j;
13227 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13228 if (label_mentioned_p (XVECEXP (x, i, j)))
13229 return 1;
13231 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13232 return 1;
13235 return 0;
13239 tls_mentioned_p (rtx x)
13241 switch (GET_CODE (x))
13243 case CONST:
13244 return tls_mentioned_p (XEXP (x, 0));
13246 case UNSPEC:
13247 if (XINT (x, 1) == UNSPEC_TLS)
13248 return 1;
13250 default:
13251 return 0;
13255 /* Must not copy any rtx that uses a pc-relative address. */
13257 static bool
13258 arm_cannot_copy_insn_p (rtx_insn *insn)
13260 /* The tls call insn cannot be copied, as it is paired with a data
13261 word. */
13262 if (recog_memoized (insn) == CODE_FOR_tlscall)
13263 return true;
13265 subrtx_iterator::array_type array;
13266 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13268 const_rtx x = *iter;
13269 if (GET_CODE (x) == UNSPEC
13270 && (XINT (x, 1) == UNSPEC_PIC_BASE
13271 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13272 return true;
13274 return false;
13277 enum rtx_code
13278 minmax_code (rtx x)
13280 enum rtx_code code = GET_CODE (x);
13282 switch (code)
13284 case SMAX:
13285 return GE;
13286 case SMIN:
13287 return LE;
13288 case UMIN:
13289 return LEU;
13290 case UMAX:
13291 return GEU;
13292 default:
13293 gcc_unreachable ();
13297 /* Match pair of min/max operators that can be implemented via usat/ssat. */
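/* Worked example (illustrative): a clamp to [0, 255] gives log = 8 and a zero
   low bound, so *MASK = 8 and *SIGNED_SAT = false, i.e. "usat Rd, #8, Rm".
   A clamp to [-256, 255] gives *MASK = 9 and *SIGNED_SAT = true, i.e.
   "ssat Rd, #9, Rm".  */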
13299 bool
13300 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13301 int *mask, bool *signed_sat)
13303 /* The high bound must be a power of two minus one. */
13304 int log = exact_log2 (INTVAL (hi_bound) + 1);
13305 if (log == -1)
13306 return false;
13308 /* The low bound is either zero (for usat) or one less than the
13309 negation of the high bound (for ssat). */
13310 if (INTVAL (lo_bound) == 0)
13312 if (mask)
13313 *mask = log;
13314 if (signed_sat)
13315 *signed_sat = false;
13317 return true;
13320 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13322 if (mask)
13323 *mask = log + 1;
13324 if (signed_sat)
13325 *signed_sat = true;
13327 return true;
13330 return false;
13333 /* Return 1 if memory locations are adjacent. */
13335 adjacent_mem_locations (rtx a, rtx b)
13337 /* We don't guarantee to preserve the order of these memory refs. */
13338 if (volatile_refs_p (a) || volatile_refs_p (b))
13339 return 0;
13341 if ((REG_P (XEXP (a, 0))
13342 || (GET_CODE (XEXP (a, 0)) == PLUS
13343 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13344 && (REG_P (XEXP (b, 0))
13345 || (GET_CODE (XEXP (b, 0)) == PLUS
13346 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13348 HOST_WIDE_INT val0 = 0, val1 = 0;
13349 rtx reg0, reg1;
13350 int val_diff;
13352 if (GET_CODE (XEXP (a, 0)) == PLUS)
13354 reg0 = XEXP (XEXP (a, 0), 0);
13355 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13357 else
13358 reg0 = XEXP (a, 0);
13360 if (GET_CODE (XEXP (b, 0)) == PLUS)
13362 reg1 = XEXP (XEXP (b, 0), 0);
13363 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13365 else
13366 reg1 = XEXP (b, 0);
13368 /* Don't accept any offset that will require multiple
13369 instructions to handle, since this would cause the
13370 arith_adjacentmem pattern to output an overlong sequence. */
13371 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13372 return 0;
13374 /* Don't allow an eliminable register: register elimination can make
13375 the offset too large. */
13376 if (arm_eliminable_register (reg0))
13377 return 0;
13379 val_diff = val1 - val0;
13381 if (arm_ld_sched)
13383 /* If the target has load delay slots, then there's no benefit
13384 to using an ldm instruction unless the offset is zero and
13385 we are optimizing for size. */
13386 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13387 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13388 && (val_diff == 4 || val_diff == -4));
13391 return ((REGNO (reg0) == REGNO (reg1))
13392 && (val_diff == 4 || val_diff == -4));
13395 return 0;
13398 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13399 for load operations, false for store operations. CONSECUTIVE is true
13400 if the register numbers in the operation must be consecutive in the register
13401 bank. RETURN_PC is true if the value is to be loaded into the PC.
13402 The pattern we are trying to match for load is:
13403 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13404 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13407 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13409 where
13410 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13411 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13412 3. If consecutive is TRUE, then for kth register being loaded,
13413 REGNO (R_dk) = REGNO (R_d0) + k.
13414 The pattern for store is similar. */
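/* For instance (illustrative), "ldm r0, {r1, r2, r3}" is represented as
     (parallel [(set (reg r1) (mem (reg r0)))
                (set (reg r2) (mem (plus (reg r0) (const_int 4))))
                (set (reg r3) (mem (plus (reg r0) (const_int 8))))])
   which satisfies rules 1 and 2 above, and rule 3 when CONSECUTIVE.  */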
13415 bool
13416 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13417 bool consecutive, bool return_pc)
13419 HOST_WIDE_INT count = XVECLEN (op, 0);
13420 rtx reg, mem, addr;
13421 unsigned regno;
13422 unsigned first_regno;
13423 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13424 rtx elt;
13425 bool addr_reg_in_reglist = false;
13426 bool update = false;
13427 int reg_increment;
13428 int offset_adj;
13429 int regs_per_val;
13431 /* If not in SImode, then registers must be consecutive
13432 (e.g., VLDM instructions for DFmode). */
13433 gcc_assert ((mode == SImode) || consecutive);
13434 /* Setting return_pc for stores is illegal. */
13435 gcc_assert (!return_pc || load);
13437 /* Set up the increments and the regs per val based on the mode. */
13438 reg_increment = GET_MODE_SIZE (mode);
13439 regs_per_val = reg_increment / 4;
13440 offset_adj = return_pc ? 1 : 0;
13442 if (count <= 1
13443 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13444 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13445 return false;
13447 /* Check if this is a write-back. */
13448 elt = XVECEXP (op, 0, offset_adj);
13449 if (GET_CODE (SET_SRC (elt)) == PLUS)
13451 i++;
13452 base = 1;
13453 update = true;
13455 /* The offset adjustment must be the number of registers being
13456 popped times the size of a single register. */
13457 if (!REG_P (SET_DEST (elt))
13458 || !REG_P (XEXP (SET_SRC (elt), 0))
13459 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13460 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13461 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13462 ((count - 1 - offset_adj) * reg_increment))
13463 return false;
13466 i = i + offset_adj;
13467 base = base + offset_adj;
13468 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13469 success depends on the type: VLDM can do just one reg,
13470 LDM must do at least two. */
13471 if ((count <= i) && (mode == SImode))
13472 return false;
13474 elt = XVECEXP (op, 0, i - 1);
13475 if (GET_CODE (elt) != SET)
13476 return false;
13478 if (load)
13480 reg = SET_DEST (elt);
13481 mem = SET_SRC (elt);
13483 else
13485 reg = SET_SRC (elt);
13486 mem = SET_DEST (elt);
13489 if (!REG_P (reg) || !MEM_P (mem))
13490 return false;
13492 regno = REGNO (reg);
13493 first_regno = regno;
13494 addr = XEXP (mem, 0);
13495 if (GET_CODE (addr) == PLUS)
13497 if (!CONST_INT_P (XEXP (addr, 1)))
13498 return false;
13500 offset = INTVAL (XEXP (addr, 1));
13501 addr = XEXP (addr, 0);
13504 if (!REG_P (addr))
13505 return false;
13507 /* Don't allow SP to be loaded unless it is also the base register. It
13508 guarantees that SP is reset correctly when an LDM instruction
13509 is interrupted. Otherwise, we might end up with a corrupt stack. */
13510 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13511 return false;
13513 for (; i < count; i++)
13515 elt = XVECEXP (op, 0, i);
13516 if (GET_CODE (elt) != SET)
13517 return false;
13519 if (load)
13521 reg = SET_DEST (elt);
13522 mem = SET_SRC (elt);
13524 else
13526 reg = SET_SRC (elt);
13527 mem = SET_DEST (elt);
13530 if (!REG_P (reg)
13531 || GET_MODE (reg) != mode
13532 || REGNO (reg) <= regno
13533 || (consecutive
13534 && (REGNO (reg) !=
13535 (unsigned int) (first_regno + regs_per_val * (i - base))))
13536 /* Don't allow SP to be loaded unless it is also the base register. It
13537 guarantees that SP is reset correctly when an LDM instruction
13538 is interrupted. Otherwise, we might end up with a corrupt stack. */
13539 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13540 || !MEM_P (mem)
13541 || GET_MODE (mem) != mode
13542 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13543 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13544 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13545 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13546 offset + (i - base) * reg_increment))
13547 && (!REG_P (XEXP (mem, 0))
13548 || offset + (i - base) * reg_increment != 0)))
13549 return false;
13551 regno = REGNO (reg);
13552 if (regno == REGNO (addr))
13553 addr_reg_in_reglist = true;
13556 if (load)
13558 if (update && addr_reg_in_reglist)
13559 return false;
13561 /* For Thumb-1, the address register is always modified - either by write-back
13562 or by explicit load. If the pattern does not describe an update,
13563 then the address register must be in the list of loaded registers. */
13564 if (TARGET_THUMB1)
13565 return update || addr_reg_in_reglist;
13568 return true;
13571 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13572 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13573 instruction. ADD_OFFSET is nonzero if the base address register needs
13574 to be modified with an add instruction before we can use it. */
13576 static bool
13577 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13578 int nops, HOST_WIDE_INT add_offset)
13580 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13581 if the offset isn't small enough. The reason 2 ldrs are faster
13582 is that these ARMs are able to do more than one cache access
13583 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13584 whilst the ARM8 has a double bandwidth cache. This means that
13585 these cores can do both an instruction fetch and a data fetch in
13586 a single cycle, so the trick of calculating the address into a
13587 scratch register (one of the result regs) and then doing a load
13588 multiple actually becomes slower (and no smaller in code size).
13589 That is the transformation
13591 ldr rd1, [rbase + offset]
13592 ldr rd2, [rbase + offset + 4]
13594 to
13596 add rd1, rbase, offset
13597 ldmia rd1, {rd1, rd2}
13599 produces worse code -- '3 cycles + any stalls on rd2' instead of
13600 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13601 access per cycle, the first sequence could never complete in less
13602 than 6 cycles, whereas the ldm sequence would only take 5 and
13603 would make better use of sequential accesses if not hitting the
13604 cache.
13606 We cheat here and test 'arm_ld_sched' which we currently know to
13607 only be true for the ARM8, ARM9 and StrongARM. If this ever
13608 changes, then the test below needs to be reworked. */
13609 if (nops == 2 && arm_ld_sched && add_offset != 0)
13610 return false;
13612 /* XScale has load-store double instructions, but they have stricter
13613 alignment requirements than load-store multiple, so we cannot
13614 use them.
13616 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13617 the pipeline until completion.
13619 NREGS CYCLES
13620 1 3
13621 2 4
13622 3 5
13623 4 6
13625 An ldr instruction takes 1-3 cycles, but does not block the
13626 pipeline.
13628 NREGS CYCLES
13629 1 1-3
13630 2 2-6
13631 3 3-9
13632 4 4-12
13634 Best case ldr will always win. However, the more ldr instructions
13635 we issue, the less likely we are to be able to schedule them well.
13636 Using ldr instructions also increases code size.
13638 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13639 for counts of 3 or 4 regs. */
13640 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13641 return false;
13642 return true;
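/* Illustrative arithmetic for the XScale numbers above (added for
   exposition): a 2-register ldm costs 2 + 2 = 4 cycles and blocks the
   pipeline, while two separate ldr instructions cost 1-3 cycles each and
   can overlap other work, so ldr wins; for 4 registers the ldm needs
   2 + 4 = 6 cycles against 4-12 for four ldrs, hence the NOPS <= 2 cutoff
   when tuning for XScale and not optimizing for size.  */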
13645 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13646 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13647 an array ORDER which describes the sequence to use when accessing the
13648 offsets that produces an ascending order. In this sequence, each
13649 offset must be larger by exactly 4 than the previous one. ORDER[0]
13650 must have been filled in with the lowest offset by the caller.
13651 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13652 we use to verify that ORDER produces an ascending order of registers.
13653 Return true if it was possible to construct such an order, false if
13654 not. */
13656 static bool
13657 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13658 int *unsorted_regs)
13660 int i;
13661 for (i = 1; i < nops; i++)
13663 int j;
13665 order[i] = order[i - 1];
13666 for (j = 0; j < nops; j++)
13667 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13669 /* We must find exactly one offset that is higher than the
13670 previous one by 4. */
13671 if (order[i] != order[i - 1])
13672 return false;
13673 order[i] = j;
13675 if (order[i] == order[i - 1])
13676 return false;
13677 /* The register numbers must be ascending. */
13678 if (unsorted_regs != NULL
13679 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13680 return false;
13682 return true;
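/* Worked example (added for exposition): with NOPS == 4,
   UNSORTED_OFFSETS == {4, 12, 0, 8} and ORDER[0] == 2 (the index of the
   lowest offset), the loop fills ORDER with {2, 0, 3, 1}, visiting the
   offsets as 0, 4, 8, 12.  A gap other than 4, or a duplicated offset,
   makes the function return false.  */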
13685 /* Used to determine in a peephole whether a sequence of load
13686 instructions can be changed into a load-multiple instruction.
13687 NOPS is the number of separate load instructions we are examining. The
13688 first NOPS entries in OPERANDS are the destination registers, the
13689 next NOPS entries are memory operands. If this function is
13690 successful, *BASE is set to the common base register of the memory
13691 accesses; *LOAD_OFFSET is set to the first memory location's offset
13692 from that base register.
13693 REGS is an array filled in with the destination register numbers.
13694 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13695 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13696 the sequence of registers in REGS matches the loads from ascending memory
13697 locations, and the function verifies that the register numbers are
13698 themselves ascending. If CHECK_REGS is false, the register numbers
13699 are stored in the order they are found in the operands. */
13700 static int
13701 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13702 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13704 int unsorted_regs[MAX_LDM_STM_OPS];
13705 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13706 int order[MAX_LDM_STM_OPS];
13707 rtx base_reg_rtx = NULL;
13708 int base_reg = -1;
13709 int i, ldm_case;
13711 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13712 easily extended if required. */
13713 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13715 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13717 /* Loop over the operands and check that the memory references are
13718 suitable (i.e. immediate offsets from the same base register). At
13719 the same time, extract the target register, and the memory
13720 offsets. */
13721 for (i = 0; i < nops; i++)
13723 rtx reg;
13724 rtx offset;
13726 /* Convert a subreg of a mem into the mem itself. */
13727 if (GET_CODE (operands[nops + i]) == SUBREG)
13728 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13730 gcc_assert (MEM_P (operands[nops + i]));
13732 /* Don't reorder volatile memory references; it doesn't seem worth
13733 looking for the case where the order is ok anyway. */
13734 if (MEM_VOLATILE_P (operands[nops + i]))
13735 return 0;
13737 offset = const0_rtx;
13739 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13740 || (GET_CODE (reg) == SUBREG
13741 && REG_P (reg = SUBREG_REG (reg))))
13742 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13743 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13744 || (GET_CODE (reg) == SUBREG
13745 && REG_P (reg = SUBREG_REG (reg))))
13746 && (CONST_INT_P (offset
13747 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13749 if (i == 0)
13751 base_reg = REGNO (reg);
13752 base_reg_rtx = reg;
13753 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13754 return 0;
13756 else if (base_reg != (int) REGNO (reg))
13757 /* Not addressed from the same base register. */
13758 return 0;
13760 unsorted_regs[i] = (REG_P (operands[i])
13761 ? REGNO (operands[i])
13762 : REGNO (SUBREG_REG (operands[i])));
13764 /* If it isn't an integer register, or if it overwrites the
13765 base register but isn't the last insn in the list, then
13766 we can't do this. */
13767 if (unsorted_regs[i] < 0
13768 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13769 || unsorted_regs[i] > 14
13770 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13771 return 0;
13773 /* Don't allow SP to be loaded unless it is also the base
13774 register. It guarantees that SP is reset correctly when
13775 an LDM instruction is interrupted. Otherwise, we might
13776 end up with a corrupt stack. */
13777 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13778 return 0;
13780 unsorted_offsets[i] = INTVAL (offset);
13781 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13782 order[0] = i;
13784 else
13785 /* Not a suitable memory address. */
13786 return 0;
13789 /* All the useful information has now been extracted from the
13790 operands into unsorted_regs and unsorted_offsets; additionally,
13791 order[0] has been set to the lowest offset in the list. Sort
13792 the offsets into order, verifying that they are adjacent, and
13793 check that the register numbers are ascending. */
13794 if (!compute_offset_order (nops, unsorted_offsets, order,
13795 check_regs ? unsorted_regs : NULL))
13796 return 0;
13798 if (saved_order)
13799 memcpy (saved_order, order, sizeof order);
13801 if (base)
13803 *base = base_reg;
13805 for (i = 0; i < nops; i++)
13806 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13808 *load_offset = unsorted_offsets[order[0]];
13811 if (TARGET_THUMB1
13812 && !peep2_reg_dead_p (nops, base_reg_rtx))
13813 return 0;
13815 if (unsorted_offsets[order[0]] == 0)
13816 ldm_case = 1; /* ldmia */
13817 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13818 ldm_case = 2; /* ldmib */
13819 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13820 ldm_case = 3; /* ldmda */
13821 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13822 ldm_case = 4; /* ldmdb */
13823 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13824 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13825 ldm_case = 5;
13826 else
13827 return 0;
13829 if (!multiple_operation_profitable_p (false, nops,
13830 ldm_case == 5
13831 ? unsorted_offsets[order[0]] : 0))
13832 return 0;
13834 return ldm_case;
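/* Example (added for exposition): the three loads
     r4 <- [r1], r5 <- [r1, #4], r6 <- [r1, #8]
   share base register r1, have adjacent offsets starting at 0 and
   ascending destination registers, so this returns 1 (ldmia) with
   *BASE == 1 and *LOAD_OFFSET == 0; were the lowest offset 4 instead,
   ARM mode would return 2 (ldmib).  */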
13837 /* Used to determine in a peephole whether a sequence of store instructions can
13838 be changed into a store-multiple instruction.
13839 NOPS is the number of separate store instructions we are examining.
13840 NOPS_TOTAL is the total number of instructions recognized by the peephole
13841 pattern.
13842 The first NOPS entries in OPERANDS are the source registers, the next
13843 NOPS entries are memory operands. If this function is successful, *BASE is
13844 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13845 to the first memory location's offset from that base register. REGS is an
13846 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13847 likewise filled with the corresponding rtx's.
13848 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13849 numbers to an ascending order of stores.
13850 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13851 from ascending memory locations, and the function verifies that the register
13852 numbers are themselves ascending. If CHECK_REGS is false, the register
13853 numbers are stored in the order they are found in the operands. */
13854 static int
13855 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13856 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13857 HOST_WIDE_INT *load_offset, bool check_regs)
13859 int unsorted_regs[MAX_LDM_STM_OPS];
13860 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13861 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13862 int order[MAX_LDM_STM_OPS];
13863 int base_reg = -1;
13864 rtx base_reg_rtx = NULL;
13865 int i, stm_case;
13867 /* Write back of base register is currently only supported for Thumb 1. */
13868 int base_writeback = TARGET_THUMB1;
13870 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13871 easily extended if required. */
13872 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13874 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13876 /* Loop over the operands and check that the memory references are
13877 suitable (i.e. immediate offsets from the same base register). At
13878 the same time, extract the target register, and the memory
13879 offsets. */
13880 for (i = 0; i < nops; i++)
13882 rtx reg;
13883 rtx offset;
13885 /* Convert a subreg of a mem into the mem itself. */
13886 if (GET_CODE (operands[nops + i]) == SUBREG)
13887 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13889 gcc_assert (MEM_P (operands[nops + i]));
13891 /* Don't reorder volatile memory references; it doesn't seem worth
13892 looking for the case where the order is ok anyway. */
13893 if (MEM_VOLATILE_P (operands[nops + i]))
13894 return 0;
13896 offset = const0_rtx;
13898 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13899 || (GET_CODE (reg) == SUBREG
13900 && REG_P (reg = SUBREG_REG (reg))))
13901 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13902 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13903 || (GET_CODE (reg) == SUBREG
13904 && REG_P (reg = SUBREG_REG (reg))))
13905 && (CONST_INT_P (offset
13906 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13908 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13909 ? operands[i] : SUBREG_REG (operands[i]));
13910 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13912 if (i == 0)
13914 base_reg = REGNO (reg);
13915 base_reg_rtx = reg;
13916 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13917 return 0;
13919 else if (base_reg != (int) REGNO (reg))
13920 /* Not addressed from the same base register. */
13921 return 0;
13923 /* If it isn't an integer register, then we can't do this. */
13924 if (unsorted_regs[i] < 0
13925 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13926 /* The effects are unpredictable if the base register is
13927 both updated and stored. */
13928 || (base_writeback && unsorted_regs[i] == base_reg)
13929 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13930 || unsorted_regs[i] > 14)
13931 return 0;
13933 unsorted_offsets[i] = INTVAL (offset);
13934 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13935 order[0] = i;
13937 else
13938 /* Not a suitable memory address. */
13939 return 0;
13942 /* All the useful information has now been extracted from the
13943 operands into unsorted_regs and unsorted_offsets; additionally,
13944 order[0] has been set to the lowest offset in the list. Sort
13945 the offsets into order, verifying that they are adjacent, and
13946 check that the register numbers are ascending. */
13947 if (!compute_offset_order (nops, unsorted_offsets, order,
13948 check_regs ? unsorted_regs : NULL))
13949 return 0;
13951 if (saved_order)
13952 memcpy (saved_order, order, sizeof order);
13954 if (base)
13956 *base = base_reg;
13958 for (i = 0; i < nops; i++)
13960 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13961 if (reg_rtxs)
13962 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13965 *load_offset = unsorted_offsets[order[0]];
13968 if (TARGET_THUMB1
13969 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13970 return 0;
13972 if (unsorted_offsets[order[0]] == 0)
13973 stm_case = 1; /* stmia */
13974 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13975 stm_case = 2; /* stmib */
13976 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13977 stm_case = 3; /* stmda */
13978 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13979 stm_case = 4; /* stmdb */
13980 else
13981 return 0;
13983 if (!multiple_operation_profitable_p (false, nops, 0))
13984 return 0;
13986 return stm_case;
13989 /* Routines for use in generating RTL. */
13991 /* Generate a load-multiple instruction. COUNT is the number of loads in
13992 the instruction; REGS and MEMS are arrays containing the operands.
13993 BASEREG is the base register to be used in addressing the memory operands.
13994 WBACK_OFFSET is nonzero if the instruction should update the base
13995 register. */
13997 static rtx
13998 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13999 HOST_WIDE_INT wback_offset)
14001 int i = 0, j;
14002 rtx result;
14004 if (!multiple_operation_profitable_p (false, count, 0))
14006 rtx seq;
14008 start_sequence ();
14010 for (i = 0; i < count; i++)
14011 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14013 if (wback_offset != 0)
14014 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14016 seq = get_insns ();
14017 end_sequence ();
14019 return seq;
14022 result = gen_rtx_PARALLEL (VOIDmode,
14023 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14024 if (wback_offset != 0)
14026 XVECEXP (result, 0, 0)
14027 = gen_rtx_SET (VOIDmode, basereg,
14028 plus_constant (Pmode, basereg, wback_offset));
14029 i = 1;
14030 count++;
14033 for (j = 0; i < count; i++, j++)
14034 XVECEXP (result, 0, i)
14035 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
14037 return result;
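/* Added for exposition (not part of the original source): with COUNT == 2,
   REGS == {4, 5} and WBACK_OFFSET == 8, the PARALLEL built above is

     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) MEMS[0])
                (set (reg:SI 5) MEMS[1])])

   i.e. the base-register update occupies element 0 and the loads follow,
   which is the shape the load-multiple patterns expect.  */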
14040 /* Generate a store-multiple instruction. COUNT is the number of stores in
14041 the instruction; REGS and MEMS are arrays containing the operands.
14042 BASEREG is the base register to be used in addressing the memory operands.
14043 WBACK_OFFSET is nonzero if the instruction should update the base
14044 register. */
14046 static rtx
14047 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14048 HOST_WIDE_INT wback_offset)
14050 int i = 0, j;
14051 rtx result;
14053 if (GET_CODE (basereg) == PLUS)
14054 basereg = XEXP (basereg, 0);
14056 if (!multiple_operation_profitable_p (false, count, 0))
14058 rtx seq;
14060 start_sequence ();
14062 for (i = 0; i < count; i++)
14063 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14065 if (wback_offset != 0)
14066 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14068 seq = get_insns ();
14069 end_sequence ();
14071 return seq;
14074 result = gen_rtx_PARALLEL (VOIDmode,
14075 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14076 if (wback_offset != 0)
14078 XVECEXP (result, 0, 0)
14079 = gen_rtx_SET (VOIDmode, basereg,
14080 plus_constant (Pmode, basereg, wback_offset));
14081 i = 1;
14082 count++;
14085 for (j = 0; i < count; i++, j++)
14086 XVECEXP (result, 0, i)
14087 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14089 return result;
14092 /* Generate either a load-multiple or a store-multiple instruction. This
14093 function can be used in situations where we can start with a single MEM
14094 rtx and adjust its address upwards.
14095 COUNT is the number of operations in the instruction, not counting a
14096 possible update of the base register. REGS is an array containing the
14097 register operands.
14098 BASEREG is the base register to be used in addressing the memory operands,
14099 which are constructed from BASEMEM.
14100 WRITE_BACK specifies whether the generated instruction should include an
14101 update of the base register.
14102 OFFSETP is used to pass an offset to and from this function; this offset
14103 is not used when constructing the address (instead BASEMEM should have an
14104 appropriate offset in its address), it is used only for setting
14105 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14107 static rtx
14108 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14109 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14111 rtx mems[MAX_LDM_STM_OPS];
14112 HOST_WIDE_INT offset = *offsetp;
14113 int i;
14115 gcc_assert (count <= MAX_LDM_STM_OPS);
14117 if (GET_CODE (basereg) == PLUS)
14118 basereg = XEXP (basereg, 0);
14120 for (i = 0; i < count; i++)
14122 rtx addr = plus_constant (Pmode, basereg, i * 4);
14123 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14124 offset += 4;
14127 if (write_back)
14128 *offsetp = offset;
14130 if (is_load)
14131 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14132 write_back ? 4 * count : 0);
14133 else
14134 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14135 write_back ? 4 * count : 0);
14138 rtx
14139 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14140 rtx basemem, HOST_WIDE_INT *offsetp)
14142 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14143 offsetp);
14146 rtx
14147 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14148 rtx basemem, HOST_WIDE_INT *offsetp)
14150 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14151 offsetp);
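/* Usage sketch (added for exposition; ADDR_REG and SRCMEM are placeholder
   names for the base-address register and the source MEM):

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, addr_reg, TRUE,
                                       srcmem, &off));

   loads r4-r7 from consecutive words of SRCMEM, includes a base-register
   update of 16 because WRITE_BACK is true, and leaves OFF == 16 for the
   caller's offset bookkeeping.  */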
14154 /* Called from a peephole2 expander to turn a sequence of loads into an
14155 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14156 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14157 is true if we can reorder the registers because they are used commutatively
14158 subsequently.
14159 Returns true iff we could generate a new instruction. */
14161 bool
14162 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14164 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14165 rtx mems[MAX_LDM_STM_OPS];
14166 int i, j, base_reg;
14167 rtx base_reg_rtx;
14168 HOST_WIDE_INT offset;
14169 int write_back = FALSE;
14170 int ldm_case;
14171 rtx addr;
14173 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14174 &base_reg, &offset, !sort_regs);
14176 if (ldm_case == 0)
14177 return false;
14179 if (sort_regs)
14180 for (i = 0; i < nops - 1; i++)
14181 for (j = i + 1; j < nops; j++)
14182 if (regs[i] > regs[j])
14184 int t = regs[i];
14185 regs[i] = regs[j];
14186 regs[j] = t;
14188 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14190 if (TARGET_THUMB1)
14192 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14193 gcc_assert (ldm_case == 1 || ldm_case == 5);
14194 write_back = TRUE;
14197 if (ldm_case == 5)
14199 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14200 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14201 offset = 0;
14202 if (!TARGET_THUMB1)
14204 base_reg = regs[0];
14205 base_reg_rtx = newbase;
14209 for (i = 0; i < nops; i++)
14211 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14212 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14213 SImode, addr, 0);
14215 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14216 write_back ? offset + i * 4 : 0));
14217 return true;
14220 /* Called from a peephole2 expander to turn a sequence of stores into an
14221 STM instruction. OPERANDS are the operands found by the peephole matcher;
14222 NOPS indicates how many separate stores we are trying to combine.
14223 Returns true iff we could generate a new instruction. */
14225 bool
14226 gen_stm_seq (rtx *operands, int nops)
14228 int i;
14229 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14230 rtx mems[MAX_LDM_STM_OPS];
14231 int base_reg;
14232 rtx base_reg_rtx;
14233 HOST_WIDE_INT offset;
14234 int write_back = FALSE;
14235 int stm_case;
14236 rtx addr;
14237 bool base_reg_dies;
14239 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14240 mem_order, &base_reg, &offset, true);
14242 if (stm_case == 0)
14243 return false;
14245 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14247 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14248 if (TARGET_THUMB1)
14250 gcc_assert (base_reg_dies);
14251 write_back = TRUE;
14254 if (stm_case == 5)
14256 gcc_assert (base_reg_dies);
14257 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14258 offset = 0;
14261 addr = plus_constant (Pmode, base_reg_rtx, offset);
14263 for (i = 0; i < nops; i++)
14265 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14266 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14267 SImode, addr, 0);
14269 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14270 write_back ? offset + i * 4 : 0));
14271 return true;
14274 /* Called from a peephole2 expander to turn a sequence of stores that are
14275 preceded by constant loads into an STM instruction. OPERANDS are the
14276 operands found by the peephole matcher; NOPS indicates how many
14277 separate stores we are trying to combine; there are 2 * NOPS
14278 instructions in the peephole.
14279 Returns true iff we could generate a new instruction. */
14281 bool
14282 gen_const_stm_seq (rtx *operands, int nops)
14284 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14285 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14286 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14287 rtx mems[MAX_LDM_STM_OPS];
14288 int base_reg;
14289 rtx base_reg_rtx;
14290 HOST_WIDE_INT offset;
14291 int write_back = FALSE;
14292 int stm_case;
14293 rtx addr;
14294 bool base_reg_dies;
14295 int i, j;
14296 HARD_REG_SET allocated;
14298 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14299 mem_order, &base_reg, &offset, false);
14301 if (stm_case == 0)
14302 return false;
14304 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14306 /* If the same register is used more than once, try to find a free
14307 register. */
14308 CLEAR_HARD_REG_SET (allocated);
14309 for (i = 0; i < nops; i++)
14311 for (j = i + 1; j < nops; j++)
14312 if (regs[i] == regs[j])
14314 rtx t = peep2_find_free_register (0, nops * 2,
14315 TARGET_THUMB1 ? "l" : "r",
14316 SImode, &allocated);
14317 if (t == NULL_RTX)
14318 return false;
14319 reg_rtxs[i] = t;
14320 regs[i] = REGNO (t);
14324 /* Compute an ordering that maps the register numbers to an ascending
14325 sequence. */
14326 reg_order[0] = 0;
14327 for (i = 0; i < nops; i++)
14328 if (regs[i] < regs[reg_order[0]])
14329 reg_order[0] = i;
14331 for (i = 1; i < nops; i++)
14333 int this_order = reg_order[i - 1];
14334 for (j = 0; j < nops; j++)
14335 if (regs[j] > regs[reg_order[i - 1]]
14336 && (this_order == reg_order[i - 1]
14337 || regs[j] < regs[this_order]))
14338 this_order = j;
14339 reg_order[i] = this_order;
14342 /* Ensure that registers that must be live after the instruction end
14343 up with the correct value. */
14344 for (i = 0; i < nops; i++)
14346 int this_order = reg_order[i];
14347 if ((this_order != mem_order[i]
14348 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14349 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14350 return false;
14353 /* Load the constants. */
14354 for (i = 0; i < nops; i++)
14356 rtx op = operands[2 * nops + mem_order[i]];
14357 sorted_regs[i] = regs[reg_order[i]];
14358 emit_move_insn (reg_rtxs[reg_order[i]], op);
14361 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14363 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14364 if (TARGET_THUMB1)
14366 gcc_assert (base_reg_dies);
14367 write_back = TRUE;
14370 if (stm_case == 5)
14372 gcc_assert (base_reg_dies);
14373 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14374 offset = 0;
14377 addr = plus_constant (Pmode, base_reg_rtx, offset);
14379 for (i = 0; i < nops; i++)
14381 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14382 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14383 SImode, addr, 0);
14385 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14386 write_back ? offset + i * 4 : 0));
14387 return true;
14390 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14391 unaligned copies on processors which support unaligned semantics for those
14392 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14393 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14394 An interleave factor of 1 (the minimum) will perform no interleaving.
14395 Load/store multiple are used for aligned addresses where possible. */
14397 static void
14398 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14399 HOST_WIDE_INT length,
14400 unsigned int interleave_factor)
14402 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14403 int *regnos = XALLOCAVEC (int, interleave_factor);
14404 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14405 HOST_WIDE_INT i, j;
14406 HOST_WIDE_INT remaining = length, words;
14407 rtx halfword_tmp = NULL, byte_tmp = NULL;
14408 rtx dst, src;
14409 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14410 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14411 HOST_WIDE_INT srcoffset, dstoffset;
14412 HOST_WIDE_INT src_autoinc, dst_autoinc;
14413 rtx mem, addr;
14415 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14417 /* Use hard registers if we have aligned source or destination so we can use
14418 load/store multiple with contiguous registers. */
14419 if (dst_aligned || src_aligned)
14420 for (i = 0; i < interleave_factor; i++)
14421 regs[i] = gen_rtx_REG (SImode, i);
14422 else
14423 for (i = 0; i < interleave_factor; i++)
14424 regs[i] = gen_reg_rtx (SImode);
14426 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14427 src = copy_addr_to_reg (XEXP (srcbase, 0));
14429 srcoffset = dstoffset = 0;
14431 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14432 For copying the last bytes we want to subtract this offset again. */
14433 src_autoinc = dst_autoinc = 0;
14435 for (i = 0; i < interleave_factor; i++)
14436 regnos[i] = i;
14438 /* Copy BLOCK_SIZE_BYTES chunks. */
14440 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14442 /* Load words. */
14443 if (src_aligned && interleave_factor > 1)
14445 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14446 TRUE, srcbase, &srcoffset));
14447 src_autoinc += UNITS_PER_WORD * interleave_factor;
14449 else
14451 for (j = 0; j < interleave_factor; j++)
14453 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14454 - src_autoinc));
14455 mem = adjust_automodify_address (srcbase, SImode, addr,
14456 srcoffset + j * UNITS_PER_WORD);
14457 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14459 srcoffset += block_size_bytes;
14462 /* Store words. */
14463 if (dst_aligned && interleave_factor > 1)
14465 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14466 TRUE, dstbase, &dstoffset));
14467 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14469 else
14471 for (j = 0; j < interleave_factor; j++)
14473 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14474 - dst_autoinc));
14475 mem = adjust_automodify_address (dstbase, SImode, addr,
14476 dstoffset + j * UNITS_PER_WORD);
14477 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14479 dstoffset += block_size_bytes;
14482 remaining -= block_size_bytes;
14485 /* Copy any whole words left (note these aren't interleaved with any
14486 subsequent halfword/byte load/stores in the interests of simplicity). */
14488 words = remaining / UNITS_PER_WORD;
14490 gcc_assert (words < interleave_factor);
14492 if (src_aligned && words > 1)
14494 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14495 &srcoffset));
14496 src_autoinc += UNITS_PER_WORD * words;
14498 else
14500 for (j = 0; j < words; j++)
14502 addr = plus_constant (Pmode, src,
14503 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14504 mem = adjust_automodify_address (srcbase, SImode, addr,
14505 srcoffset + j * UNITS_PER_WORD);
14506 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14508 srcoffset += words * UNITS_PER_WORD;
14511 if (dst_aligned && words > 1)
14513 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14514 &dstoffset));
14515 dst_autoinc += words * UNITS_PER_WORD;
14517 else
14519 for (j = 0; j < words; j++)
14521 addr = plus_constant (Pmode, dst,
14522 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14523 mem = adjust_automodify_address (dstbase, SImode, addr,
14524 dstoffset + j * UNITS_PER_WORD);
14525 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14527 dstoffset += words * UNITS_PER_WORD;
14530 remaining -= words * UNITS_PER_WORD;
14532 gcc_assert (remaining < 4);
14534 /* Copy a halfword if necessary. */
14536 if (remaining >= 2)
14538 halfword_tmp = gen_reg_rtx (SImode);
14540 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14541 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14542 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14544 /* Either write out immediately, or delay until we've loaded the last
14545 byte, depending on interleave factor. */
14546 if (interleave_factor == 1)
14548 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14549 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14550 emit_insn (gen_unaligned_storehi (mem,
14551 gen_lowpart (HImode, halfword_tmp)));
14552 halfword_tmp = NULL;
14553 dstoffset += 2;
14556 remaining -= 2;
14557 srcoffset += 2;
14560 gcc_assert (remaining < 2);
14562 /* Copy last byte. */
14564 if ((remaining & 1) != 0)
14566 byte_tmp = gen_reg_rtx (SImode);
14568 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14569 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14570 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14572 if (interleave_factor == 1)
14574 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14575 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14576 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14577 byte_tmp = NULL;
14578 dstoffset++;
14581 remaining--;
14582 srcoffset++;
14585 /* Store last halfword if we haven't done so already. */
14587 if (halfword_tmp)
14589 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14590 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14591 emit_insn (gen_unaligned_storehi (mem,
14592 gen_lowpart (HImode, halfword_tmp)));
14593 dstoffset += 2;
14596 /* Likewise for last byte. */
14598 if (byte_tmp)
14600 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14601 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14602 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14603 dstoffset++;
14606 gcc_assert (remaining == 0 && srcoffset == dstoffset);
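/* Worked example (added for exposition): copying LENGTH == 11 bytes with
   INTERLEAVE_FACTOR == 1 performs two 4-byte word copies in the main loop
   (block_size_bytes == 4), leaves no whole words, then copies one halfword
   and one byte for the 3 trailing bytes, ending with remaining == 0 and
   srcoffset == dstoffset == 11.  */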
14609 /* From mips_adjust_block_mem:
14611 Helper function for doing a loop-based block operation on memory
14612 reference MEM. Each iteration of the loop will operate on LENGTH
14613 bytes of MEM.
14615 Create a new base register for use within the loop and point it to
14616 the start of MEM. Create a new memory reference that uses this
14617 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14619 static void
14620 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14621 rtx *loop_mem)
14623 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14625 /* Although the new mem does not refer to a known location,
14626 it does keep up to LENGTH bytes of alignment. */
14627 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14628 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14631 /* From mips_block_move_loop:
14633 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14634 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14635 the memory regions do not overlap. */
14637 static void
14638 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14639 unsigned int interleave_factor,
14640 HOST_WIDE_INT bytes_per_iter)
14642 rtx src_reg, dest_reg, final_src, test;
14643 HOST_WIDE_INT leftover;
14645 leftover = length % bytes_per_iter;
14646 length -= leftover;
14648 /* Create registers and memory references for use within the loop. */
14649 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14650 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14652 /* Calculate the value that SRC_REG should have after the last iteration of
14653 the loop. */
14654 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14655 0, 0, OPTAB_WIDEN);
14657 /* Emit the start of the loop. */
14658 rtx_code_label *label = gen_label_rtx ();
14659 emit_label (label);
14661 /* Emit the loop body. */
14662 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14663 interleave_factor);
14665 /* Move on to the next block. */
14666 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14667 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14669 /* Emit the loop condition. */
14670 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14671 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14673 /* Mop up any left-over bytes. */
14674 if (leftover)
14675 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14678 /* Emit a block move when either the source or destination is unaligned (not
14679 aligned to a four-byte boundary). This may need further tuning depending on
14680 core type, optimize_size setting, etc. */
14682 static int
14683 arm_movmemqi_unaligned (rtx *operands)
14685 HOST_WIDE_INT length = INTVAL (operands[2]);
14687 if (optimize_size)
14689 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14690 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14691 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14692 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14693 or dst_aligned though: allow more interleaving in those cases since the
14694 resulting code can be smaller. */
14695 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14696 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14698 if (length > 12)
14699 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14700 interleave_factor, bytes_per_iter);
14701 else
14702 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14703 interleave_factor);
14705 else
14707 /* Note that the loop created by arm_block_move_unaligned_loop may be
14708 subject to loop unrolling, which makes tuning this condition a little
14709 redundant. */
14710 if (length > 32)
14711 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14712 else
14713 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14716 return 1;
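/* Illustrative numbers (added for exposition): when not optimizing for
   size, a 40-byte unaligned copy takes the loop path (40 > 32) with an
   interleave factor of 4 and 16 bytes per iteration; the loop covers 32
   bytes and the straight-line copy mops up the remaining 8.  At -Os with
   neither operand word-aligned, the same copy instead uses a
   4-bytes-per-iteration loop to keep the inline expansion small.  */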
14719 int
14720 arm_gen_movmemqi (rtx *operands)
14722 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14723 HOST_WIDE_INT srcoffset, dstoffset;
14724 int i;
14725 rtx src, dst, srcbase, dstbase;
14726 rtx part_bytes_reg = NULL;
14727 rtx mem;
14729 if (!CONST_INT_P (operands[2])
14730 || !CONST_INT_P (operands[3])
14731 || INTVAL (operands[2]) > 64)
14732 return 0;
14734 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14735 return arm_movmemqi_unaligned (operands);
14737 if (INTVAL (operands[3]) & 3)
14738 return 0;
14740 dstbase = operands[0];
14741 srcbase = operands[1];
14743 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14744 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14746 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14747 out_words_to_go = INTVAL (operands[2]) / 4;
14748 last_bytes = INTVAL (operands[2]) & 3;
14749 dstoffset = srcoffset = 0;
14751 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14752 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14754 for (i = 0; in_words_to_go >= 2; i+=4)
14756 if (in_words_to_go > 4)
14757 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14758 TRUE, srcbase, &srcoffset));
14759 else
14760 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14761 src, FALSE, srcbase,
14762 &srcoffset));
14764 if (out_words_to_go)
14766 if (out_words_to_go > 4)
14767 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14768 TRUE, dstbase, &dstoffset));
14769 else if (out_words_to_go != 1)
14770 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14771 out_words_to_go, dst,
14772 (last_bytes == 0
14773 ? FALSE : TRUE),
14774 dstbase, &dstoffset));
14775 else
14777 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14778 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14779 if (last_bytes != 0)
14781 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14782 dstoffset += 4;
14787 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14788 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14791 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14792 if (out_words_to_go)
14794 rtx sreg;
14796 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14797 sreg = copy_to_reg (mem);
14799 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14800 emit_move_insn (mem, sreg);
14801 in_words_to_go--;
14803 gcc_assert (!in_words_to_go); /* Sanity check */
14806 if (in_words_to_go)
14808 gcc_assert (in_words_to_go > 0);
14810 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14811 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14814 gcc_assert (!last_bytes || part_bytes_reg);
14816 if (BYTES_BIG_ENDIAN && last_bytes)
14818 rtx tmp = gen_reg_rtx (SImode);
14820 /* The bytes we want are in the top end of the word. */
14821 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14822 GEN_INT (8 * (4 - last_bytes))));
14823 part_bytes_reg = tmp;
14825 while (last_bytes)
14827 mem = adjust_automodify_address (dstbase, QImode,
14828 plus_constant (Pmode, dst,
14829 last_bytes - 1),
14830 dstoffset + last_bytes - 1);
14831 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14833 if (--last_bytes)
14835 tmp = gen_reg_rtx (SImode);
14836 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14837 part_bytes_reg = tmp;
14842 else
14844 if (last_bytes > 1)
14846 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14847 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14848 last_bytes -= 2;
14849 if (last_bytes)
14851 rtx tmp = gen_reg_rtx (SImode);
14852 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14853 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14854 part_bytes_reg = tmp;
14855 dstoffset += 2;
14859 if (last_bytes)
14861 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14862 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14866 return 1;
14869 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14870 by mode size. */
14871 inline static rtx
14872 next_consecutive_mem (rtx mem)
14874 machine_mode mode = GET_MODE (mem);
14875 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14876 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14878 return adjust_automodify_address (mem, mode, addr, offset);
14881 /* Copy using LDRD/STRD instructions whenever possible.
14882 Returns true upon success. */
14883 bool
14884 gen_movmem_ldrd_strd (rtx *operands)
14886 unsigned HOST_WIDE_INT len;
14887 HOST_WIDE_INT align;
14888 rtx src, dst, base;
14889 rtx reg0;
14890 bool src_aligned, dst_aligned;
14891 bool src_volatile, dst_volatile;
14893 gcc_assert (CONST_INT_P (operands[2]));
14894 gcc_assert (CONST_INT_P (operands[3]));
14896 len = UINTVAL (operands[2]);
14897 if (len > 64)
14898 return false;
14900 /* Maximum alignment we can assume for both src and dst buffers. */
14901 align = INTVAL (operands[3]);
14903 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14904 return false;
14906 /* Place src and dst addresses in registers
14907 and update the corresponding mem rtx. */
14908 dst = operands[0];
14909 dst_volatile = MEM_VOLATILE_P (dst);
14910 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14911 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14912 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14914 src = operands[1];
14915 src_volatile = MEM_VOLATILE_P (src);
14916 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14917 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14918 src = adjust_automodify_address (src, VOIDmode, base, 0);
14920 if (!unaligned_access && !(src_aligned && dst_aligned))
14921 return false;
14923 if (src_volatile || dst_volatile)
14924 return false;
14926 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14927 if (!(dst_aligned || src_aligned))
14928 return arm_gen_movmemqi (operands);
14930 src = adjust_address (src, DImode, 0);
14931 dst = adjust_address (dst, DImode, 0);
14932 while (len >= 8)
14934 len -= 8;
14935 reg0 = gen_reg_rtx (DImode);
14936 if (src_aligned)
14937 emit_move_insn (reg0, src);
14938 else
14939 emit_insn (gen_unaligned_loaddi (reg0, src));
14941 if (dst_aligned)
14942 emit_move_insn (dst, reg0);
14943 else
14944 emit_insn (gen_unaligned_storedi (dst, reg0));
14946 src = next_consecutive_mem (src);
14947 dst = next_consecutive_mem (dst);
14950 gcc_assert (len < 8);
14951 if (len >= 4)
14953 /* More than a word but less than a double-word to copy. Copy a word. */
14954 reg0 = gen_reg_rtx (SImode);
14955 src = adjust_address (src, SImode, 0);
14956 dst = adjust_address (dst, SImode, 0);
14957 if (src_aligned)
14958 emit_move_insn (reg0, src);
14959 else
14960 emit_insn (gen_unaligned_loadsi (reg0, src));
14962 if (dst_aligned)
14963 emit_move_insn (dst, reg0);
14964 else
14965 emit_insn (gen_unaligned_storesi (dst, reg0));
14967 src = next_consecutive_mem (src);
14968 dst = next_consecutive_mem (dst);
14969 len -= 4;
14972 if (len == 0)
14973 return true;
14975 /* Copy the remaining bytes. */
14976 if (len >= 2)
14978 dst = adjust_address (dst, HImode, 0);
14979 src = adjust_address (src, HImode, 0);
14980 reg0 = gen_reg_rtx (SImode);
14981 if (src_aligned)
14982 emit_insn (gen_zero_extendhisi2 (reg0, src));
14983 else
14984 emit_insn (gen_unaligned_loadhiu (reg0, src));
14986 if (dst_aligned)
14987 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14988 else
14989 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14991 src = next_consecutive_mem (src);
14992 dst = next_consecutive_mem (dst);
14993 if (len == 2)
14994 return true;
14997 dst = adjust_address (dst, QImode, 0);
14998 src = adjust_address (src, QImode, 0);
14999 reg0 = gen_reg_rtx (QImode);
15000 emit_move_insn (reg0, src);
15001 emit_move_insn (dst, reg0);
15002 return true;
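/* Worked example (added for exposition): a 14-byte copy with both sides
   word-aligned expands to one DImode move (8 bytes, normally an LDRD/STRD
   pair), one SImode move (4 bytes) and one halfword move (2 bytes); the
   final byte-copy tail is only reached for odd lengths.  */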
15005 /* Select a dominance comparison mode if possible for a test of the general
15006 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15007 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15008 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15009 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15010 In all cases OP will be either EQ or NE, but we don't need to know which
15011 here. If we are unable to support a dominance comparison we return
15012 CC mode. This will then fail to match for the RTL expressions that
15013 generate this call. */
15014 machine_mode
15015 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15017 enum rtx_code cond1, cond2;
15018 int swapped = 0;
15020 /* Currently we will probably get the wrong result if the individual
15021 comparisons are not simple. This also ensures that it is safe to
15022 reverse a comparison if necessary. */
15023 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15024 != CCmode)
15025 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15026 != CCmode))
15027 return CCmode;
15029 /* The if_then_else variant of this tests the second condition if the
15030 first passes, but is true if the first fails. Reverse the first
15031 condition to get a true "inclusive-or" expression. */
15032 if (cond_or == DOM_CC_NX_OR_Y)
15033 cond1 = reverse_condition (cond1);
15035 /* If the comparisons are not equal, and one doesn't dominate the other,
15036 then we can't do this. */
15037 if (cond1 != cond2
15038 && !comparison_dominates_p (cond1, cond2)
15039 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15040 return CCmode;
15042 if (swapped)
15043 std::swap (cond1, cond2);
15045 switch (cond1)
15047 case EQ:
15048 if (cond_or == DOM_CC_X_AND_Y)
15049 return CC_DEQmode;
15051 switch (cond2)
15053 case EQ: return CC_DEQmode;
15054 case LE: return CC_DLEmode;
15055 case LEU: return CC_DLEUmode;
15056 case GE: return CC_DGEmode;
15057 case GEU: return CC_DGEUmode;
15058 default: gcc_unreachable ();
15061 case LT:
15062 if (cond_or == DOM_CC_X_AND_Y)
15063 return CC_DLTmode;
15065 switch (cond2)
15067 case LT:
15068 return CC_DLTmode;
15069 case LE:
15070 return CC_DLEmode;
15071 case NE:
15072 return CC_DNEmode;
15073 default:
15074 gcc_unreachable ();
15077 case GT:
15078 if (cond_or == DOM_CC_X_AND_Y)
15079 return CC_DGTmode;
15081 switch (cond2)
15083 case GT:
15084 return CC_DGTmode;
15085 case GE:
15086 return CC_DGEmode;
15087 case NE:
15088 return CC_DNEmode;
15089 default:
15090 gcc_unreachable ();
15093 case LTU:
15094 if (cond_or == DOM_CC_X_AND_Y)
15095 return CC_DLTUmode;
15097 switch (cond2)
15099 case LTU:
15100 return CC_DLTUmode;
15101 case LEU:
15102 return CC_DLEUmode;
15103 case NE:
15104 return CC_DNEmode;
15105 default:
15106 gcc_unreachable ();
15109 case GTU:
15110 if (cond_or == DOM_CC_X_AND_Y)
15111 return CC_DGTUmode;
15113 switch (cond2)
15115 case GTU:
15116 return CC_DGTUmode;
15117 case GEU:
15118 return CC_DGEUmode;
15119 case NE:
15120 return CC_DNEmode;
15121 default:
15122 gcc_unreachable ();
15125 /* The remaining cases only occur when both comparisons are the
15126 same. */
15127 case NE:
15128 gcc_assert (cond1 == cond2);
15129 return CC_DNEmode;
15131 case LE:
15132 gcc_assert (cond1 == cond2);
15133 return CC_DLEmode;
15135 case GE:
15136 gcc_assert (cond1 == cond2);
15137 return CC_DGEmode;
15139 case LEU:
15140 gcc_assert (cond1 == cond2);
15141 return CC_DLEUmode;
15143 case GEU:
15144 gcc_assert (cond1 == cond2);
15145 return CC_DGEUmode;
15147 default:
15148 gcc_unreachable ();
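/* Example (added for exposition): for (and (eq r0 0) (ge r1 0)) compared
   against zero, COND_OR is DOM_CC_X_AND_Y, COND1 is EQ and CC_DEQmode is
   returned immediately; for the DOM_CC_X_OR_Y form the pair (EQ, GE)
   selects CC_DGEmode instead, since EQ dominates GE.  */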
15152 machine_mode
15153 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15155 /* All floating point compares return CCFP if it is an equality
15156 comparison, and CCFPE otherwise. */
15157 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15159 switch (op)
15161 case EQ:
15162 case NE:
15163 case UNORDERED:
15164 case ORDERED:
15165 case UNLT:
15166 case UNLE:
15167 case UNGT:
15168 case UNGE:
15169 case UNEQ:
15170 case LTGT:
15171 return CCFPmode;
15173 case LT:
15174 case LE:
15175 case GT:
15176 case GE:
15177 return CCFPEmode;
15179 default:
15180 gcc_unreachable ();
15184 /* A compare with a shifted operand. Because of canonicalization, the
15185 comparison will have to be swapped when we emit the assembler. */
15186 if (GET_MODE (y) == SImode
15187 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15188 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15189 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15190 || GET_CODE (x) == ROTATERT))
15191 return CC_SWPmode;
15193 /* This operation is performed swapped, but since we only rely on the Z
15194 flag we don't need an additional mode. */
15195 if (GET_MODE (y) == SImode
15196 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15197 && GET_CODE (x) == NEG
15198 && (op == EQ || op == NE))
15199 return CC_Zmode;
15201 /* This is a special case that is used by combine to allow a
15202 comparison of a shifted byte load to be split into a zero-extend
15203 followed by a comparison of the shifted integer (only valid for
15204 equalities and unsigned inequalities). */
15205 if (GET_MODE (x) == SImode
15206 && GET_CODE (x) == ASHIFT
15207 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15208 && GET_CODE (XEXP (x, 0)) == SUBREG
15209 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15210 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15211 && (op == EQ || op == NE
15212 || op == GEU || op == GTU || op == LTU || op == LEU)
15213 && CONST_INT_P (y))
15214 return CC_Zmode;
15216 /* A construct for a conditional compare, if the false arm contains
15217 0, then both conditions must be true, otherwise either condition
15218 must be true. Not all conditions are possible, so CCmode is
15219 returned if it can't be done. */
15220 if (GET_CODE (x) == IF_THEN_ELSE
15221 && (XEXP (x, 2) == const0_rtx
15222 || XEXP (x, 2) == const1_rtx)
15223 && COMPARISON_P (XEXP (x, 0))
15224 && COMPARISON_P (XEXP (x, 1)))
15225 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15226 INTVAL (XEXP (x, 2)));
15228 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15229 if (GET_CODE (x) == AND
15230 && (op == EQ || op == NE)
15231 && COMPARISON_P (XEXP (x, 0))
15232 && COMPARISON_P (XEXP (x, 1)))
15233 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15234 DOM_CC_X_AND_Y);
15236 if (GET_CODE (x) == IOR
15237 && (op == EQ || op == NE)
15238 && COMPARISON_P (XEXP (x, 0))
15239 && COMPARISON_P (XEXP (x, 1)))
15240 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15241 DOM_CC_X_OR_Y);
15243 /* An operation (on Thumb) where we want to test for a single bit.
15244 This is done by shifting that bit up into the top bit of a
15245 scratch register; we can then branch on the sign bit. */
15246 if (TARGET_THUMB1
15247 && GET_MODE (x) == SImode
15248 && (op == EQ || op == NE)
15249 && GET_CODE (x) == ZERO_EXTRACT
15250 && XEXP (x, 1) == const1_rtx)
15251 return CC_Nmode;
15253 /* An operation that sets the condition codes as a side-effect, the
15254 V flag is not set correctly, so we can only use comparisons where
15255 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15256 instead.) */
15257 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15258 if (GET_MODE (x) == SImode
15259 && y == const0_rtx
15260 && (op == EQ || op == NE || op == LT || op == GE)
15261 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15262 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15263 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15264 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15265 || GET_CODE (x) == LSHIFTRT
15266 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15267 || GET_CODE (x) == ROTATERT
15268 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15269 return CC_NOOVmode;
15271 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15272 return CC_Zmode;
15274 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15275 && GET_CODE (x) == PLUS
15276 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15277 return CC_Cmode;
15279 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15281 switch (op)
15283 case EQ:
15284 case NE:
15285 /* A DImode comparison against zero can be implemented by
15286 or'ing the two halves together. */
15287 if (y == const0_rtx)
15288 return CC_Zmode;
15290 /* We can do an equality test in three Thumb instructions. */
15291 if (!TARGET_32BIT)
15292 return CC_Zmode;
15294 /* FALLTHROUGH */
15296 case LTU:
15297 case LEU:
15298 case GTU:
15299 case GEU:
15300 /* DImode unsigned comparisons can be implemented by cmp +
15301 cmpeq without a scratch register. Not worth doing in
15302 Thumb-2. */
15303 if (TARGET_32BIT)
15304 return CC_CZmode;
15306 /* FALLTHROUGH */
15308 case LT:
15309 case LE:
15310 case GT:
15311 case GE:
15312 /* DImode signed and unsigned comparisons can be implemented
15313 by cmp + sbcs with a scratch register, but that does not
15314 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15315 gcc_assert (op != EQ && op != NE);
15316 return CC_NCVmode;
15318 default:
15319 gcc_unreachable ();
15323 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15324 return GET_MODE (x);
15326 return CCmode;
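/* Example (added for exposition): a comparison such as
   (ltu (plus:SI r0 r1) r1) hits the CC_Cmode case above, because testing
   whether an unsigned addition wrapped around only needs the carry flag;
   a plain register-register compare falls through and returns CCmode.  */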
15329 /* X and Y are two things to compare using CODE. Emit the compare insn and
15330 return the rtx for register 0 in the proper mode. FP means this is a
15331 floating point compare: I don't think that it is needed on the arm. */
15332 rtx
15333 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15335 machine_mode mode;
15336 rtx cc_reg;
15337 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15339 /* We might have X as a constant, Y as a register because of the predicates
15340 used for cmpdi. If so, force X to a register here. */
15341 if (dimode_comparison && !REG_P (x))
15342 x = force_reg (DImode, x);
15344 mode = SELECT_CC_MODE (code, x, y);
15345 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15347 if (dimode_comparison
15348 && mode != CC_CZmode)
15350 rtx clobber, set;
15352 /* To compare two non-zero values for equality, XOR them and
15353 then compare against zero. Not used for ARM mode; there
15354 CC_CZmode is cheaper. */
15355 if (mode == CC_Zmode && y != const0_rtx)
15357 gcc_assert (!reload_completed);
15358 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15359 y = const0_rtx;
15362 /* A scratch register is required. */
15363 if (reload_completed)
15364 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15365 else
15366 scratch = gen_rtx_SCRATCH (SImode);
15368 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15369 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15370 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15372 else
15373 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15375 return cc_reg;
15378 /* Generate a sequence of insns that will generate the correct return
15379 address mask depending on the physical architecture that the program
15380 is running on. */
15381 rtx
15382 arm_gen_return_addr_mask (void)
15384 rtx reg = gen_reg_rtx (Pmode);
15386 emit_insn (gen_return_addr_mask (reg));
15387 return reg;
15390 void
15391 arm_reload_in_hi (rtx *operands)
15393 rtx ref = operands[1];
15394 rtx base, scratch;
15395 HOST_WIDE_INT offset = 0;
15397 if (GET_CODE (ref) == SUBREG)
15399 offset = SUBREG_BYTE (ref);
15400 ref = SUBREG_REG (ref);
15403 if (REG_P (ref))
15405 /* We have a pseudo which has been spilt onto the stack; there
15406 are two cases here: the first where there is a simple
15407 stack-slot replacement and a second where the stack-slot is
15408 out of range, or is used as a subreg. */
15409 if (reg_equiv_mem (REGNO (ref)))
15411 ref = reg_equiv_mem (REGNO (ref));
15412 base = find_replacement (&XEXP (ref, 0));
15414 else
15415 /* The slot is out of range, or was dressed up in a SUBREG. */
15416 base = reg_equiv_address (REGNO (ref));
15418 else
15419 base = find_replacement (&XEXP (ref, 0));
15421 /* Handle the case where the address is too complex to be offset by 1. */
15422 if (GET_CODE (base) == MINUS
15423 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15425 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15427 emit_set_insn (base_plus, base);
15428 base = base_plus;
15430 else if (GET_CODE (base) == PLUS)
15432 /* The addend must be CONST_INT, or we would have dealt with it above. */
15433 HOST_WIDE_INT hi, lo;
15435 offset += INTVAL (XEXP (base, 1));
15436 base = XEXP (base, 0);
15438 /* Rework the address into a legal sequence of insns. */
15439 /* Valid range for lo is -4095 -> 4095 */
15440 lo = (offset >= 0
15441 ? (offset & 0xfff)
15442 : -((-offset) & 0xfff));
15444 /* Corner case, if lo is the max offset then we would be out of range
15445 once we have added the additional 1 below, so bump the msb into the
15446 pre-loading insn(s). */
15447 if (lo == 4095)
15448 lo &= 0x7ff;
15450 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15451 ^ (HOST_WIDE_INT) 0x80000000)
15452 - (HOST_WIDE_INT) 0x80000000);
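/* Worked example: offset 0x1234 gives lo = 0x234, hi = 0x1000; the
   corner case offset 4095 gives lo = 0x7ff, hi = 0x800, so that the
   lo + 1 used below is still a valid 12-bit offset.  */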
15454 gcc_assert (hi + lo == offset);
15456 if (hi != 0)
15458 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15460 /* Get the base address; addsi3 knows how to handle constants
15461 that require more than one insn. */
15462 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15463 base = base_plus;
15464 offset = lo;
15468 /* Operands[2] may overlap operands[0] (though it won't overlap
15469 operands[1]); that is why we asked for a DImode reg -- so we can
15470 use the half that does not overlap. */
15471 if (REGNO (operands[2]) == REGNO (operands[0]))
15472 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15473 else
15474 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15476 emit_insn (gen_zero_extendqisi2 (scratch,
15477 gen_rtx_MEM (QImode,
15478 plus_constant (Pmode, base,
15479 offset))));
15480 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15481 gen_rtx_MEM (QImode,
15482 plus_constant (Pmode, base,
15483 offset + 1))));
15484 if (!BYTES_BIG_ENDIAN)
15485 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15486 gen_rtx_IOR (SImode,
15487 gen_rtx_ASHIFT
15488 (SImode,
15489 gen_rtx_SUBREG (SImode, operands[0], 0),
15490 GEN_INT (8)),
15491 scratch));
15492 else
15493 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15494 gen_rtx_IOR (SImode,
15495 gen_rtx_ASHIFT (SImode, scratch,
15496 GEN_INT (8)),
15497 gen_rtx_SUBREG (SImode, operands[0], 0)));
15500 /* Handle storing a half-word to memory during reload by synthesizing as two
15501 byte stores. Take care not to clobber the input values until after we
15502 have moved them somewhere safe. This code assumes that if the DImode
15503 scratch in operands[2] overlaps either the input value or output address
15504 in some way, then that value must die in this insn (we absolutely need
15505 two scratch registers for some corner cases). */
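/* On a little-endian target the sequence emitted below is essentially:
      strb  outval,  [base, #offset]        @ low byte
      lsr   scratch, outval, #8
      strb  scratch, [base, #offset + 1]    @ high byte
   with the two byte addresses swapped for big-endian.  */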
15506 void
15507 arm_reload_out_hi (rtx *operands)
15509 rtx ref = operands[0];
15510 rtx outval = operands[1];
15511 rtx base, scratch;
15512 HOST_WIDE_INT offset = 0;
15514 if (GET_CODE (ref) == SUBREG)
15516 offset = SUBREG_BYTE (ref);
15517 ref = SUBREG_REG (ref);
15520 if (REG_P (ref))
15522 /* We have a pseudo which has been spilt onto the stack; there
15523 are two cases here: the first where there is a simple
15524 stack-slot replacement and a second where the stack-slot is
15525 out of range, or is used as a subreg. */
15526 if (reg_equiv_mem (REGNO (ref)))
15528 ref = reg_equiv_mem (REGNO (ref));
15529 base = find_replacement (&XEXP (ref, 0));
15531 else
15532 /* The slot is out of range, or was dressed up in a SUBREG. */
15533 base = reg_equiv_address (REGNO (ref));
15535 else
15536 base = find_replacement (&XEXP (ref, 0));
15538 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15540 /* Handle the case where the address is too complex to be offset by 1. */
15541 if (GET_CODE (base) == MINUS
15542 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15544 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15546 /* Be careful not to destroy OUTVAL. */
15547 if (reg_overlap_mentioned_p (base_plus, outval))
15549 /* Updating base_plus might destroy outval, see if we can
15550 swap the scratch and base_plus. */
15551 if (!reg_overlap_mentioned_p (scratch, outval))
15552 std::swap (scratch, base_plus);
15553 else
15555 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15557 /* Be conservative and copy OUTVAL into the scratch now,
15558 this should only be necessary if outval is a subreg
15559 of something larger than a word. */
15560 /* XXX Might this clobber base? I can't see how it can,
15561 since scratch is known to overlap with OUTVAL, and
15562 must be wider than a word. */
15563 emit_insn (gen_movhi (scratch_hi, outval));
15564 outval = scratch_hi;
15568 emit_set_insn (base_plus, base);
15569 base = base_plus;
15571 else if (GET_CODE (base) == PLUS)
15573 /* The addend must be CONST_INT, or we would have dealt with it above. */
15574 HOST_WIDE_INT hi, lo;
15576 offset += INTVAL (XEXP (base, 1));
15577 base = XEXP (base, 0);
15579 /* Rework the address into a legal sequence of insns. */
15580 /* Valid range for lo is -4095 -> 4095 */
15581 lo = (offset >= 0
15582 ? (offset & 0xfff)
15583 : -((-offset) & 0xfff));
15585 /* Corner case, if lo is the max offset then we would be out of range
15586 once we have added the additional 1 below, so bump the msb into the
15587 pre-loading insn(s). */
15588 if (lo == 4095)
15589 lo &= 0x7ff;
15591 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15592 ^ (HOST_WIDE_INT) 0x80000000)
15593 - (HOST_WIDE_INT) 0x80000000);
15595 gcc_assert (hi + lo == offset);
15597 if (hi != 0)
15599 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15601 /* Be careful not to destroy OUTVAL. */
15602 if (reg_overlap_mentioned_p (base_plus, outval))
15604 /* Updating base_plus might destroy outval, see if we
15605 can swap the scratch and base_plus. */
15606 if (!reg_overlap_mentioned_p (scratch, outval))
15607 std::swap (scratch, base_plus);
15608 else
15610 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15612 /* Be conservative and copy outval into scratch now,
15613 this should only be necessary if outval is a
15614 subreg of something larger than a word. */
15615 /* XXX Might this clobber base? I can't see how it
15616 can, since scratch is known to overlap with
15617 outval. */
15618 emit_insn (gen_movhi (scratch_hi, outval));
15619 outval = scratch_hi;
15623 /* Get the base address; addsi3 knows how to handle constants
15624 that require more than one insn. */
15625 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15626 base = base_plus;
15627 offset = lo;
15631 if (BYTES_BIG_ENDIAN)
15633 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15634 plus_constant (Pmode, base,
15635 offset + 1)),
15636 gen_lowpart (QImode, outval)));
15637 emit_insn (gen_lshrsi3 (scratch,
15638 gen_rtx_SUBREG (SImode, outval, 0),
15639 GEN_INT (8)));
15640 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15641 offset)),
15642 gen_lowpart (QImode, scratch)));
15644 else
15646 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15647 offset)),
15648 gen_lowpart (QImode, outval)));
15649 emit_insn (gen_lshrsi3 (scratch,
15650 gen_rtx_SUBREG (SImode, outval, 0),
15651 GEN_INT (8)));
15652 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15653 plus_constant (Pmode, base,
15654 offset + 1)),
15655 gen_lowpart (QImode, scratch)));
15659 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15660 (padded to the size of a word) should be passed in a register. */
15662 static bool
15663 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15665 if (TARGET_AAPCS_BASED)
15666 return must_pass_in_stack_var_size (mode, type);
15667 else
15668 return must_pass_in_stack_var_size_or_pad (mode, type);
15672 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15673 Return true if an argument passed on the stack should be padded upwards,
15674 i.e. if the least-significant byte has useful data.
15675 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15676 aggregate types are placed at the lowest memory address. */
15678 bool
15679 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15681 if (!TARGET_AAPCS_BASED)
15682 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15684 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15685 return false;
15687 return true;
15691 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15692 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15693 register has useful data, and return the opposite if the most
15694 significant byte does. */
15696 bool
15697 arm_pad_reg_upward (machine_mode mode,
15698 tree type, int first ATTRIBUTE_UNUSED)
15700 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15702 /* For AAPCS, small aggregates, small fixed-point types,
15703 and small complex types are always padded upwards. */
15704 if (type)
15706 if ((AGGREGATE_TYPE_P (type)
15707 || TREE_CODE (type) == COMPLEX_TYPE
15708 || FIXED_POINT_TYPE_P (type))
15709 && int_size_in_bytes (type) <= 4)
15710 return true;
15712 else
15714 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15715 && GET_MODE_SIZE (mode) <= 4)
15716 return true;
15720 /* Otherwise, use default padding. */
15721 return !BYTES_BIG_ENDIAN;
15724 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15725 assuming that the address in the base register is word aligned. */
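/* For reference: ARM-state LDRD/STRD take an immediate offset in the
   range +/-255, while Thumb-2 allows +/-1020 but requires the offset
   to be a multiple of 4 -- exactly the checks made below.  */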
15726 bool
15727 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15729 HOST_WIDE_INT max_offset;
15731 /* Offset must be a multiple of 4 in Thumb mode. */
15732 if (TARGET_THUMB2 && ((offset & 3) != 0))
15733 return false;
15735 if (TARGET_THUMB2)
15736 max_offset = 1020;
15737 else if (TARGET_ARM)
15738 max_offset = 255;
15739 else
15740 return false;
15742 return ((offset <= max_offset) && (offset >= -max_offset));
15745 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15746 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15747 Assumes that the address in the base register RN is word aligned. Pattern
15748 guarantees that both memory accesses use the same base register,
15749 the offsets are constants within the range, and the gap between the offsets is 4.
15750 If reload has completed, also check that the registers are legal. WBACK indicates
15751 whether the address is updated. LOAD indicates whether the memory access is a load or a store. */
15752 bool
15753 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15754 bool wback, bool load)
15756 unsigned int t, t2, n;
15758 if (!reload_completed)
15759 return true;
15761 if (!offset_ok_for_ldrd_strd (offset))
15762 return false;
15764 t = REGNO (rt);
15765 t2 = REGNO (rt2);
15766 n = REGNO (rn);
15768 if ((TARGET_THUMB2)
15769 && ((wback && (n == t || n == t2))
15770 || (t == SP_REGNUM)
15771 || (t == PC_REGNUM)
15772 || (t2 == SP_REGNUM)
15773 || (t2 == PC_REGNUM)
15774 || (!load && (n == PC_REGNUM))
15775 || (load && (t == t2))
15776 /* Triggers Cortex-M3 LDRD errata. */
15777 || (!wback && load && fix_cm3_ldrd && (n == t))))
15778 return false;
15780 if ((TARGET_ARM)
15781 && ((wback && (n == t || n == t2))
15782 || (t2 == PC_REGNUM)
15783 || (t % 2 != 0) /* First destination register is not even. */
15784 || (t2 != t + 1)
15785 /* PC can be used as base register (for offset addressing only),
15786 but it is deprecated. */
15787 || (n == PC_REGNUM)))
15788 return false;
15790 return true;
15793 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15794 operand MEM's address contains an immediate offset from the base
15795 register and has no side effects, in which case it sets BASE and
15796 OFFSET accordingly. */
15797 static bool
15798 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15800 rtx addr;
15802 gcc_assert (base != NULL && offset != NULL);
15804 /* TODO: Handle more general memory operand patterns, such as
15805 PRE_DEC and PRE_INC. */
15807 if (side_effects_p (mem))
15808 return false;
15810 /* Can't deal with subregs. */
15811 if (GET_CODE (mem) == SUBREG)
15812 return false;
15814 gcc_assert (MEM_P (mem));
15816 *offset = const0_rtx;
15818 addr = XEXP (mem, 0);
15820 /* If addr isn't valid for DImode, then we can't handle it. */
15821 if (!arm_legitimate_address_p (DImode, addr,
15822 reload_in_progress || reload_completed))
15823 return false;
15825 if (REG_P (addr))
15827 *base = addr;
15828 return true;
15830 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15832 *base = XEXP (addr, 0);
15833 *offset = XEXP (addr, 1);
15834 return (REG_P (*base) && CONST_INT_P (*offset));
15837 return false;
15840 /* Called from a peephole2 to replace two word-size accesses with a
15841 single LDRD/STRD instruction. Returns true iff we can generate a
15842 new instruction sequence. That is, both accesses use the same base
15843 register and the gap between constant offsets is 4. This function
15844 may reorder its operands to match ldrd/strd RTL templates.
15845 OPERANDS are the operands found by the peephole matcher;
15846 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15847 corresponding memory operands. LOAD indicates whether the access
15848 is load or store. CONST_STORE indicates a store of constant
15849 integer values held in OPERANDS[4,5] and assumes that the pattern
15850 is 4 insns long, for the purpose of checking dead registers.
15851 COMMUTE indicates that register operands may be reordered. */
15852 bool
15853 gen_operands_ldrd_strd (rtx *operands, bool load,
15854 bool const_store, bool commute)
15856 int nops = 2;
15857 HOST_WIDE_INT offsets[2], offset;
15858 rtx base = NULL_RTX;
15859 rtx cur_base, cur_offset, tmp;
15860 int i, gap;
15861 HARD_REG_SET regset;
15863 gcc_assert (!const_store || !load);
15864 /* Check that the memory references are immediate offsets from the
15865 same base register. Extract the base register, the destination
15866 registers, and the corresponding memory offsets. */
15867 for (i = 0; i < nops; i++)
15869 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15870 return false;
15872 if (i == 0)
15873 base = cur_base;
15874 else if (REGNO (base) != REGNO (cur_base))
15875 return false;
15877 offsets[i] = INTVAL (cur_offset);
15878 if (GET_CODE (operands[i]) == SUBREG)
15880 tmp = SUBREG_REG (operands[i]);
15881 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15882 operands[i] = tmp;
15886 /* Make sure there is no dependency between the individual loads. */
15887 if (load && REGNO (operands[0]) == REGNO (base))
15888 return false; /* RAW */
15890 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15891 return false; /* WAW */
15893 /* If the same input register is used in both stores
15894 when storing different constants, try to find a free register.
15895 For example, the code
15896 mov r0, 0
15897 str r0, [r2]
15898 mov r0, 1
15899 str r0, [r2, #4]
15900 can be transformed into
15901 mov r1, 0
15902 strd r1, r0, [r2]
15903 in Thumb mode assuming that r1 is free. */
15904 if (const_store
15905 && REGNO (operands[0]) == REGNO (operands[1])
15906 && INTVAL (operands[4]) != INTVAL (operands[5]))
15908 if (TARGET_THUMB2)
15910 CLEAR_HARD_REG_SET (regset);
15911 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15912 if (tmp == NULL_RTX)
15913 return false;
15915 /* Use the new register in the first load to ensure that
15916 if the original input register is not dead after peephole,
15917 then it will have the correct constant value. */
15918 operands[0] = tmp;
15920 else if (TARGET_ARM)
15922 return false;
15923 int regno = REGNO (operands[0]);
15924 if (!peep2_reg_dead_p (4, operands[0]))
15926 /* When the input register is even and is not dead after the
15927 pattern, it has to hold the second constant but we cannot
15928 form a legal STRD in ARM mode with this register as the second
15929 register. */
15930 if (regno % 2 == 0)
15931 return false;
15933 /* Is regno-1 free? */
15934 SET_HARD_REG_SET (regset);
15935 CLEAR_HARD_REG_BIT(regset, regno - 1);
15936 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15937 if (tmp == NULL_RTX)
15938 return false;
15940 operands[0] = tmp;
15942 else
15944 /* Find a DImode register. */
15945 CLEAR_HARD_REG_SET (regset);
15946 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15947 if (tmp != NULL_RTX)
15949 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15950 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15952 else
15954 /* Can we use the input register to form a DI register? */
15955 SET_HARD_REG_SET (regset);
15956 CLEAR_HARD_REG_BIT(regset,
15957 regno % 2 == 0 ? regno + 1 : regno - 1);
15958 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15959 if (tmp == NULL_RTX)
15960 return false;
15961 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15965 gcc_assert (operands[0] != NULL_RTX);
15966 gcc_assert (operands[1] != NULL_RTX);
15967 gcc_assert (REGNO (operands[0]) % 2 == 0);
15968 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15972 /* Make sure the instructions are ordered with lower memory access first. */
15973 if (offsets[0] > offsets[1])
15975 gap = offsets[0] - offsets[1];
15976 offset = offsets[1];
15978 /* Swap the instructions such that lower memory is accessed first. */
15979 std::swap (operands[0], operands[1]);
15980 std::swap (operands[2], operands[3]);
15981 if (const_store)
15982 std::swap (operands[4], operands[5]);
15984 else
15986 gap = offsets[1] - offsets[0];
15987 offset = offsets[0];
15990 /* Make sure accesses are to consecutive memory locations. */
15991 if (gap != 4)
15992 return false;
15994 /* Make sure we generate legal instructions. */
15995 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15996 false, load))
15997 return true;
15999 /* In Thumb state, where registers are almost unconstrained, there
16000 is little hope of fixing it. */
16001 if (TARGET_THUMB2)
16002 return false;
16004 if (load && commute)
16006 /* Try reordering registers. */
16007 std::swap (operands[0], operands[1]);
16008 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16009 false, load))
16010 return true;
16013 if (const_store)
16015 /* If input registers are dead after this pattern, they can be
16016 reordered or replaced by other registers that are free in the
16017 current pattern. */
16018 if (!peep2_reg_dead_p (4, operands[0])
16019 || !peep2_reg_dead_p (4, operands[1]))
16020 return false;
16022 /* Try to reorder the input registers. */
16023 /* For example, the code
16024 mov r0, 0
16025 mov r1, 1
16026 str r1, [r2]
16027 str r0, [r2, #4]
16028 can be transformed into
16029 mov r1, 0
16030 mov r0, 1
16031 strd r0, [r2]
16033 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16034 false, false))
16036 std::swap (operands[0], operands[1]);
16037 return true;
16040 /* Try to find a free DI register. */
16041 CLEAR_HARD_REG_SET (regset);
16042 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16043 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16044 while (true)
16046 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16047 if (tmp == NULL_RTX)
16048 return false;
16050 /* DREG must be an even-numbered register in DImode.
16051 Split it into SI registers. */
16052 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16053 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16054 gcc_assert (operands[0] != NULL_RTX);
16055 gcc_assert (operands[1] != NULL_RTX);
16056 gcc_assert (REGNO (operands[0]) % 2 == 0);
16057 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16059 return (operands_ok_ldrd_strd (operands[0], operands[1],
16060 base, offset,
16061 false, load));
16065 return false;
16071 /* Print a symbolic form of X to the debug file, F. */
16072 static void
16073 arm_print_value (FILE *f, rtx x)
16075 switch (GET_CODE (x))
16077 case CONST_INT:
16078 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16079 return;
16081 case CONST_DOUBLE:
16082 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16083 return;
16085 case CONST_VECTOR:
16087 int i;
16089 fprintf (f, "<");
16090 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16092 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16093 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16094 fputc (',', f);
16096 fprintf (f, ">");
16098 return;
16100 case CONST_STRING:
16101 fprintf (f, "\"%s\"", XSTR (x, 0));
16102 return;
16104 case SYMBOL_REF:
16105 fprintf (f, "`%s'", XSTR (x, 0));
16106 return;
16108 case LABEL_REF:
16109 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16110 return;
16112 case CONST:
16113 arm_print_value (f, XEXP (x, 0));
16114 return;
16116 case PLUS:
16117 arm_print_value (f, XEXP (x, 0));
16118 fprintf (f, "+");
16119 arm_print_value (f, XEXP (x, 1));
16120 return;
16122 case PC:
16123 fprintf (f, "pc");
16124 return;
16126 default:
16127 fprintf (f, "????");
16128 return;
16132 /* Routines for manipulation of the constant pool. */
16134 /* Arm instructions cannot load a large constant directly into a
16135 register; they have to come from a pc relative load. The constant
16136 must therefore be placed in the addressable range of the pc
16137 relative load. Depending on the precise pc relative load
16138 instruction the range is somewhere between 256 bytes and 4k. This
16139 means that we often have to dump a constant inside a function, and
16140 generate code to branch around it.
16142 It is important to minimize this, since the branches will slow
16143 things down and make the code larger.
16145 Normally we can hide the table after an existing unconditional
16146 branch so that there is no interruption of the flow, but in the
16147 worst case the code looks like this:
16149 ldr rn, L1
16151 b L2
16152 align
16153 L1: .long value
16157 ldr rn, L3
16159 b L4
16160 align
16161 L3: .long value
16165 We fix this by performing a scan after scheduling, which notices
16166 which instructions need to have their operands fetched from the
16167 constant table and builds the table.
16169 The algorithm starts by building a table of all the constants that
16170 need fixing up and all the natural barriers in the function (places
16171 where a constant table can be dropped without breaking the flow).
16172 For each fixup we note how far the pc-relative replacement will be
16173 able to reach and the offset of the instruction into the function.
16175 Having built the table we then group the fixes together to form
16176 tables that are as large as possible (subject to addressing
16177 constraints) and emit each table of constants after the last
16178 barrier that is within range of all the instructions in the group.
16179 If a group does not contain a barrier, then we forcibly create one
16180 by inserting a jump instruction into the flow. Once the table has
16181 been inserted, the insns are then modified to reference the
16182 relevant entry in the pool.
16184 Possible enhancements to the algorithm (not implemented) are:
16186 1) For some processors and object formats, there may be benefit in
16187 aligning the pools to the start of cache lines; this alignment
16188 would need to be taken into account when calculating addressability
16189 of a pool. */
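/* As a rough guide to the ranges involved: an ARM-state LDR of a
   literal reaches about +/-4KB from the pc, a Thumb-1 pc-relative LDR
   only about 1KB forwards, and VFP loads about +/-1KB.  The exact
   per-insn limits are taken from the pool_range and neg_pool_range
   attributes when the fixups are recorded.  */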
16191 /* These typedefs are located at the start of this file, so that
16192 they can be used in the prototypes there. This comment is to
16193 remind readers of that fact so that the following structures
16194 can be understood more easily.
16196 typedef struct minipool_node Mnode;
16197 typedef struct minipool_fixup Mfix; */
16199 struct minipool_node
16201 /* Doubly linked chain of entries. */
16202 Mnode * next;
16203 Mnode * prev;
16204 /* The maximum offset into the code that this entry can be placed. While
16205 pushing fixes for forward references, all entries are sorted in order
16206 of increasing max_address. */
16207 HOST_WIDE_INT max_address;
16208 /* Similarly for an entry inserted for a backwards ref. */
16209 HOST_WIDE_INT min_address;
16210 /* The number of fixes referencing this entry. This can become zero
16211 if we "unpush" an entry. In this case we ignore the entry when we
16212 come to emit the code. */
16213 int refcount;
16214 /* The offset from the start of the minipool. */
16215 HOST_WIDE_INT offset;
16216 /* The value in table. */
16217 rtx value;
16218 /* The mode of value. */
16219 machine_mode mode;
16220 /* The size of the value. With iWMMXt enabled
16221 sizes > 4 also imply an alignment of 8 bytes. */
16222 int fix_size;
16225 struct minipool_fixup
16227 Mfix * next;
16228 rtx_insn * insn;
16229 HOST_WIDE_INT address;
16230 rtx * loc;
16231 machine_mode mode;
16232 int fix_size;
16233 rtx value;
16234 Mnode * minipool;
16235 HOST_WIDE_INT forwards;
16236 HOST_WIDE_INT backwards;
16239 /* Fixes less than a word need padding out to a word boundary. */
16240 #define MINIPOOL_FIX_SIZE(mode) \
16241 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
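/* E.g. MINIPOOL_FIX_SIZE (HImode) == 4 (padded up from 2),
   MINIPOOL_FIX_SIZE (DImode) == 8.  */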
16243 static Mnode * minipool_vector_head;
16244 static Mnode * minipool_vector_tail;
16245 static rtx_code_label *minipool_vector_label;
16246 static int minipool_pad;
16248 /* The linked list of all minipool fixes required for this function. */
16249 Mfix * minipool_fix_head;
16250 Mfix * minipool_fix_tail;
16251 /* The fix entry for the current minipool, once it has been placed. */
16252 Mfix * minipool_barrier;
16254 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16255 #define JUMP_TABLES_IN_TEXT_SECTION 0
16256 #endif
16258 static HOST_WIDE_INT
16259 get_jump_table_size (rtx_jump_table_data *insn)
16261 /* ADDR_VECs only take room if read-only data goes into the text
16262 section. */
16263 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16265 rtx body = PATTERN (insn);
16266 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16267 HOST_WIDE_INT size;
16268 HOST_WIDE_INT modesize;
16270 modesize = GET_MODE_SIZE (GET_MODE (body));
16271 size = modesize * XVECLEN (body, elt);
16272 switch (modesize)
16274 case 1:
16275 /* Round up size of TBB table to a halfword boundary. */
16276 size = (size + 1) & ~(HOST_WIDE_INT)1;
16277 break;
16278 case 2:
16279 /* No padding necessary for TBH. */
16280 break;
16281 case 4:
16282 /* Add two bytes for alignment on Thumb. */
16283 if (TARGET_THUMB)
16284 size += 2;
16285 break;
16286 default:
16287 gcc_unreachable ();
16289 return size;
16292 return 0;
16295 /* Return the maximum amount of padding that will be inserted before
16296 label LABEL. */
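/* For example, a label aligned to an 8-byte boundary on Thumb
   (minimum insn size 2) can be preceded by up to 6 bytes of padding.  */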
16298 static HOST_WIDE_INT
16299 get_label_padding (rtx label)
16301 HOST_WIDE_INT align, min_insn_size;
16303 align = 1 << label_to_alignment (label);
16304 min_insn_size = TARGET_THUMB ? 2 : 4;
16305 return align > min_insn_size ? align - min_insn_size : 0;
16308 /* Move a minipool fix MP from its current location to before MAX_MP.
16309 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16310 constraints may need updating. */
16311 static Mnode *
16312 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16313 HOST_WIDE_INT max_address)
16315 /* The code below assumes these are different. */
16316 gcc_assert (mp != max_mp);
16318 if (max_mp == NULL)
16320 if (max_address < mp->max_address)
16321 mp->max_address = max_address;
16323 else
16325 if (max_address > max_mp->max_address - mp->fix_size)
16326 mp->max_address = max_mp->max_address - mp->fix_size;
16327 else
16328 mp->max_address = max_address;
16330 /* Unlink MP from its current position. Since max_mp is non-null,
16331 mp->prev must be non-null. */
16332 mp->prev->next = mp->next;
16333 if (mp->next != NULL)
16334 mp->next->prev = mp->prev;
16335 else
16336 minipool_vector_tail = mp->prev;
16338 /* Re-insert it before MAX_MP. */
16339 mp->next = max_mp;
16340 mp->prev = max_mp->prev;
16341 max_mp->prev = mp;
16343 if (mp->prev != NULL)
16344 mp->prev->next = mp;
16345 else
16346 minipool_vector_head = mp;
16349 /* Save the new entry. */
16350 max_mp = mp;
16352 /* Scan over the preceding entries and adjust their addresses as
16353 required. */
16354 while (mp->prev != NULL
16355 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16357 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16358 mp = mp->prev;
16361 return max_mp;
16364 /* Add a constant to the minipool for a forward reference. Returns the
16365 node added or NULL if the constant will not fit in this pool. */
16366 static Mnode *
16367 add_minipool_forward_ref (Mfix *fix)
16369 /* If set, max_mp is the first pool_entry that has a lower
16370 constraint than the one we are trying to add. */
16371 Mnode * max_mp = NULL;
16372 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16373 Mnode * mp;
16375 /* If the minipool starts before the end of FIX->INSN then this FIX
16376 cannot be placed into the current pool. Furthermore, adding the
16377 new constant pool entry may cause the pool to start FIX_SIZE bytes
16378 earlier. */
16379 if (minipool_vector_head &&
16380 (fix->address + get_attr_length (fix->insn)
16381 >= minipool_vector_head->max_address - fix->fix_size))
16382 return NULL;
16384 /* Scan the pool to see if a constant with the same value has
16385 already been added. While we are doing this, also note the
16386 location where we must insert the constant if it doesn't already
16387 exist. */
16388 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16390 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16391 && fix->mode == mp->mode
16392 && (!LABEL_P (fix->value)
16393 || (CODE_LABEL_NUMBER (fix->value)
16394 == CODE_LABEL_NUMBER (mp->value)))
16395 && rtx_equal_p (fix->value, mp->value))
16397 /* More than one fix references this entry. */
16398 mp->refcount++;
16399 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16402 /* Note the insertion point if necessary. */
16403 if (max_mp == NULL
16404 && mp->max_address > max_address)
16405 max_mp = mp;
16407 /* If we are inserting an 8-byte aligned quantity and
16408 we have not already found an insertion point, then
16409 make sure that all such 8-byte aligned quantities are
16410 placed at the start of the pool. */
16411 if (ARM_DOUBLEWORD_ALIGN
16412 && max_mp == NULL
16413 && fix->fix_size >= 8
16414 && mp->fix_size < 8)
16416 max_mp = mp;
16417 max_address = mp->max_address;
16421 /* The value is not currently in the minipool, so we need to create
16422 a new entry for it. If MAX_MP is NULL, the entry will be put on
16423 the end of the list since the placement is less constrained than
16424 any existing entry. Otherwise, we insert the new fix before
16425 MAX_MP and, if necessary, adjust the constraints on the other
16426 entries. */
16427 mp = XNEW (Mnode);
16428 mp->fix_size = fix->fix_size;
16429 mp->mode = fix->mode;
16430 mp->value = fix->value;
16431 mp->refcount = 1;
16432 /* Not yet required for a backwards ref. */
16433 mp->min_address = -65536;
16435 if (max_mp == NULL)
16437 mp->max_address = max_address;
16438 mp->next = NULL;
16439 mp->prev = minipool_vector_tail;
16441 if (mp->prev == NULL)
16443 minipool_vector_head = mp;
16444 minipool_vector_label = gen_label_rtx ();
16446 else
16447 mp->prev->next = mp;
16449 minipool_vector_tail = mp;
16451 else
16453 if (max_address > max_mp->max_address - mp->fix_size)
16454 mp->max_address = max_mp->max_address - mp->fix_size;
16455 else
16456 mp->max_address = max_address;
16458 mp->next = max_mp;
16459 mp->prev = max_mp->prev;
16460 max_mp->prev = mp;
16461 if (mp->prev != NULL)
16462 mp->prev->next = mp;
16463 else
16464 minipool_vector_head = mp;
16467 /* Save the new entry. */
16468 max_mp = mp;
16470 /* Scan over the preceding entries and adjust their addresses as
16471 required. */
16472 while (mp->prev != NULL
16473 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16475 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16476 mp = mp->prev;
16479 return max_mp;
16482 static Mnode *
16483 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16484 HOST_WIDE_INT min_address)
16486 HOST_WIDE_INT offset;
16488 /* The code below assumes these are different. */
16489 gcc_assert (mp != min_mp);
16491 if (min_mp == NULL)
16493 if (min_address > mp->min_address)
16494 mp->min_address = min_address;
16496 else
16498 /* We will adjust this below if it is too loose. */
16499 mp->min_address = min_address;
16501 /* Unlink MP from its current position. Since min_mp is non-null,
16502 mp->next must be non-null. */
16503 mp->next->prev = mp->prev;
16504 if (mp->prev != NULL)
16505 mp->prev->next = mp->next;
16506 else
16507 minipool_vector_head = mp->next;
16509 /* Reinsert it after MIN_MP. */
16510 mp->prev = min_mp;
16511 mp->next = min_mp->next;
16512 min_mp->next = mp;
16513 if (mp->next != NULL)
16514 mp->next->prev = mp;
16515 else
16516 minipool_vector_tail = mp;
16519 min_mp = mp;
16521 offset = 0;
16522 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16524 mp->offset = offset;
16525 if (mp->refcount > 0)
16526 offset += mp->fix_size;
16528 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16529 mp->next->min_address = mp->min_address + mp->fix_size;
16532 return min_mp;
16535 /* Add a constant to the minipool for a backward reference. Returns the
16536 node added or NULL if the constant will not fit in this pool.
16538 Note that the code for insertion for a backwards reference can be
16539 somewhat confusing because the calculated offsets for each fix do
16540 not take into account the size of the pool (which is still under
16541 construction). */
16542 static Mnode *
16543 add_minipool_backward_ref (Mfix *fix)
16545 /* If set, min_mp is the last pool_entry that has a lower constraint
16546 than the one we are trying to add. */
16547 Mnode *min_mp = NULL;
16548 /* This can be negative, since it is only a constraint. */
16549 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16550 Mnode *mp;
16552 /* If we can't reach the current pool from this insn, or if we can't
16553 insert this entry at the end of the pool without pushing other
16554 fixes out of range, then we don't try. This ensures that we
16555 can't fail later on. */
16556 if (min_address >= minipool_barrier->address
16557 || (minipool_vector_tail->min_address + fix->fix_size
16558 >= minipool_barrier->address))
16559 return NULL;
16561 /* Scan the pool to see if a constant with the same value has
16562 already been added. While we are doing this, also note the
16563 location where we must insert the constant if it doesn't already
16564 exist. */
16565 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16567 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16568 && fix->mode == mp->mode
16569 && (!LABEL_P (fix->value)
16570 || (CODE_LABEL_NUMBER (fix->value)
16571 == CODE_LABEL_NUMBER (mp->value)))
16572 && rtx_equal_p (fix->value, mp->value)
16573 /* Check that there is enough slack to move this entry to the
16574 end of the table (this is conservative). */
16575 && (mp->max_address
16576 > (minipool_barrier->address
16577 + minipool_vector_tail->offset
16578 + minipool_vector_tail->fix_size)))
16580 mp->refcount++;
16581 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16584 if (min_mp != NULL)
16585 mp->min_address += fix->fix_size;
16586 else
16588 /* Note the insertion point if necessary. */
16589 if (mp->min_address < min_address)
16591 /* For now, we do not allow the insertion of nodes requiring 8-byte
16592 alignment anywhere but at the start of the pool. */
16593 if (ARM_DOUBLEWORD_ALIGN
16594 && fix->fix_size >= 8 && mp->fix_size < 8)
16595 return NULL;
16596 else
16597 min_mp = mp;
16599 else if (mp->max_address
16600 < minipool_barrier->address + mp->offset + fix->fix_size)
16602 /* Inserting before this entry would push the fix beyond
16603 its maximum address (which can happen if we have
16604 re-located a forwards fix); force the new fix to come
16605 after it. */
16606 if (ARM_DOUBLEWORD_ALIGN
16607 && fix->fix_size >= 8 && mp->fix_size < 8)
16608 return NULL;
16609 else
16611 min_mp = mp;
16612 min_address = mp->min_address + fix->fix_size;
16615 /* Do not insert a non-8-byte aligned quantity before 8-byte
16616 aligned quantities. */
16617 else if (ARM_DOUBLEWORD_ALIGN
16618 && fix->fix_size < 8
16619 && mp->fix_size >= 8)
16621 min_mp = mp;
16622 min_address = mp->min_address + fix->fix_size;
16627 /* We need to create a new entry. */
16628 mp = XNEW (Mnode);
16629 mp->fix_size = fix->fix_size;
16630 mp->mode = fix->mode;
16631 mp->value = fix->value;
16632 mp->refcount = 1;
16633 mp->max_address = minipool_barrier->address + 65536;
16635 mp->min_address = min_address;
16637 if (min_mp == NULL)
16639 mp->prev = NULL;
16640 mp->next = minipool_vector_head;
16642 if (mp->next == NULL)
16644 minipool_vector_tail = mp;
16645 minipool_vector_label = gen_label_rtx ();
16647 else
16648 mp->next->prev = mp;
16650 minipool_vector_head = mp;
16652 else
16654 mp->next = min_mp->next;
16655 mp->prev = min_mp;
16656 min_mp->next = mp;
16658 if (mp->next != NULL)
16659 mp->next->prev = mp;
16660 else
16661 minipool_vector_tail = mp;
16664 /* Save the new entry. */
16665 min_mp = mp;
16667 if (mp->prev)
16668 mp = mp->prev;
16669 else
16670 mp->offset = 0;
16672 /* Scan over the following entries and adjust their offsets. */
16673 while (mp->next != NULL)
16675 if (mp->next->min_address < mp->min_address + mp->fix_size)
16676 mp->next->min_address = mp->min_address + mp->fix_size;
16678 if (mp->refcount)
16679 mp->next->offset = mp->offset + mp->fix_size;
16680 else
16681 mp->next->offset = mp->offset;
16683 mp = mp->next;
16686 return min_mp;
16689 static void
16690 assign_minipool_offsets (Mfix *barrier)
16692 HOST_WIDE_INT offset = 0;
16693 Mnode *mp;
16695 minipool_barrier = barrier;
16697 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16699 mp->offset = offset;
16701 if (mp->refcount > 0)
16702 offset += mp->fix_size;
16706 /* Output the literal table */
16707 static void
16708 dump_minipool (rtx_insn *scan)
16710 Mnode * mp;
16711 Mnode * nmp;
16712 int align64 = 0;
16714 if (ARM_DOUBLEWORD_ALIGN)
16715 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16716 if (mp->refcount > 0 && mp->fix_size >= 8)
16718 align64 = 1;
16719 break;
16722 if (dump_file)
16723 fprintf (dump_file,
16724 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16725 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16727 scan = emit_label_after (gen_label_rtx (), scan);
16728 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16729 scan = emit_label_after (minipool_vector_label, scan);
16731 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16733 if (mp->refcount > 0)
16735 if (dump_file)
16737 fprintf (dump_file,
16738 ";; Offset %u, min %ld, max %ld ",
16739 (unsigned) mp->offset, (unsigned long) mp->min_address,
16740 (unsigned long) mp->max_address);
16741 arm_print_value (dump_file, mp->value);
16742 fputc ('\n', dump_file);
16745 switch (GET_MODE_SIZE (mp->mode))
16747 #ifdef HAVE_consttable_1
16748 case 1:
16749 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16750 break;
16752 #endif
16753 #ifdef HAVE_consttable_2
16754 case 2:
16755 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16756 break;
16758 #endif
16759 #ifdef HAVE_consttable_4
16760 case 4:
16761 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16762 break;
16764 #endif
16765 #ifdef HAVE_consttable_8
16766 case 8:
16767 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16768 break;
16770 #endif
16771 #ifdef HAVE_consttable_16
16772 case 16:
16773 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16774 break;
16776 #endif
16777 default:
16778 gcc_unreachable ();
16782 nmp = mp->next;
16783 free (mp);
16786 minipool_vector_head = minipool_vector_tail = NULL;
16787 scan = emit_insn_after (gen_consttable_end (), scan);
16788 scan = emit_barrier_after (scan);
16791 /* Return the cost of forcibly inserting a barrier after INSN. */
16792 static int
16793 arm_barrier_cost (rtx insn)
16795 /* Basing the location of the pool on the loop depth is preferable,
16796 but at the moment, the basic block information seems to be
16797 corrupted by this stage of the compilation. */
16798 int base_cost = 50;
16799 rtx next = next_nonnote_insn (insn);
16801 if (next != NULL && LABEL_P (next))
16802 base_cost -= 20;
16804 switch (GET_CODE (insn))
16806 case CODE_LABEL:
16807 /* It will always be better to place the table before the label, rather
16808 than after it. */
16809 return 50;
16811 case INSN:
16812 case CALL_INSN:
16813 return base_cost;
16815 case JUMP_INSN:
16816 return base_cost - 10;
16818 default:
16819 return base_cost + 10;
16823 /* Find the best place in the insn stream in the range
16824 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16825 Create the barrier by inserting a jump and add a new fix entry for
16826 it. */
16827 static Mfix *
16828 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16830 HOST_WIDE_INT count = 0;
16831 rtx_barrier *barrier;
16832 rtx_insn *from = fix->insn;
16833 /* The instruction after which we will insert the jump. */
16834 rtx_insn *selected = NULL;
16835 int selected_cost;
16836 /* The address at which the jump instruction will be placed. */
16837 HOST_WIDE_INT selected_address;
16838 Mfix * new_fix;
16839 HOST_WIDE_INT max_count = max_address - fix->address;
16840 rtx_code_label *label = gen_label_rtx ();
16842 selected_cost = arm_barrier_cost (from);
16843 selected_address = fix->address;
16845 while (from && count < max_count)
16847 rtx_jump_table_data *tmp;
16848 int new_cost;
16850 /* This code shouldn't have been called if there was a natural barrier
16851 within range. */
16852 gcc_assert (!BARRIER_P (from));
16854 /* Count the length of this insn. This must stay in sync with the
16855 code that pushes minipool fixes. */
16856 if (LABEL_P (from))
16857 count += get_label_padding (from);
16858 else
16859 count += get_attr_length (from);
16861 /* If there is a jump table, add its length. */
16862 if (tablejump_p (from, NULL, &tmp))
16864 count += get_jump_table_size (tmp);
16866 /* Jump tables aren't in a basic block, so base the cost on
16867 the dispatch insn. If we select this location, we will
16868 still put the pool after the table. */
16869 new_cost = arm_barrier_cost (from);
16871 if (count < max_count
16872 && (!selected || new_cost <= selected_cost))
16874 selected = tmp;
16875 selected_cost = new_cost;
16876 selected_address = fix->address + count;
16879 /* Continue after the dispatch table. */
16880 from = NEXT_INSN (tmp);
16881 continue;
16884 new_cost = arm_barrier_cost (from);
16886 if (count < max_count
16887 && (!selected || new_cost <= selected_cost))
16889 selected = from;
16890 selected_cost = new_cost;
16891 selected_address = fix->address + count;
16894 from = NEXT_INSN (from);
16897 /* Make sure that we found a place to insert the jump. */
16898 gcc_assert (selected);
16900 /* Make sure we do not split a call and its corresponding
16901 CALL_ARG_LOCATION note. */
16902 if (CALL_P (selected))
16904 rtx_insn *next = NEXT_INSN (selected);
16905 if (next && NOTE_P (next)
16906 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16907 selected = next;
16910 /* Create a new JUMP_INSN that branches around a barrier. */
16911 from = emit_jump_insn_after (gen_jump (label), selected);
16912 JUMP_LABEL (from) = label;
16913 barrier = emit_barrier_after (from);
16914 emit_label_after (label, barrier);
16916 /* Create a minipool barrier entry for the new barrier. */
16917 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16918 new_fix->insn = barrier;
16919 new_fix->address = selected_address;
16920 new_fix->next = fix->next;
16921 fix->next = new_fix;
16923 return new_fix;
16926 /* Record that there is a natural barrier in the insn stream at
16927 ADDRESS. */
16928 static void
16929 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16931 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16933 fix->insn = insn;
16934 fix->address = address;
16936 fix->next = NULL;
16937 if (minipool_fix_head != NULL)
16938 minipool_fix_tail->next = fix;
16939 else
16940 minipool_fix_head = fix;
16942 minipool_fix_tail = fix;
16945 /* Record INSN, which will need fixing up to load a value from the
16946 minipool. ADDRESS is the offset of the insn since the start of the
16947 function; LOC is a pointer to the part of the insn which requires
16948 fixing; VALUE is the constant that must be loaded, which is of type
16949 MODE. */
16950 static void
16951 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16952 machine_mode mode, rtx value)
16954 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16956 fix->insn = insn;
16957 fix->address = address;
16958 fix->loc = loc;
16959 fix->mode = mode;
16960 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16961 fix->value = value;
16962 fix->forwards = get_attr_pool_range (insn);
16963 fix->backwards = get_attr_neg_pool_range (insn);
16964 fix->minipool = NULL;
16966 /* If an insn doesn't have a range defined for it, then it isn't
16967 expecting to be reworked by this code. Better to stop now than
16968 to generate duff assembly code. */
16969 gcc_assert (fix->forwards || fix->backwards);
16971 /* If an entry requires 8-byte alignment then assume all constant pools
16972 require 4 bytes of padding. Trying to do this later on a per-pool
16973 basis is awkward because existing pool entries have to be modified. */
16974 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16975 minipool_pad = 4;
16977 if (dump_file)
16979 fprintf (dump_file,
16980 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16981 GET_MODE_NAME (mode),
16982 INSN_UID (insn), (unsigned long) address,
16983 -1 * (long)fix->backwards, (long)fix->forwards);
16984 arm_print_value (dump_file, fix->value);
16985 fprintf (dump_file, "\n");
16988 /* Add it to the chain of fixes. */
16989 fix->next = NULL;
16991 if (minipool_fix_head != NULL)
16992 minipool_fix_tail->next = fix;
16993 else
16994 minipool_fix_head = fix;
16996 minipool_fix_tail = fix;
16999 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
17000 constant inline. Returns 99 if we always want the value synthesized
17001 rather than loaded from a literal pool. */
17003 arm_max_const_double_inline_cost ()
17005 /* Let the value get synthesized to avoid the use of literal pools. */
17006 if (arm_disable_literal_pool)
17007 return 99;
17009 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17012 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17013 Returns the number of insns needed, or 99 if we don't know how to
17014 do it. */
17016 arm_const_double_inline_cost (rtx val)
17018 rtx lowpart, highpart;
17019 machine_mode mode;
17021 mode = GET_MODE (val);
17023 if (mode == VOIDmode)
17024 mode = DImode;
17026 gcc_assert (GET_MODE_SIZE (mode) == 8);
17028 lowpart = gen_lowpart (SImode, val);
17029 highpart = gen_highpart_mode (SImode, mode, val);
17031 gcc_assert (CONST_INT_P (lowpart));
17032 gcc_assert (CONST_INT_P (highpart));
17034 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17035 NULL_RTX, NULL_RTX, 0, 0)
17036 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17037 NULL_RTX, NULL_RTX, 0, 0));
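/* For illustration: a constant such as 0x0000000100000001 costs
   1 + 1 = 2 insns here (one MOV per word), which is within the limit
   returned by arm_max_const_double_inline_cost, so callers would
   normally synthesize it inline instead of using a literal pool.  */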
17040 /* Cost of loading a SImode constant. */
17041 static inline int
17042 arm_const_inline_cost (enum rtx_code code, rtx val)
17044 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17045 NULL_RTX, NULL_RTX, 1, 0);
17048 /* Return true if it is worthwhile to split a 64-bit constant into two
17049 32-bit operations. This is the case if optimizing for size, or
17050 if we have load delay slots, or if one 32-bit part can be done with
17051 a single data operation. */
17052 bool
17053 arm_const_double_by_parts (rtx val)
17055 machine_mode mode = GET_MODE (val);
17056 rtx part;
17058 if (optimize_size || arm_ld_sched)
17059 return true;
17061 if (mode == VOIDmode)
17062 mode = DImode;
17064 part = gen_highpart_mode (SImode, mode, val);
17066 gcc_assert (CONST_INT_P (part));
17068 if (const_ok_for_arm (INTVAL (part))
17069 || const_ok_for_arm (~INTVAL (part)))
17070 return true;
17072 part = gen_lowpart (SImode, val);
17074 gcc_assert (CONST_INT_P (part));
17076 if (const_ok_for_arm (INTVAL (part))
17077 || const_ok_for_arm (~INTVAL (part)))
17078 return true;
17080 return false;
17083 /* Return true if it is possible to inline both the high and low parts
17084 of a 64-bit constant into 32-bit data processing instructions. */
17085 bool
17086 arm_const_double_by_immediates (rtx val)
17088 machine_mode mode = GET_MODE (val);
17089 rtx part;
17091 if (mode == VOIDmode)
17092 mode = DImode;
17094 part = gen_highpart_mode (SImode, mode, val);
17096 gcc_assert (CONST_INT_P (part));
17098 if (!const_ok_for_arm (INTVAL (part)))
17099 return false;
17101 part = gen_lowpart (SImode, val);
17103 gcc_assert (CONST_INT_P (part));
17105 if (!const_ok_for_arm (INTVAL (part)))
17106 return false;
17108 return true;
17111 /* Scan INSN and note any of its operands that need fixing.
17112 If DO_PUSHES is false we do not actually push any of the fixups
17113 needed. */
17114 static void
17115 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17117 int opno;
17119 extract_constrain_insn (insn);
17121 if (recog_data.n_alternatives == 0)
17122 return;
17124 /* Fill in recog_op_alt with information about the constraints of
17125 this insn. */
17126 preprocess_constraints (insn);
17128 const operand_alternative *op_alt = which_op_alt ();
17129 for (opno = 0; opno < recog_data.n_operands; opno++)
17131 /* Things we need to fix can only occur in inputs. */
17132 if (recog_data.operand_type[opno] != OP_IN)
17133 continue;
17135 /* If this alternative is a memory reference, then any mention
17136 of constants in this alternative is really to fool reload
17137 into allowing us to accept one there. We need to fix them up
17138 now so that we output the right code. */
17139 if (op_alt[opno].memory_ok)
17141 rtx op = recog_data.operand[opno];
17143 if (CONSTANT_P (op))
17145 if (do_pushes)
17146 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17147 recog_data.operand_mode[opno], op);
17149 else if (MEM_P (op)
17150 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17151 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17153 if (do_pushes)
17155 rtx cop = avoid_constant_pool_reference (op);
17157 /* Casting the address of something to a mode narrower
17158 than a word can cause avoid_constant_pool_reference()
17159 to return the pool reference itself. That's no good to
17160 us here. Let's just hope that we can use the
17161 constant pool value directly. */
17162 if (op == cop)
17163 cop = get_pool_constant (XEXP (op, 0));
17165 push_minipool_fix (insn, address,
17166 recog_data.operand_loc[opno],
17167 recog_data.operand_mode[opno], cop);
17174 return;
17177 /* Rewrite move insn into subtract of 0 if the condition codes will
17178 be useful in next conditional jump insn. */
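/* Rough sketch: a "mov r1, r0" followed (eventually) by a compare of
   r0 or r1 against zero and a conditional branch is rewritten so the
   move becomes "subs r1, r0, #0"; the SUBS sets the condition codes,
   letting the later compare-and-branch omit its explicit compare.  */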
17180 static void
17181 thumb1_reorg (void)
17183 basic_block bb;
17185 FOR_EACH_BB_FN (bb, cfun)
17187 rtx dest, src;
17188 rtx pat, op0, set = NULL;
17189 rtx_insn *prev, *insn = BB_END (bb);
17190 bool insn_clobbered = false;
17192 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17193 insn = PREV_INSN (insn);
17195 /* Find the last cbranchsi4_insn in basic block BB. */
17196 if (insn == BB_HEAD (bb)
17197 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17198 continue;
17200 /* Get the register with which we are comparing. */
17201 pat = PATTERN (insn);
17202 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17204 /* Find the first flag setting insn before INSN in basic block BB. */
17205 gcc_assert (insn != BB_HEAD (bb));
17206 for (prev = PREV_INSN (insn);
17207 (!insn_clobbered
17208 && prev != BB_HEAD (bb)
17209 && (NOTE_P (prev)
17210 || DEBUG_INSN_P (prev)
17211 || ((set = single_set (prev)) != NULL
17212 && get_attr_conds (prev) == CONDS_NOCOND)));
17213 prev = PREV_INSN (prev))
17215 if (reg_set_p (op0, prev))
17216 insn_clobbered = true;
17219 /* Skip if op0 is clobbered by insn other than prev. */
17220 if (insn_clobbered)
17221 continue;
17223 if (!set)
17224 continue;
17226 dest = SET_DEST (set);
17227 src = SET_SRC (set);
17228 if (!low_register_operand (dest, SImode)
17229 || !low_register_operand (src, SImode))
17230 continue;
17232 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17233 in INSN. Both src and dest of the move insn are checked. */
17234 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17236 dest = copy_rtx (dest);
17237 src = copy_rtx (src);
17238 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17239 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17240 INSN_CODE (prev) = -1;
17241 /* Set test register in INSN to dest. */
17242 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17243 INSN_CODE (insn) = -1;
17248 /* Convert instructions to their cc-clobbering variant if possible, since
17249 that allows us to use smaller encodings. */
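/* For example, "add r0, r1, r2" needs a 32-bit Thumb-2 encoding, while
   the flag-setting "adds r0, r1, r2" has a 16-bit encoding; when the
   condition codes are dead after the insn we can safely use the
   latter.  */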
17251 static void
17252 thumb2_reorg (void)
17254 basic_block bb;
17255 regset_head live;
17257 INIT_REG_SET (&live);
17259 /* We are freeing block_for_insn in the toplev to keep compatibility
17260 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17261 compute_bb_for_insn ();
17262 df_analyze ();
17264 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17266 FOR_EACH_BB_FN (bb, cfun)
17268 if (current_tune->disparage_flag_setting_t16_encodings
17269 && optimize_bb_for_speed_p (bb))
17270 continue;
17272 rtx_insn *insn;
17273 Convert_Action action = SKIP;
17274 Convert_Action action_for_partial_flag_setting
17275 = (current_tune->disparage_partial_flag_setting_t16_encodings
17276 && optimize_bb_for_speed_p (bb))
17277 ? SKIP : CONV;
17279 COPY_REG_SET (&live, DF_LR_OUT (bb));
17280 df_simulate_initialize_backwards (bb, &live);
17281 FOR_BB_INSNS_REVERSE (bb, insn)
17283 if (NONJUMP_INSN_P (insn)
17284 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17285 && GET_CODE (PATTERN (insn)) == SET)
17287 action = SKIP;
17288 rtx pat = PATTERN (insn);
17289 rtx dst = XEXP (pat, 0);
17290 rtx src = XEXP (pat, 1);
17291 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17293 if (UNARY_P (src) || BINARY_P (src))
17294 op0 = XEXP (src, 0);
17296 if (BINARY_P (src))
17297 op1 = XEXP (src, 1);
17299 if (low_register_operand (dst, SImode))
17301 switch (GET_CODE (src))
17303 case PLUS:
17304 /* Adding two registers and storing the result
17305 in the first source is already a 16-bit
17306 operation. */
17307 if (rtx_equal_p (dst, op0)
17308 && register_operand (op1, SImode))
17309 break;
17311 if (low_register_operand (op0, SImode))
17313 /* ADDS <Rd>,<Rn>,<Rm> */
17314 if (low_register_operand (op1, SImode))
17315 action = CONV;
17316 /* ADDS <Rdn>,#<imm8> */
17317 /* SUBS <Rdn>,#<imm8> */
17318 else if (rtx_equal_p (dst, op0)
17319 && CONST_INT_P (op1)
17320 && IN_RANGE (INTVAL (op1), -255, 255))
17321 action = CONV;
17322 /* ADDS <Rd>,<Rn>,#<imm3> */
17323 /* SUBS <Rd>,<Rn>,#<imm3> */
17324 else if (CONST_INT_P (op1)
17325 && IN_RANGE (INTVAL (op1), -7, 7))
17326 action = CONV;
17328 /* ADCS <Rd>, <Rn> */
17329 else if (GET_CODE (XEXP (src, 0)) == PLUS
17330 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17331 && low_register_operand (XEXP (XEXP (src, 0), 1),
17332 SImode)
17333 && COMPARISON_P (op1)
17334 && cc_register (XEXP (op1, 0), VOIDmode)
17335 && maybe_get_arm_condition_code (op1) == ARM_CS
17336 && XEXP (op1, 1) == const0_rtx)
17337 action = CONV;
17338 break;
17340 case MINUS:
17341 /* RSBS <Rd>,<Rn>,#0
17342 Not handled here: see NEG below. */
17343 /* SUBS <Rd>,<Rn>,#<imm3>
17344 SUBS <Rdn>,#<imm8>
17345 Not handled here: see PLUS above. */
17346 /* SUBS <Rd>,<Rn>,<Rm> */
17347 if (low_register_operand (op0, SImode)
17348 && low_register_operand (op1, SImode))
17349 action = CONV;
17350 break;
17352 case MULT:
17353 /* MULS <Rdm>,<Rn>,<Rdm>
17354 As an exception to the rule, this is only used
17355 when optimizing for size since MULS is slow on all
17356 known implementations. We do not even want to use
17357 MULS in cold code, if optimizing for speed, so we
17358 test the global flag here. */
17359 if (!optimize_size)
17360 break;
17361 /* else fall through. */
17362 case AND:
17363 case IOR:
17364 case XOR:
17365 /* ANDS <Rdn>,<Rm> */
17366 if (rtx_equal_p (dst, op0)
17367 && low_register_operand (op1, SImode))
17368 action = action_for_partial_flag_setting;
17369 else if (rtx_equal_p (dst, op1)
17370 && low_register_operand (op0, SImode))
17371 action = action_for_partial_flag_setting == SKIP
17372 ? SKIP : SWAP_CONV;
17373 break;
17375 case ASHIFTRT:
17376 case ASHIFT:
17377 case LSHIFTRT:
17378 /* ASRS <Rdn>,<Rm> */
17379 /* LSRS <Rdn>,<Rm> */
17380 /* LSLS <Rdn>,<Rm> */
17381 if (rtx_equal_p (dst, op0)
17382 && low_register_operand (op1, SImode))
17383 action = action_for_partial_flag_setting;
17384 /* ASRS <Rd>,<Rm>,#<imm5> */
17385 /* LSRS <Rd>,<Rm>,#<imm5> */
17386 /* LSLS <Rd>,<Rm>,#<imm5> */
17387 else if (low_register_operand (op0, SImode)
17388 && CONST_INT_P (op1)
17389 && IN_RANGE (INTVAL (op1), 0, 31))
17390 action = action_for_partial_flag_setting;
17391 break;
17393 case ROTATERT:
17394 /* RORS <Rdn>,<Rm> */
17395 if (rtx_equal_p (dst, op0)
17396 && low_register_operand (op1, SImode))
17397 action = action_for_partial_flag_setting;
17398 break;
17400 case NOT:
17401 /* MVNS <Rd>,<Rm> */
17402 if (low_register_operand (op0, SImode))
17403 action = action_for_partial_flag_setting;
17404 break;
17406 case NEG:
17407 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17408 if (low_register_operand (op0, SImode))
17409 action = CONV;
17410 break;
17412 case CONST_INT:
17413 /* MOVS <Rd>,#<imm8> */
17414 if (CONST_INT_P (src)
17415 && IN_RANGE (INTVAL (src), 0, 255))
17416 action = action_for_partial_flag_setting;
17417 break;
17419 case REG:
17420 /* MOVS and MOV<c> with registers have different
17421 encodings, so are not relevant here. */
17422 break;
17424 default:
17425 break;
17429 if (action != SKIP)
17431 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17432 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17433 rtvec vec;
17435 if (action == SWAP_CONV)
17437 src = copy_rtx (src);
17438 XEXP (src, 0) = op1;
17439 XEXP (src, 1) = op0;
17440 pat = gen_rtx_SET (VOIDmode, dst, src);
17441 vec = gen_rtvec (2, pat, clobber);
17443 else /* action == CONV */
17444 vec = gen_rtvec (2, pat, clobber);
17446 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17447 INSN_CODE (insn) = -1;
17451 if (NONDEBUG_INSN_P (insn))
17452 df_simulate_one_insn_backwards (bb, insn, &live);
17456 CLEAR_REG_SET (&live);
17459 /* GCC puts the pool in the wrong place for ARM, since we can only
17460 load addresses a limited distance around the pc. We do some
17461 special munging to move the constant pool values to the correct
17462 point in the code. */
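/* A rough illustration of the problem being solved (the label and value
   are invented): a constant that cannot be encoded as an immediate is
   loaded pc-relative from a nearby literal pool,

       ldr     r0, .L42
       ...
   .L42:
       .word   0x12345678

   and .L42 must lie within the limited pc-relative range of the ldr.  The
   code below therefore collects such references ("fixes") and dumps small
   pools ("minipools") after existing barriers, or inserts a branch around
   a newly created pool when no barrier is close enough.  */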
17463 static void
17464 arm_reorg (void)
17466 rtx_insn *insn;
17467 HOST_WIDE_INT address = 0;
17468 Mfix * fix;
17470 if (TARGET_THUMB1)
17471 thumb1_reorg ();
17472 else if (TARGET_THUMB2)
17473 thumb2_reorg ();
17475 /* Ensure all insns that must be split have been split at this point.
17476 Otherwise, the pool placement code below may compute incorrect
17477 insn lengths. Note that when optimizing, all insns have already
17478 been split at this point. */
17479 if (!optimize)
17480 split_all_insns_noflow ();
17482 minipool_fix_head = minipool_fix_tail = NULL;
17484 /* The first insn must always be a note, or the code below won't
17485 scan it properly. */
17486 insn = get_insns ();
17487 gcc_assert (NOTE_P (insn));
17488 minipool_pad = 0;
17490 /* Scan all the insns and record the operands that will need fixing. */
17491 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17493 if (BARRIER_P (insn))
17494 push_minipool_barrier (insn, address);
17495 else if (INSN_P (insn))
17497 rtx_jump_table_data *table;
17499 note_invalid_constants (insn, address, true);
17500 address += get_attr_length (insn);
17502 /* If the insn is a vector jump, add the size of the table
17503 and skip the table. */
17504 if (tablejump_p (insn, NULL, &table))
17506 address += get_jump_table_size (table);
17507 insn = table;
17510 else if (LABEL_P (insn))
17511 /* Add the worst-case padding due to alignment. We don't add
17512 the _current_ padding because the minipool insertions
17513 themselves might change it. */
17514 address += get_label_padding (insn);
17517 fix = minipool_fix_head;
17519 /* Now scan the fixups and perform the required changes. */
17520 while (fix)
17522 Mfix * ftmp;
17523 Mfix * fdel;
17524 Mfix * last_added_fix;
17525 Mfix * last_barrier = NULL;
17526 Mfix * this_fix;
17528 /* Skip any further barriers before the next fix. */
17529 while (fix && BARRIER_P (fix->insn))
17530 fix = fix->next;
17532 /* No more fixes. */
17533 if (fix == NULL)
17534 break;
17536 last_added_fix = NULL;
17538 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17540 if (BARRIER_P (ftmp->insn))
17542 if (ftmp->address >= minipool_vector_head->max_address)
17543 break;
17545 last_barrier = ftmp;
17547 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17548 break;
17550 last_added_fix = ftmp; /* Keep track of the last fix added. */
17553 /* If we found a barrier, drop back to that; any fixes that we
17554 could have reached but come after the barrier will now go in
17555 the next mini-pool. */
17556 if (last_barrier != NULL)
17558 /* Reduce the refcount for those fixes that won't go into this
17559 pool after all. */
17560 for (fdel = last_barrier->next;
17561 fdel && fdel != ftmp;
17562 fdel = fdel->next)
17564 fdel->minipool->refcount--;
17565 fdel->minipool = NULL;
17568 ftmp = last_barrier;
17570 else
17572 /* ftmp is the first fix that we can't fit into this pool and
17573 there are no natural barriers that we could use. Insert a
17574 new barrier in the code somewhere between the previous
17575 fix and this one, and arrange to jump around it. */
17576 HOST_WIDE_INT max_address;
17578 /* The last item on the list of fixes must be a barrier, so
17579 we can never run off the end of the list of fixes without
17580 last_barrier being set. */
17581 gcc_assert (ftmp);
17583 max_address = minipool_vector_head->max_address;
17584 /* Check that there isn't another fix that is in range that
17585 we couldn't fit into this pool because the pool was
17586 already too large: we need to put the pool before such an
17587 instruction. The pool itself may come just after the
17588 fix because create_fix_barrier also allows space for a
17589 jump instruction. */
17590 if (ftmp->address < max_address)
17591 max_address = ftmp->address + 1;
17593 last_barrier = create_fix_barrier (last_added_fix, max_address);
17596 assign_minipool_offsets (last_barrier);
17598 while (ftmp)
17600 if (!BARRIER_P (ftmp->insn)
17601 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17602 == NULL))
17603 break;
17605 ftmp = ftmp->next;
17608 /* Scan over the fixes we have identified for this pool, fixing them
17609 up and adding the constants to the pool itself. */
17610 for (this_fix = fix; this_fix && ftmp != this_fix;
17611 this_fix = this_fix->next)
17612 if (!BARRIER_P (this_fix->insn))
17614 rtx addr
17615 = plus_constant (Pmode,
17616 gen_rtx_LABEL_REF (VOIDmode,
17617 minipool_vector_label),
17618 this_fix->minipool->offset);
17619 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17622 dump_minipool (last_barrier->insn);
17623 fix = ftmp;
17626 /* From now on we must synthesize any constants that we can't handle
17627 directly. This can happen if the RTL gets split during final
17628 instruction generation. */
17629 cfun->machine->after_arm_reorg = 1;
17631 /* Free the minipool memory. */
17632 obstack_free (&minipool_obstack, minipool_startobj);
17635 /* Routines to output assembly language. */
17637 /* Return string representation of passed in real value. */
17638 static const char *
17639 fp_const_from_val (REAL_VALUE_TYPE *r)
17641 if (!fp_consts_inited)
17642 init_fp_table ();
17644 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17645 return "0";
17648 /* OPERANDS[0] is the entire list of insns that constitute pop,
17649 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17650 is in the list, UPDATE is true iff the list contains explicit
17651 update of base register. */
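/* For illustration (the register choices are invented), the strings
   assembled below look like:

       pop     {r4, r5, pc}        @ base is SP, unified syntax
       ldmfd   sp!, {r4, r5, pc}   @ base is SP, divided syntax
       ldmia   r7, {r4, r5}        @ other base register, no writeback

   with a trailing "^" appended when returning from an interrupt
   handler.  */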
17652 void
17653 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17654 bool update)
17656 int i;
17657 char pattern[100];
17658 int offset;
17659 const char *conditional;
17660 int num_saves = XVECLEN (operands[0], 0);
17661 unsigned int regno;
17662 unsigned int regno_base = REGNO (operands[1]);
17664 offset = 0;
17665 offset += update ? 1 : 0;
17666 offset += return_pc ? 1 : 0;
17668 /* Is the base register in the list? */
17669 for (i = offset; i < num_saves; i++)
17671 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17672 /* If SP is in the list, then the base register must be SP. */
17673 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17674 /* If base register is in the list, there must be no explicit update. */
17675 if (regno == regno_base)
17676 gcc_assert (!update);
17679 conditional = reverse ? "%?%D0" : "%?%d0";
17680 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17682 /* Output pop (not ldmfd) because it has a shorter encoding. */
17683 gcc_assert (update);
17684 sprintf (pattern, "pop%s\t{", conditional);
17686 else
17688 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17689 It's just a convention, their semantics are identical. */
17690 if (regno_base == SP_REGNUM)
17691 sprintf (pattern, "ldm%sfd\t", conditional);
17692 else if (TARGET_UNIFIED_ASM)
17693 sprintf (pattern, "ldmia%s\t", conditional);
17694 else
17695 sprintf (pattern, "ldm%sia\t", conditional);
17697 strcat (pattern, reg_names[regno_base]);
17698 if (update)
17699 strcat (pattern, "!, {");
17700 else
17701 strcat (pattern, ", {");
17704 /* Output the first destination register. */
17705 strcat (pattern,
17706 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17708 /* Output the rest of the destination registers. */
17709 for (i = offset + 1; i < num_saves; i++)
17711 strcat (pattern, ", ");
17712 strcat (pattern,
17713 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17716 strcat (pattern, "}");
17718 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17719 strcat (pattern, "^");
17721 output_asm_insn (pattern, &cond);
17725 /* Output the assembly for a store multiple. */
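/* A hypothetical example of the string built below: storing three double
   registers starting at d8 produces either

       vpush.64   {d8, d9, d10}         @ the address register is SP
       vstmdb.64  r4!, {d8, d9, d10}    @ any other base register

   (the %?, %P1 and %m0 escapes are expanded later by output_asm_insn).  */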
17727 const char *
17728 vfp_output_vstmd (rtx * operands)
17730 char pattern[100];
17731 int p;
17732 int base;
17733 int i;
17734 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17735 ? XEXP (operands[0], 0)
17736 : XEXP (XEXP (operands[0], 0), 0);
17737 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17739 if (push_p)
17740 strcpy (pattern, "vpush%?.64\t{%P1");
17741 else
17742 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17744 p = strlen (pattern);
17746 gcc_assert (REG_P (operands[1]));
17748 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17749 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17751 p += sprintf (&pattern[p], ", d%d", base + i);
17753 strcpy (&pattern[p], "}");
17755 output_asm_insn (pattern, operands);
17756 return "";
17760 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17761 number of bytes pushed. */
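/* A worked example under the rules below (the register choices are
   illustrative): saving d8-d10 builds one multiple-store of three
   doubleword registers, 3 * 8 = 24 bytes, and the function returns 24.
   A request for exactly two pairs on a core without arm_arch6 is widened
   to three pairs (24 bytes instead of 16) to avoid the ARM10 VFPr1
   erratum described in the first comment of the function.  */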
17763 static int
17764 vfp_emit_fstmd (int base_reg, int count)
17766 rtx par;
17767 rtx dwarf;
17768 rtx tmp, reg;
17769 int i;
17771 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17772 register pairs are stored by a store multiple insn. We avoid this
17773 by pushing an extra pair. */
17774 if (count == 2 && !arm_arch6)
17776 if (base_reg == LAST_VFP_REGNUM - 3)
17777 base_reg -= 2;
17778 count++;
17781 /* FSTMD may not store more than 16 doubleword registers at once. Split
17782 larger stores into multiple parts (up to a maximum of two, in
17783 practice). */
17784 if (count > 16)
17786 int saved;
17787 /* NOTE: base_reg is an internal register number, so each D register
17788 counts as 2. */
17789 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17790 saved += vfp_emit_fstmd (base_reg, 16);
17791 return saved;
17794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17795 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17797 reg = gen_rtx_REG (DFmode, base_reg);
17798 base_reg += 2;
17800 XVECEXP (par, 0, 0)
17801 = gen_rtx_SET (VOIDmode,
17802 gen_frame_mem
17803 (BLKmode,
17804 gen_rtx_PRE_MODIFY (Pmode,
17805 stack_pointer_rtx,
17806 plus_constant
17807 (Pmode, stack_pointer_rtx,
17808 - (count * 8)))
17810 gen_rtx_UNSPEC (BLKmode,
17811 gen_rtvec (1, reg),
17812 UNSPEC_PUSH_MULT));
17814 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17815 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17816 RTX_FRAME_RELATED_P (tmp) = 1;
17817 XVECEXP (dwarf, 0, 0) = tmp;
17819 tmp = gen_rtx_SET (VOIDmode,
17820 gen_frame_mem (DFmode, stack_pointer_rtx),
17821 reg);
17822 RTX_FRAME_RELATED_P (tmp) = 1;
17823 XVECEXP (dwarf, 0, 1) = tmp;
17825 for (i = 1; i < count; i++)
17827 reg = gen_rtx_REG (DFmode, base_reg);
17828 base_reg += 2;
17829 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17831 tmp = gen_rtx_SET (VOIDmode,
17832 gen_frame_mem (DFmode,
17833 plus_constant (Pmode,
17834 stack_pointer_rtx,
17835 i * 8)),
17836 reg);
17837 RTX_FRAME_RELATED_P (tmp) = 1;
17838 XVECEXP (dwarf, 0, i + 1) = tmp;
17841 par = emit_insn (par);
17842 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17843 RTX_FRAME_RELATED_P (par) = 1;
17845 return count * 8;
17848 /* Emit a call instruction with pattern PAT. ADDR is the address of
17849 the call target. */
17851 void
17852 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17854 rtx insn;
17856 insn = emit_call_insn (pat);
17858 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17859 If the call might use such an entry, add a use of the PIC register
17860 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17861 if (TARGET_VXWORKS_RTP
17862 && flag_pic
17863 && !sibcall
17864 && GET_CODE (addr) == SYMBOL_REF
17865 && (SYMBOL_REF_DECL (addr)
17866 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17867 : !SYMBOL_REF_LOCAL_P (addr)))
17869 require_pic_register ();
17870 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17873 if (TARGET_AAPCS_BASED)
17875 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17876 linker. We need to add an IP clobber to allow setting
17877 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17878 is not needed since it's a fixed register. */
17879 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17880 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17884 /* Output a 'call' insn. */
17885 const char *
17886 output_call (rtx *operands)
17888 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17890 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17891 if (REGNO (operands[0]) == LR_REGNUM)
17893 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17894 output_asm_insn ("mov%?\t%0, %|lr", operands);
17897 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17899 if (TARGET_INTERWORK || arm_arch4t)
17900 output_asm_insn ("bx%?\t%0", operands);
17901 else
17902 output_asm_insn ("mov%?\t%|pc, %0", operands);
17904 return "";
17907 /* Output a 'call' insn that is a reference in memory. This is
17908 disabled for ARMv5 and we prefer a blx instead because otherwise
17909 there's a significant performance overhead. */
17910 const char *
17911 output_call_mem (rtx *operands)
17913 gcc_assert (!arm_arch5);
17914 if (TARGET_INTERWORK)
17916 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17917 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17918 output_asm_insn ("bx%?\t%|ip", operands);
17920 else if (regno_use_in (LR_REGNUM, operands[0]))
17922 /* LR is used in the memory address. We load the address in the
17923 first instruction. It's safe to use IP as the target of the
17924 load since the call will kill it anyway. */
17925 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17926 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17927 if (arm_arch4t)
17928 output_asm_insn ("bx%?\t%|ip", operands);
17929 else
17930 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17932 else
17934 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17935 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17938 return "";
17942 /* Output a move from arm registers to arm registers of a long double
17943 OPERANDS[0] is the destination.
17944 OPERANDS[1] is the source. */
17945 const char *
17946 output_mov_long_double_arm_from_arm (rtx *operands)
17948 /* We have to be careful here because the two might overlap. */
17949 int dest_start = REGNO (operands[0]);
17950 int src_start = REGNO (operands[1]);
17951 rtx ops[2];
17952 int i;
17954 if (dest_start < src_start)
17956 for (i = 0; i < 3; i++)
17958 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17959 ops[1] = gen_rtx_REG (SImode, src_start + i);
17960 output_asm_insn ("mov%?\t%0, %1", ops);
17963 else
17965 for (i = 2; i >= 0; i--)
17967 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17968 ops[1] = gen_rtx_REG (SImode, src_start + i);
17969 output_asm_insn ("mov%?\t%0, %1", ops);
17973 return "";
17976 void
17977 arm_emit_movpair (rtx dest, rtx src)
17979 /* If the src is an immediate, simplify it. */
17980 if (CONST_INT_P (src))
17982 HOST_WIDE_INT val = INTVAL (src);
17983 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17984 if ((val >> 16) & 0x0000ffff)
17985 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17986 GEN_INT (16)),
17987 GEN_INT ((val >> 16) & 0x0000ffff));
17988 return;
17990 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17991 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17994 /* Output a move between double words. It must be REG<-MEM
17995 or MEM<-REG. */
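/* Typical expansions of the templates below, with invented registers:

       ldrd    r0, [r2]           @ REG<-MEM when LDRD is available
       ldmia   r2, {r0, r1}       @ REG<-MEM otherwise
       strd    r0, [r2]           @ MEM<-REG when LDRD/STRD is available
       stmia   r2, {r0, r1}       @ MEM<-REG otherwise

   The many special cases handled below deal with auto-increment
   addresses, overlap between base and destination registers, and offsets
   outside the range LDRD/STRD can encode.  */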
17996 const char *
17997 output_move_double (rtx *operands, bool emit, int *count)
17999 enum rtx_code code0 = GET_CODE (operands[0]);
18000 enum rtx_code code1 = GET_CODE (operands[1]);
18001 rtx otherops[3];
18002 if (count)
18003 *count = 1;
18005 /* The only case when this might happen is when
18006 you are looking at the length of a DImode instruction
18007 that has an invalid constant in it. */
18008 if (code0 == REG && code1 != MEM)
18010 gcc_assert (!emit);
18011 *count = 2;
18012 return "";
18015 if (code0 == REG)
18017 unsigned int reg0 = REGNO (operands[0]);
18019 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18021 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18023 switch (GET_CODE (XEXP (operands[1], 0)))
18025 case REG:
18027 if (emit)
18029 if (TARGET_LDRD
18030 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18031 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
18032 else
18033 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18035 break;
18037 case PRE_INC:
18038 gcc_assert (TARGET_LDRD);
18039 if (emit)
18040 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18041 break;
18043 case PRE_DEC:
18044 if (emit)
18046 if (TARGET_LDRD)
18047 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18048 else
18049 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18051 break;
18053 case POST_INC:
18054 if (emit)
18056 if (TARGET_LDRD)
18057 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18058 else
18059 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18061 break;
18063 case POST_DEC:
18064 gcc_assert (TARGET_LDRD);
18065 if (emit)
18066 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18067 break;
18069 case PRE_MODIFY:
18070 case POST_MODIFY:
18071 /* Autoincrement addressing modes should never have overlapping
18072 base and destination registers, and overlapping index registers
18073 are already prohibited, so this doesn't need to worry about
18074 fix_cm3_ldrd. */
18075 otherops[0] = operands[0];
18076 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18077 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18079 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18081 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18083 /* Registers overlap so split out the increment. */
18084 if (emit)
18086 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18087 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18089 if (count)
18090 *count = 2;
18092 else
18094 /* Use a single insn if we can.
18095 FIXME: IWMMXT allows offsets larger than ldrd can
18096 handle, fix these up with a pair of ldr. */
18097 if (TARGET_THUMB2
18098 || !CONST_INT_P (otherops[2])
18099 || (INTVAL (otherops[2]) > -256
18100 && INTVAL (otherops[2]) < 256))
18102 if (emit)
18103 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18105 else
18107 if (emit)
18109 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18110 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18112 if (count)
18113 *count = 2;
18118 else
18120 /* Use a single insn if we can.
18121 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18122 fix these up with a pair of ldr. */
18123 if (TARGET_THUMB2
18124 || !CONST_INT_P (otherops[2])
18125 || (INTVAL (otherops[2]) > -256
18126 && INTVAL (otherops[2]) < 256))
18128 if (emit)
18129 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18131 else
18133 if (emit)
18135 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18136 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18138 if (count)
18139 *count = 2;
18142 break;
18144 case LABEL_REF:
18145 case CONST:
18146 /* We might be able to use ldrd %0, %1 here. However the range is
18147 different to ldr/adr, and it is broken on some ARMv7-M
18148 implementations. */
18149 /* Use the second register of the pair to avoid problematic
18150 overlap. */
18151 otherops[1] = operands[1];
18152 if (emit)
18153 output_asm_insn ("adr%?\t%0, %1", otherops);
18154 operands[1] = otherops[0];
18155 if (emit)
18157 if (TARGET_LDRD)
18158 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18159 else
18160 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18163 if (count)
18164 *count = 2;
18165 break;
18167 /* ??? This needs checking for thumb2. */
18168 default:
18169 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18170 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18172 otherops[0] = operands[0];
18173 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18174 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18176 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18178 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18180 switch ((int) INTVAL (otherops[2]))
18182 case -8:
18183 if (emit)
18184 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18185 return "";
18186 case -4:
18187 if (TARGET_THUMB2)
18188 break;
18189 if (emit)
18190 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18191 return "";
18192 case 4:
18193 if (TARGET_THUMB2)
18194 break;
18195 if (emit)
18196 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18197 return "";
18200 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18201 operands[1] = otherops[0];
18202 if (TARGET_LDRD
18203 && (REG_P (otherops[2])
18204 || TARGET_THUMB2
18205 || (CONST_INT_P (otherops[2])
18206 && INTVAL (otherops[2]) > -256
18207 && INTVAL (otherops[2]) < 256)))
18209 if (reg_overlap_mentioned_p (operands[0],
18210 otherops[2]))
18212 /* Swap base and index registers over to
18213 avoid a conflict. */
18214 std::swap (otherops[1], otherops[2]);
18216 /* If both registers conflict, it will usually
18217 have been fixed by a splitter. */
18218 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18219 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18221 if (emit)
18223 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18224 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18226 if (count)
18227 *count = 2;
18229 else
18231 otherops[0] = operands[0];
18232 if (emit)
18233 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18235 return "";
18238 if (CONST_INT_P (otherops[2]))
18240 if (emit)
18242 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18243 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18244 else
18245 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18248 else
18250 if (emit)
18251 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18254 else
18256 if (emit)
18257 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18260 if (count)
18261 *count = 2;
18263 if (TARGET_LDRD)
18264 return "ldr%(d%)\t%0, [%1]";
18266 return "ldm%(ia%)\t%1, %M0";
18268 else
18270 otherops[1] = adjust_address (operands[1], SImode, 4);
18271 /* Take care of overlapping base/data reg. */
18272 if (reg_mentioned_p (operands[0], operands[1]))
18274 if (emit)
18276 output_asm_insn ("ldr%?\t%0, %1", otherops);
18277 output_asm_insn ("ldr%?\t%0, %1", operands);
18279 if (count)
18280 *count = 2;
18283 else
18285 if (emit)
18287 output_asm_insn ("ldr%?\t%0, %1", operands);
18288 output_asm_insn ("ldr%?\t%0, %1", otherops);
18290 if (count)
18291 *count = 2;
18296 else
18298 /* Constraints should ensure this. */
18299 gcc_assert (code0 == MEM && code1 == REG);
18300 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18301 || (TARGET_ARM && TARGET_LDRD));
18303 switch (GET_CODE (XEXP (operands[0], 0)))
18305 case REG:
18306 if (emit)
18308 if (TARGET_LDRD)
18309 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18310 else
18311 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18313 break;
18315 case PRE_INC:
18316 gcc_assert (TARGET_LDRD);
18317 if (emit)
18318 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18319 break;
18321 case PRE_DEC:
18322 if (emit)
18324 if (TARGET_LDRD)
18325 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18326 else
18327 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18329 break;
18331 case POST_INC:
18332 if (emit)
18334 if (TARGET_LDRD)
18335 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18336 else
18337 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18339 break;
18341 case POST_DEC:
18342 gcc_assert (TARGET_LDRD);
18343 if (emit)
18344 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18345 break;
18347 case PRE_MODIFY:
18348 case POST_MODIFY:
18349 otherops[0] = operands[1];
18350 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18351 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18353 /* IWMMXT allows offsets larger than ldrd can handle,
18354 fix these up with a pair of ldr. */
18355 if (!TARGET_THUMB2
18356 && CONST_INT_P (otherops[2])
18357 && (INTVAL(otherops[2]) <= -256
18358 || INTVAL(otherops[2]) >= 256))
18360 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18362 if (emit)
18364 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18365 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18367 if (count)
18368 *count = 2;
18370 else
18372 if (emit)
18374 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18375 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18377 if (count)
18378 *count = 2;
18381 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18383 if (emit)
18384 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18386 else
18388 if (emit)
18389 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18391 break;
18393 case PLUS:
18394 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18395 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18397 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18399 case -8:
18400 if (emit)
18401 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18402 return "";
18404 case -4:
18405 if (TARGET_THUMB2)
18406 break;
18407 if (emit)
18408 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18409 return "";
18411 case 4:
18412 if (TARGET_THUMB2)
18413 break;
18414 if (emit)
18415 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18416 return "";
18419 if (TARGET_LDRD
18420 && (REG_P (otherops[2])
18421 || TARGET_THUMB2
18422 || (CONST_INT_P (otherops[2])
18423 && INTVAL (otherops[2]) > -256
18424 && INTVAL (otherops[2]) < 256)))
18426 otherops[0] = operands[1];
18427 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18428 if (emit)
18429 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18430 return "";
18432 /* Fall through */
18434 default:
18435 otherops[0] = adjust_address (operands[0], SImode, 4);
18436 otherops[1] = operands[1];
18437 if (emit)
18439 output_asm_insn ("str%?\t%1, %0", operands);
18440 output_asm_insn ("str%?\t%H1, %0", otherops);
18442 if (count)
18443 *count = 2;
18447 return "";
18450 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18451 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18453 const char *
18454 output_move_quad (rtx *operands)
18456 if (REG_P (operands[0]))
18458 /* Load, or reg->reg move. */
18460 if (MEM_P (operands[1]))
18462 switch (GET_CODE (XEXP (operands[1], 0)))
18464 case REG:
18465 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18466 break;
18468 case LABEL_REF:
18469 case CONST:
18470 output_asm_insn ("adr%?\t%0, %1", operands);
18471 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18472 break;
18474 default:
18475 gcc_unreachable ();
18478 else
18480 rtx ops[2];
18481 int dest, src, i;
18483 gcc_assert (REG_P (operands[1]));
18485 dest = REGNO (operands[0]);
18486 src = REGNO (operands[1]);
18488 /* This seems pretty dumb, but hopefully GCC won't try to do it
18489 very often. */
18490 if (dest < src)
18491 for (i = 0; i < 4; i++)
18493 ops[0] = gen_rtx_REG (SImode, dest + i);
18494 ops[1] = gen_rtx_REG (SImode, src + i);
18495 output_asm_insn ("mov%?\t%0, %1", ops);
18497 else
18498 for (i = 3; i >= 0; i--)
18500 ops[0] = gen_rtx_REG (SImode, dest + i);
18501 ops[1] = gen_rtx_REG (SImode, src + i);
18502 output_asm_insn ("mov%?\t%0, %1", ops);
18506 else
18508 gcc_assert (MEM_P (operands[0]));
18509 gcc_assert (REG_P (operands[1]));
18510 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18512 switch (GET_CODE (XEXP (operands[0], 0)))
18514 case REG:
18515 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18516 break;
18518 default:
18519 gcc_unreachable ();
18523 return "";
18526 /* Output a VFP load or store instruction. */
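/* Rough examples of what the format string below expands to (registers
   and addresses are invented):

       vldr.64    d8, [r0]        @ plain load of a DFmode value
       vstr.32    s0, [r0]        @ plain store of an SFmode value
       vldmia.64  r0!, {d8}       @ POST_INC address
       vstmdb.64  r0!, {d8}       @ PRE_DEC address

   The "ld"/"st", "64"/"32" and "P" pieces are filled in by the sprintf
   near the end of the function.  */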
18528 const char *
18529 output_move_vfp (rtx *operands)
18531 rtx reg, mem, addr, ops[2];
18532 int load = REG_P (operands[0]);
18533 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18534 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18535 const char *templ;
18536 char buff[50];
18537 machine_mode mode;
18539 reg = operands[!load];
18540 mem = operands[load];
18542 mode = GET_MODE (reg);
18544 gcc_assert (REG_P (reg));
18545 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18546 gcc_assert (mode == SFmode
18547 || mode == DFmode
18548 || mode == SImode
18549 || mode == DImode
18550 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18551 gcc_assert (MEM_P (mem));
18553 addr = XEXP (mem, 0);
18555 switch (GET_CODE (addr))
18557 case PRE_DEC:
18558 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18559 ops[0] = XEXP (addr, 0);
18560 ops[1] = reg;
18561 break;
18563 case POST_INC:
18564 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18565 ops[0] = XEXP (addr, 0);
18566 ops[1] = reg;
18567 break;
18569 default:
18570 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18571 ops[0] = reg;
18572 ops[1] = mem;
18573 break;
18576 sprintf (buff, templ,
18577 load ? "ld" : "st",
18578 dp ? "64" : "32",
18579 dp ? "P" : "",
18580 integer_p ? "\t%@ int" : "");
18581 output_asm_insn (buff, ops);
18583 return "";
18586 /* Output a Neon double-word or quad-word load or store, or a load
18587 or store for larger structure modes.
18589 WARNING: The ordering of elements is weird in big-endian mode,
18590 because the EABI requires that vectors stored in memory appear
18591 as though they were stored by a VSTM instruction.
18592 GCC RTL defines element ordering based on in-memory order.
18593 This can be different from the architectural ordering of elements
18594 within a NEON register. The intrinsics defined in arm_neon.h use the
18595 NEON register element ordering, not the GCC RTL element ordering.
18597 For example, the in-memory ordering of a big-endian quadword
18598 vector with 16-bit elements when stored from register pair {d0,d1}
18599 will be (lowest address first, d0[N] is NEON register element N):
18601 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18603 When necessary, quadword registers (dN, dN+1) are moved to ARM
18604 registers from rN in the order:
18606 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18608 So that STM/LDM can be used on vectors in ARM registers, and the
18609 same memory layout will result as if VSTM/VLDM were used.
18611 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18612 possible, which allows use of appropriate alignment tags.
18613 Note that the choice of "64" is independent of the actual vector
18614 element size; this size simply ensures that the behavior is
18615 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18617 Due to limitations of those instructions, use of VST1.64/VLD1.64
18618 is not possible if:
18619 - the address contains PRE_DEC, or
18620 - the mode refers to more than 4 double-word registers
18622 In those cases, it would be possible to replace VSTM/VLDM by a
18623 sequence of instructions; this is not currently implemented since
18624 this is not certain to actually improve performance. */
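/* A small invented example of the preference described above: a quadword
   value in (d0, d1) stored through a pointer known to be 128-bit aligned
   can carry an alignment hint,

       vst1.64   {d0, d1}, [r0:128]

   whereas the VSTM fallback required for PRE_DEC addresses or for modes
   wider than four doubleword registers cannot:

       vstmdb    r0!, {d0, d1}  */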
18626 const char *
18627 output_move_neon (rtx *operands)
18629 rtx reg, mem, addr, ops[2];
18630 int regno, nregs, load = REG_P (operands[0]);
18631 const char *templ;
18632 char buff[50];
18633 machine_mode mode;
18635 reg = operands[!load];
18636 mem = operands[load];
18638 mode = GET_MODE (reg);
18640 gcc_assert (REG_P (reg));
18641 regno = REGNO (reg);
18642 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18643 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18644 || NEON_REGNO_OK_FOR_QUAD (regno));
18645 gcc_assert (VALID_NEON_DREG_MODE (mode)
18646 || VALID_NEON_QREG_MODE (mode)
18647 || VALID_NEON_STRUCT_MODE (mode));
18648 gcc_assert (MEM_P (mem));
18650 addr = XEXP (mem, 0);
18652 /* Strip off const from addresses like (const (plus (...))). */
18653 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18654 addr = XEXP (addr, 0);
18656 switch (GET_CODE (addr))
18658 case POST_INC:
18659 /* We have to use vldm / vstm for too-large modes. */
18660 if (nregs > 4)
18662 templ = "v%smia%%?\t%%0!, %%h1";
18663 ops[0] = XEXP (addr, 0);
18665 else
18667 templ = "v%s1.64\t%%h1, %%A0";
18668 ops[0] = mem;
18670 ops[1] = reg;
18671 break;
18673 case PRE_DEC:
18674 /* We have to use vldm / vstm in this case, since there is no
18675 pre-decrement form of the vld1 / vst1 instructions. */
18676 templ = "v%smdb%%?\t%%0!, %%h1";
18677 ops[0] = XEXP (addr, 0);
18678 ops[1] = reg;
18679 break;
18681 case POST_MODIFY:
18682 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18683 gcc_unreachable ();
18685 case REG:
18686 /* We have to use vldm / vstm for too-large modes. */
18687 if (nregs > 1)
18689 if (nregs > 4)
18690 templ = "v%smia%%?\t%%m0, %%h1";
18691 else
18692 templ = "v%s1.64\t%%h1, %%A0";
18694 ops[0] = mem;
18695 ops[1] = reg;
18696 break;
18698 /* Fall through. */
18699 case LABEL_REF:
18700 case PLUS:
18702 int i;
18703 int overlap = -1;
18704 for (i = 0; i < nregs; i++)
18706 /* We're only using DImode here because it's a convenient size. */
18707 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18708 ops[1] = adjust_address (mem, DImode, 8 * i);
18709 if (reg_overlap_mentioned_p (ops[0], mem))
18711 gcc_assert (overlap == -1);
18712 overlap = i;
18714 else
18716 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18717 output_asm_insn (buff, ops);
18720 if (overlap != -1)
18722 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18723 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18724 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18725 output_asm_insn (buff, ops);
18728 return "";
18731 default:
18732 gcc_unreachable ();
18735 sprintf (buff, templ, load ? "ld" : "st");
18736 output_asm_insn (buff, ops);
18738 return "";
18741 /* Compute and return the length of neon_mov<mode>, where <mode> is
18742 one of VSTRUCT modes: EI, OI, CI or XI. */
18744 arm_attr_length_move_neon (rtx_insn *insn)
18746 rtx reg, mem, addr;
18747 int load;
18748 machine_mode mode;
18750 extract_insn_cached (insn);
18752 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18754 mode = GET_MODE (recog_data.operand[0]);
18755 switch (mode)
18757 case EImode:
18758 case OImode:
18759 return 8;
18760 case CImode:
18761 return 12;
18762 case XImode:
18763 return 16;
18764 default:
18765 gcc_unreachable ();
18769 load = REG_P (recog_data.operand[0]);
18770 reg = recog_data.operand[!load];
18771 mem = recog_data.operand[load];
18773 gcc_assert (MEM_P (mem));
18775 mode = GET_MODE (reg);
18776 addr = XEXP (mem, 0);
18778 /* Strip off const from addresses like (const (plus (...))). */
18779 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18780 addr = XEXP (addr, 0);
18782 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18784 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18785 return insns * 4;
18787 else
18788 return 4;
18791 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18792 return zero. */
18795 arm_address_offset_is_imm (rtx_insn *insn)
18797 rtx mem, addr;
18799 extract_insn_cached (insn);
18801 if (REG_P (recog_data.operand[0]))
18802 return 0;
18804 mem = recog_data.operand[0];
18806 gcc_assert (MEM_P (mem));
18808 addr = XEXP (mem, 0);
18810 if (REG_P (addr)
18811 || (GET_CODE (addr) == PLUS
18812 && REG_P (XEXP (addr, 0))
18813 && CONST_INT_P (XEXP (addr, 1))))
18814 return 1;
18815 else
18816 return 0;
18819 /* Output an ADD r, s, #n where n may be too big for one instruction.
18820 If adding zero to one register, output nothing. */
18821 const char *
18822 output_add_immediate (rtx *operands)
18824 HOST_WIDE_INT n = INTVAL (operands[2]);
18826 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18828 if (n < 0)
18829 output_multi_immediate (operands,
18830 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18831 -n);
18832 else
18833 output_multi_immediate (operands,
18834 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18838 return "";
18841 /* Output a multiple immediate operation.
18842 OPERANDS is the vector of operands referred to in the output patterns.
18843 INSTR1 is the output pattern to use for the first constant.
18844 INSTR2 is the output pattern to use for subsequent constants.
18845 IMMED_OP is the index of the constant slot in OPERANDS.
18846 N is the constant value. */
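/* Worked example (the operands are invented): when called from
   output_add_immediate with N = 0x10004, the scan below finds the 8-bit
   chunks 0x4 and 0x10000 and emits

       add     r0, r1, #4
       add     r0, r0, #65536

   i.e. the first chunk uses INSTR1 and each later chunk uses INSTR2.  */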
18847 static const char *
18848 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18849 int immed_op, HOST_WIDE_INT n)
18851 #if HOST_BITS_PER_WIDE_INT > 32
18852 n &= 0xffffffff;
18853 #endif
18855 if (n == 0)
18857 /* Quick and easy output. */
18858 operands[immed_op] = const0_rtx;
18859 output_asm_insn (instr1, operands);
18861 else
18863 int i;
18864 const char * instr = instr1;
18866 /* Note that n is never zero here (which would give no output). */
18867 for (i = 0; i < 32; i += 2)
18869 if (n & (3 << i))
18871 operands[immed_op] = GEN_INT (n & (255 << i));
18872 output_asm_insn (instr, operands);
18873 instr = instr2;
18874 i += 6;
18879 return "";
18882 /* Return the name of a shifter operation. */
18883 static const char *
18884 arm_shift_nmem(enum rtx_code code)
18886 switch (code)
18888 case ASHIFT:
18889 return ARM_LSL_NAME;
18891 case ASHIFTRT:
18892 return "asr";
18894 case LSHIFTRT:
18895 return "lsr";
18897 case ROTATERT:
18898 return "ror";
18900 default:
18901 abort();
18905 /* Return the appropriate ARM instruction for the operation code.
18906 The returned result should not be overwritten. OP is the rtx of the
18907 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18908 was shifted. */
18909 const char *
18910 arithmetic_instr (rtx op, int shift_first_arg)
18912 switch (GET_CODE (op))
18914 case PLUS:
18915 return "add";
18917 case MINUS:
18918 return shift_first_arg ? "rsb" : "sub";
18920 case IOR:
18921 return "orr";
18923 case XOR:
18924 return "eor";
18926 case AND:
18927 return "and";
18929 case ASHIFT:
18930 case ASHIFTRT:
18931 case LSHIFTRT:
18932 case ROTATERT:
18933 return arm_shift_nmem(GET_CODE(op));
18935 default:
18936 gcc_unreachable ();
18940 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18941 for the operation code. The returned result should not be overwritten.
18942 OP is the rtx of the shift.
18943 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18944 constant shift amount otherwise. */
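/* Illustrative inputs and results for this helper (the rtxes are
   hypothetical): (ashift (reg) (const_int 5)) yields "lsl" with
   *AMOUNTP = 5; (mult (reg) (const_int 8)) is folded to "lsl" with
   *AMOUNTP = 3; a register-specified shift such as
   (lshiftrt (reg) (reg)) yields "lsr" with *AMOUNTP = -1.  */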
18945 static const char *
18946 shift_op (rtx op, HOST_WIDE_INT *amountp)
18948 const char * mnem;
18949 enum rtx_code code = GET_CODE (op);
18951 switch (code)
18953 case ROTATE:
18954 if (!CONST_INT_P (XEXP (op, 1)))
18956 output_operand_lossage ("invalid shift operand");
18957 return NULL;
18960 code = ROTATERT;
18961 *amountp = 32 - INTVAL (XEXP (op, 1));
18962 mnem = "ror";
18963 break;
18965 case ASHIFT:
18966 case ASHIFTRT:
18967 case LSHIFTRT:
18968 case ROTATERT:
18969 mnem = arm_shift_nmem(code);
18970 if (CONST_INT_P (XEXP (op, 1)))
18972 *amountp = INTVAL (XEXP (op, 1));
18974 else if (REG_P (XEXP (op, 1)))
18976 *amountp = -1;
18977 return mnem;
18979 else
18981 output_operand_lossage ("invalid shift operand");
18982 return NULL;
18984 break;
18986 case MULT:
18987 /* We never have to worry about the amount being other than a
18988 power of 2, since this case can never be reloaded from a reg. */
18989 if (!CONST_INT_P (XEXP (op, 1)))
18991 output_operand_lossage ("invalid shift operand");
18992 return NULL;
18995 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18997 /* Amount must be a power of two. */
18998 if (*amountp & (*amountp - 1))
19000 output_operand_lossage ("invalid shift operand");
19001 return NULL;
19004 *amountp = int_log2 (*amountp);
19005 return ARM_LSL_NAME;
19007 default:
19008 output_operand_lossage ("invalid shift operand");
19009 return NULL;
19012 /* This is not 100% correct, but follows from the desire to merge
19013 multiplication by a power of 2 with the recognizer for a
19014 shift. >=32 is not a valid shift for "lsl", so we must try and
19015 output a shift that produces the correct arithmetical result.
19016 Using lsr #32 is identical except for the fact that the carry bit
19017 is not set correctly if we set the flags; but we never use the
19018 carry bit from such an operation, so we can ignore that. */
19019 if (code == ROTATERT)
19020 /* Rotate is just modulo 32. */
19021 *amountp &= 31;
19022 else if (*amountp != (*amountp & 31))
19024 if (code == ASHIFT)
19025 mnem = "lsr";
19026 *amountp = 32;
19029 /* Shifts of 0 are no-ops. */
19030 if (*amountp == 0)
19031 return NULL;
19033 return mnem;
19036 /* Obtain the shift from the POWER of two. */
19038 static HOST_WIDE_INT
19039 int_log2 (HOST_WIDE_INT power)
19041 HOST_WIDE_INT shift = 0;
19043 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19045 gcc_assert (shift <= 31);
19046 shift++;
19049 return shift;
19052 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19053 because /bin/as is horribly restrictive. The judgement about
19054 whether or not each character is 'printable' (and can be output as
19055 is) or not (and must be printed with an octal escape) must be made
19056 with reference to the *host* character set -- the situation is
19057 similar to that discussed in the comments above pp_c_char in
19058 c-pretty-print.c. */
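/* For example (the input bytes are invented), the four bytes
   'H', 'i', '"', '\n' come out as

       .ascii  "Hi\"\012"

   printable characters pass through, '"' and '\' gain a leading
   backslash, anything else becomes a three-digit octal escape, and the
   directive is restarted whenever MAX_ASCII_LEN characters have been
   written on one line.  */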
19060 #define MAX_ASCII_LEN 51
19062 void
19063 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19065 int i;
19066 int len_so_far = 0;
19068 fputs ("\t.ascii\t\"", stream);
19070 for (i = 0; i < len; i++)
19072 int c = p[i];
19074 if (len_so_far >= MAX_ASCII_LEN)
19076 fputs ("\"\n\t.ascii\t\"", stream);
19077 len_so_far = 0;
19080 if (ISPRINT (c))
19082 if (c == '\\' || c == '\"')
19084 putc ('\\', stream);
19085 len_so_far++;
19087 putc (c, stream);
19088 len_so_far++;
19090 else
19092 fprintf (stream, "\\%03o", c);
19093 len_so_far += 4;
19097 fputs ("\"\n", stream);
19100 /* Whether a register is callee saved or not. This is necessary because high
19101 registers are marked as caller saved when optimizing for size on Thumb-1
19102 targets, despite being callee saved, in order to avoid using them. */
19103 #define callee_saved_reg_p(reg) \
19104 (!call_used_regs[reg] \
19105 || (TARGET_THUMB1 && optimize_size \
19106 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19108 /* Compute the register save mask for registers 0 through 12
19109 inclusive. This code is used by arm_compute_save_reg_mask. */
19111 static unsigned long
19112 arm_compute_save_reg0_reg12_mask (void)
19114 unsigned long func_type = arm_current_func_type ();
19115 unsigned long save_reg_mask = 0;
19116 unsigned int reg;
19118 if (IS_INTERRUPT (func_type))
19120 unsigned int max_reg;
19121 /* Interrupt functions must not corrupt any registers,
19122 even call clobbered ones. If this is a leaf function
19123 we can just examine the registers used by the RTL, but
19124 otherwise we have to assume that whatever function is
19125 called might clobber anything, and so we have to save
19126 all the call-clobbered registers as well. */
19127 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19128 /* FIQ handlers have registers r8 - r12 banked, so
19129 we only need to check r0 - r7.  Normal ISRs only
19130 bank r14 and r15, so we must check up to r12.
19131 r13 is the stack pointer which is always preserved,
19132 so we do not need to consider it here. */
19133 max_reg = 7;
19134 else
19135 max_reg = 12;
19137 for (reg = 0; reg <= max_reg; reg++)
19138 if (df_regs_ever_live_p (reg)
19139 || (! crtl->is_leaf && call_used_regs[reg]))
19140 save_reg_mask |= (1 << reg);
19142 /* Also save the pic base register if necessary. */
19143 if (flag_pic
19144 && !TARGET_SINGLE_PIC_BASE
19145 && arm_pic_register != INVALID_REGNUM
19146 && crtl->uses_pic_offset_table)
19147 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19149 else if (IS_VOLATILE(func_type))
19151 /* For noreturn functions we historically omitted register saves
19152 altogether. However this really messes up debugging. As a
19153 compromise save just the frame pointers. Combined with the link
19154 register saved elsewhere this should be sufficient to get
19155 a backtrace. */
19156 if (frame_pointer_needed)
19157 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19158 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19159 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19160 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19161 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19163 else
19165 /* In the normal case we only need to save those registers
19166 which are call saved and which are used by this function. */
19167 for (reg = 0; reg <= 11; reg++)
19168 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19169 save_reg_mask |= (1 << reg);
19171 /* Handle the frame pointer as a special case. */
19172 if (frame_pointer_needed)
19173 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19175 /* If we aren't loading the PIC register,
19176 don't stack it even though it may be live. */
19177 if (flag_pic
19178 && !TARGET_SINGLE_PIC_BASE
19179 && arm_pic_register != INVALID_REGNUM
19180 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19181 || crtl->uses_pic_offset_table))
19182 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19184 /* The prologue will copy SP into R0, so save it. */
19185 if (IS_STACKALIGN (func_type))
19186 save_reg_mask |= 1;
19189 /* Save registers so the exception handler can modify them. */
19190 if (crtl->calls_eh_return)
19192 unsigned int i;
19194 for (i = 0; ; i++)
19196 reg = EH_RETURN_DATA_REGNO (i);
19197 if (reg == INVALID_REGNUM)
19198 break;
19199 save_reg_mask |= 1 << reg;
19203 return save_reg_mask;
19206 /* Return true if r3 is live at the start of the function. */
19208 static bool
19209 arm_r3_live_at_start_p (void)
19211 /* Just look at cfg info, which is still close enough to correct at this
19212 point. This gives false positives for broken functions that might use
19213 uninitialized data that happens to be allocated in r3, but who cares? */
19214 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19217 /* Compute the number of bytes used to store the static chain register on the
19218 stack, above the stack frame. We need to know this accurately to get the
19219 alignment of the rest of the stack frame correct. */
19221 static int
19222 arm_compute_static_chain_stack_bytes (void)
19224 /* See the defining assertion in arm_expand_prologue. */
19225 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19226 && IS_NESTED (arm_current_func_type ())
19227 && arm_r3_live_at_start_p ()
19228 && crtl->args.pretend_args_size == 0)
19229 return 4;
19231 return 0;
19234 /* Compute a bit mask of which registers need to be
19235 saved on the stack for the current function.
19236 This is used by arm_get_frame_offsets, which may add extra registers. */
19238 static unsigned long
19239 arm_compute_save_reg_mask (void)
19241 unsigned int save_reg_mask = 0;
19242 unsigned long func_type = arm_current_func_type ();
19243 unsigned int reg;
19245 if (IS_NAKED (func_type))
19246 /* This should never really happen. */
19247 return 0;
19249 /* If we are creating a stack frame, then we must save the frame pointer,
19250 IP (which will hold the old stack pointer), LR and the PC. */
19251 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19252 save_reg_mask |=
19253 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19254 | (1 << IP_REGNUM)
19255 | (1 << LR_REGNUM)
19256 | (1 << PC_REGNUM);
19258 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19260 /* Decide if we need to save the link register.
19261 Interrupt routines have their own banked link register,
19262 so they never need to save it.
19263 Otherwise if we do not use the link register we do not need to save
19264 it. If we are pushing other registers onto the stack however, we
19265 can save an instruction in the epilogue by pushing the link register
19266 now and then popping it back into the PC. This incurs extra memory
19267 accesses though, so we only do it when optimizing for size, and only
19268 if we know that we will not need a fancy return sequence. */
19269 if (df_regs_ever_live_p (LR_REGNUM)
19270 || (save_reg_mask
19271 && optimize_size
19272 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19273 && !crtl->tail_call_emit
19274 && !crtl->calls_eh_return))
19275 save_reg_mask |= 1 << LR_REGNUM;
19277 if (cfun->machine->lr_save_eliminated)
19278 save_reg_mask &= ~ (1 << LR_REGNUM);
19280 if (TARGET_REALLY_IWMMXT
19281 && ((bit_count (save_reg_mask)
19282 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19283 arm_compute_static_chain_stack_bytes())
19284 ) % 2) != 0)
19286 /* The total number of registers that are going to be pushed
19287 onto the stack is odd. We need to ensure that the stack
19288 is 64-bit aligned before we start to save iWMMXt registers,
19289 and also before we start to create locals. (A local variable
19290 might be a double or long long which we will load/store using
19291 an iWMMXt instruction). Therefore we need to push another
19292 ARM register, so that the stack will be 64-bit aligned. We
19293 try to avoid using the arg registers (r0 - r3) as they might be
19294 used to pass values in a tail call. */
19295 for (reg = 4; reg <= 12; reg++)
19296 if ((save_reg_mask & (1 << reg)) == 0)
19297 break;
19299 if (reg <= 12)
19300 save_reg_mask |= (1 << reg);
19301 else
19303 cfun->machine->sibcall_blocked = 1;
19304 save_reg_mask |= (1 << 3);
19308 /* We may need to push an additional register for use initializing the
19309 PIC base register. */
19310 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19311 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19313 reg = thumb_find_work_register (1 << 4);
19314 if (!call_used_regs[reg])
19315 save_reg_mask |= (1 << reg);
19318 return save_reg_mask;
19322 /* Compute a bit mask of which registers need to be
19323 saved on the stack for the current function. */
19324 static unsigned long
19325 thumb1_compute_save_reg_mask (void)
19327 unsigned long mask;
19328 unsigned reg;
19330 mask = 0;
19331 for (reg = 0; reg < 12; reg ++)
19332 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19333 mask |= 1 << reg;
19335 if (flag_pic
19336 && !TARGET_SINGLE_PIC_BASE
19337 && arm_pic_register != INVALID_REGNUM
19338 && crtl->uses_pic_offset_table)
19339 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19341 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19342 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19343 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19345 /* LR will also be pushed if any lo regs are pushed. */
19346 if (mask & 0xff || thumb_force_lr_save ())
19347 mask |= (1 << LR_REGNUM);
19349 /* Make sure we have a low work register if we need one.
19350 We will need one if we are going to push a high register,
19351 but we are not currently intending to push a low register. */
19352 if ((mask & 0xff) == 0
19353 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19355 /* Use thumb_find_work_register to choose which register
19356 we will use. If the register is live then we will
19357 have to push it. Use LAST_LO_REGNUM as our fallback
19358 choice for the register to select. */
19359 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19360 /* Make sure the register returned by thumb_find_work_register is
19361 not part of the return value. */
19362 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19363 reg = LAST_LO_REGNUM;
19365 if (callee_saved_reg_p (reg))
19366 mask |= 1 << reg;
19369 /* The 504 below is 8 bytes less than 512 because there are two possible
19370 alignment words. We can't tell here if they will be present or not so we
19371 have to play it safe and assume that they are. */
19372 if ((CALLER_INTERWORKING_SLOT_SIZE +
19373 ROUND_UP_WORD (get_frame_size ()) +
19374 crtl->outgoing_args_size) >= 504)
19376 /* This is the same as the code in thumb1_expand_prologue() which
19377 determines which register to use for stack decrement. */
19378 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19379 if (mask & (1 << reg))
19380 break;
19382 if (reg > LAST_LO_REGNUM)
19384 /* Make sure we have a register available for stack decrement. */
19385 mask |= 1 << LAST_LO_REGNUM;
19389 return mask;
19393 /* Return the number of bytes required to save VFP registers. */
19394 static int
19395 arm_get_vfp_saved_size (void)
19397 unsigned int regno;
19398 int count;
19399 int saved;
19401 saved = 0;
19402 /* Space for saved VFP registers. */
19403 if (TARGET_HARD_FLOAT && TARGET_VFP)
19405 count = 0;
19406 for (regno = FIRST_VFP_REGNUM;
19407 regno < LAST_VFP_REGNUM;
19408 regno += 2)
19410 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19411 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19413 if (count > 0)
19415 /* Workaround ARM10 VFPr1 bug. */
19416 if (count == 2 && !arm_arch6)
19417 count++;
19418 saved += count * 8;
19420 count = 0;
19422 else
19423 count++;
19425 if (count > 0)
19427 if (count == 2 && !arm_arch6)
19428 count++;
19429 saved += count * 8;
19432 return saved;
19436 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19437 everything bar the final return instruction. If simple_return is true,
19438 then do not output epilogue, because it has already been emitted in RTL. */
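/* Examples of the sequences produced below (the saved registers are
   invented): a normal function that pushed r4, r5 and lr returns with

       ldmfd   sp!, {r4, r5, pc}     @ "pop {r4, r5, pc}" in unified syntax

   while an interrupt handler appends "^" to the register list so that
   the CPSR is restored too, or, when nothing was pushed, returns with
   "subs pc, lr, #4".  */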
19439 const char *
19440 output_return_instruction (rtx operand, bool really_return, bool reverse,
19441 bool simple_return)
19443 char conditional[10];
19444 char instr[100];
19445 unsigned reg;
19446 unsigned long live_regs_mask;
19447 unsigned long func_type;
19448 arm_stack_offsets *offsets;
19450 func_type = arm_current_func_type ();
19452 if (IS_NAKED (func_type))
19453 return "";
19455 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19457 /* If this function was declared non-returning, and we have
19458 found a tail call, then we have to trust that the called
19459 function won't return. */
19460 if (really_return)
19462 rtx ops[2];
19464 /* Otherwise, trap an attempted return by aborting. */
19465 ops[0] = operand;
19466 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19467 : "abort");
19468 assemble_external_libcall (ops[1]);
19469 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19472 return "";
19475 gcc_assert (!cfun->calls_alloca || really_return);
19477 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19479 cfun->machine->return_used_this_function = 1;
19481 offsets = arm_get_frame_offsets ();
19482 live_regs_mask = offsets->saved_regs_mask;
19484 if (!simple_return && live_regs_mask)
19486 const char * return_reg;
19488 /* If we do not have any special requirements for function exit
19489 (e.g. interworking) then we can load the return address
19490 directly into the PC. Otherwise we must load it into LR. */
19491 if (really_return
19492 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19493 return_reg = reg_names[PC_REGNUM];
19494 else
19495 return_reg = reg_names[LR_REGNUM];
19497 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19499 /* There are three possible reasons for the IP register
19500 being saved: 1) a stack frame was created, in which case
19501 IP contains the old stack pointer; 2) an ISR routine
19502 corrupted it; or 3) it was saved to align the stack on
19503 iWMMXt. In case 1, restore IP into SP; otherwise just
19504 restore IP. */
19505 if (frame_pointer_needed)
19507 live_regs_mask &= ~ (1 << IP_REGNUM);
19508 live_regs_mask |= (1 << SP_REGNUM);
19510 else
19511 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19514 /* On some ARM architectures it is faster to use LDR rather than
19515 LDM to load a single register. On other architectures, the
19516 cost is the same. In 26 bit mode, or for exception handlers,
19517 we have to use LDM to load the PC so that the CPSR is also
19518 restored. */
19519 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19520 if (live_regs_mask == (1U << reg))
19521 break;
19523 if (reg <= LAST_ARM_REGNUM
19524 && (reg != LR_REGNUM
19525 || ! really_return
19526 || ! IS_INTERRUPT (func_type)))
19528 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19529 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19531 else
19533 char *p;
19534 int first = 1;
19536 /* Generate the load multiple instruction to restore the
19537 registers. Note we can get here, even if
19538 frame_pointer_needed is true, but only if sp already
19539 points to the base of the saved core registers. */
19540 if (live_regs_mask & (1 << SP_REGNUM))
19542 unsigned HOST_WIDE_INT stack_adjust;
19544 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19545 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19547 if (stack_adjust && arm_arch5 && TARGET_ARM)
19548 if (TARGET_UNIFIED_ASM)
19549 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19550 else
19551 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19552 else
19554 /* If we can't use ldmib (SA110 bug),
19555 then try to pop r3 instead. */
19556 if (stack_adjust)
19557 live_regs_mask |= 1 << 3;
19559 if (TARGET_UNIFIED_ASM)
19560 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19561 else
19562 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19565 else
19566 if (TARGET_UNIFIED_ASM)
19567 sprintf (instr, "pop%s\t{", conditional);
19568 else
19569 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19571 p = instr + strlen (instr);
19573 for (reg = 0; reg <= SP_REGNUM; reg++)
19574 if (live_regs_mask & (1 << reg))
19576 int l = strlen (reg_names[reg]);
19578 if (first)
19579 first = 0;
19580 else
19582 memcpy (p, ", ", 2);
19583 p += 2;
19586 memcpy (p, "%|", 2);
19587 memcpy (p + 2, reg_names[reg], l);
19588 p += l + 2;
19591 if (live_regs_mask & (1 << LR_REGNUM))
19593 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19594 /* If returning from an interrupt, restore the CPSR. */
19595 if (IS_INTERRUPT (func_type))
19596 strcat (p, "^");
19598 else
19599 strcpy (p, "}");
19602 output_asm_insn (instr, & operand);
19604 /* See if we need to generate an extra instruction to
19605 perform the actual function return. */
19606 if (really_return
19607 && func_type != ARM_FT_INTERWORKED
19608 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19610 /* The return has already been handled
19611 by loading the LR into the PC. */
19612 return "";
19616 if (really_return)
19618 switch ((int) ARM_FUNC_TYPE (func_type))
19620 case ARM_FT_ISR:
19621 case ARM_FT_FIQ:
19622 /* ??? This is wrong for unified assembly syntax. */
19623 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19624 break;
19626 case ARM_FT_INTERWORKED:
19627 sprintf (instr, "bx%s\t%%|lr", conditional);
19628 break;
19630 case ARM_FT_EXCEPTION:
19631 /* ??? This is wrong for unified assembly syntax. */
19632 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19633 break;
19635 default:
19636 /* Use bx if it's available. */
19637 if (arm_arch5 || arm_arch4t)
19638 sprintf (instr, "bx%s\t%%|lr", conditional);
19639 else
19640 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19641 break;
19644 output_asm_insn (instr, & operand);
19647 return "";
19650 /* Write the function name into the code section, directly preceding
19651 the function prologue.
19653 Code will be output similar to this:
19655 .ascii "arm_poke_function_name", 0
19656 .align
19658 .word 0xff000000 + (t1 - t0)
19659 arm_poke_function_name
19660 mov ip, sp
19661 stmfd sp!, {fp, ip, lr, pc}
19662 sub fp, ip, #4
19664 When performing a stack backtrace, code can inspect the value
19665 of 'pc' stored at 'fp' + 0. If the trace function then looks
19666 at location pc - 12 and the top 8 bits are set, then we know
19667 that there is a function name embedded immediately preceding this
19668 location, and that its length is ((pc[-3]) & ~0xff000000).
19670 We assume that pc is declared as a pointer to an unsigned long.
19672 It is of no benefit to output the function name if we are assembling
19673 a leaf function. These function types will not contain a stack
19674 backtrace structure, therefore it is not possible to determine the
19675 function name. */
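/* A hypothetical consumer-side sketch (not part of GCC), restating the
   layout described above: given the saved pc as an unsigned long *,

     unsigned long marker = pc[-3];                 /* word at pc - 12 */
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & 0x00ffffff;   /* padded name length */
         const char *name = (const char *) pc - 12 - len;
       }

   The variable names here are illustrative only.  */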
19676 void
19677 arm_poke_function_name (FILE *stream, const char *name)
19679 unsigned long alignlength;
19680 unsigned long length;
19681 rtx x;
19683 length = strlen (name) + 1;
19684 alignlength = ROUND_UP_WORD (length);
19686 ASM_OUTPUT_ASCII (stream, name, length);
19687 ASM_OUTPUT_ALIGN (stream, 2);
19688 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19689 assemble_aligned_integer (UNITS_PER_WORD, x);
19692 /* Place some comments into the assembler stream
19693 describing the current function. */
19694 static void
19695 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19697 unsigned long func_type;
19699 /* ??? Do we want to print some of the below anyway? */
19700 if (TARGET_THUMB1)
19701 return;
19703 /* Sanity check. */
19704 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19706 func_type = arm_current_func_type ();
19708 switch ((int) ARM_FUNC_TYPE (func_type))
19710 default:
19711 case ARM_FT_NORMAL:
19712 break;
19713 case ARM_FT_INTERWORKED:
19714 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19715 break;
19716 case ARM_FT_ISR:
19717 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19718 break;
19719 case ARM_FT_FIQ:
19720 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19721 break;
19722 case ARM_FT_EXCEPTION:
19723 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19724 break;
19727 if (IS_NAKED (func_type))
19728 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19730 if (IS_VOLATILE (func_type))
19731 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19733 if (IS_NESTED (func_type))
19734 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19735 if (IS_STACKALIGN (func_type))
19736 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19738 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19739 crtl->args.size,
19740 crtl->args.pretend_args_size, frame_size);
19742 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19743 frame_pointer_needed,
19744 cfun->machine->uses_anonymous_args);
19746 if (cfun->machine->lr_save_eliminated)
19747 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19749 if (crtl->calls_eh_return)
19750 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19754 static void
19755 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19756 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19758 arm_stack_offsets *offsets;
19760 if (TARGET_THUMB1)
19762 int regno;
19764 /* Emit any call-via-reg trampolines that are needed for v4t support
19765 of call_reg and call_value_reg type insns. */
19766 for (regno = 0; regno < LR_REGNUM; regno++)
19768 rtx label = cfun->machine->call_via[regno];
19770 if (label != NULL)
19772 switch_to_section (function_section (current_function_decl));
19773 targetm.asm_out.internal_label (asm_out_file, "L",
19774 CODE_LABEL_NUMBER (label));
19775 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19779 /* ??? Probably not safe to set this here, since it assumes that a
19780 function will be emitted as assembly immediately after we generate
19781 RTL for it. This does not happen for inline functions. */
19782 cfun->machine->return_used_this_function = 0;
19784 else /* TARGET_32BIT */
19786 /* We need to take into account any stack-frame rounding. */
19787 offsets = arm_get_frame_offsets ();
19789 gcc_assert (!use_return_insn (FALSE, NULL)
19790 || (cfun->machine->return_used_this_function != 0)
19791 || offsets->saved_regs == offsets->outgoing_args
19792 || frame_pointer_needed);
19796 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19797 STR and STRD. If an even number of registers is being pushed, an
19798 STRD pattern is created for each register pair. If an
19799 odd number of registers is pushed, emit an initial STR followed by
19800 as many STRD instructions as are needed. This works best when the
19801 stack is initially 64-bit aligned (the normal case), since it
19802 ensures that each STRD is also 64-bit aligned. */
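/* Illustrative sketch of the emitted sequence (not literal output): for
   SAVED_REGS_MASK covering {r4, r5, r6} (an odd count) this produces
   roughly

     str   r4, [sp, #-12]!      @ single STR allocates all 12 bytes
     strd  r5, r6, [sp, #4]

   while {r4, r5, r6, r7} (an even count) becomes

     strd  r4, r5, [sp, #-16]!
     strd  r6, r7, [sp, #8]  */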
19803 static void
19804 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19806 int num_regs = 0;
19807 int i;
19808 int regno;
19809 rtx par = NULL_RTX;
19810 rtx dwarf = NULL_RTX;
19811 rtx tmp;
19812 bool first = true;
19814 num_regs = bit_count (saved_regs_mask);
19816 /* Must be at least one register to save, and can't save SP or PC. */
19817 gcc_assert (num_regs > 0 && num_regs <= 14);
19818 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19819 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19821 /* Create sequence for DWARF info. All the frame-related data for
19822 debugging is held in this wrapper. */
19823 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19825 /* Describe the stack adjustment. */
19826 tmp = gen_rtx_SET (VOIDmode,
19827 stack_pointer_rtx,
19828 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19829 RTX_FRAME_RELATED_P (tmp) = 1;
19830 XVECEXP (dwarf, 0, 0) = tmp;
19832 /* Find the first register. */
19833 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19836 i = 0;
19838 /* If there's an odd number of registers to push, start off by
19839 pushing a single register. This ensures that subsequent strd
19840 operations are dword aligned (assuming that SP was originally
19841 64-bit aligned). */
19842 if ((num_regs & 1) != 0)
19844 rtx reg, mem, insn;
19846 reg = gen_rtx_REG (SImode, regno);
19847 if (num_regs == 1)
19848 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19849 stack_pointer_rtx));
19850 else
19851 mem = gen_frame_mem (Pmode,
19852 gen_rtx_PRE_MODIFY
19853 (Pmode, stack_pointer_rtx,
19854 plus_constant (Pmode, stack_pointer_rtx,
19855 -4 * num_regs)));
19857 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19858 RTX_FRAME_RELATED_P (tmp) = 1;
19859 insn = emit_insn (tmp);
19860 RTX_FRAME_RELATED_P (insn) = 1;
19861 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19862 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19863 reg);
19864 RTX_FRAME_RELATED_P (tmp) = 1;
19865 i++;
19866 regno++;
19867 XVECEXP (dwarf, 0, i) = tmp;
19868 first = false;
19871 while (i < num_regs)
19872 if (saved_regs_mask & (1 << regno))
19874 rtx reg1, reg2, mem1, mem2;
19875 rtx tmp0, tmp1, tmp2;
19876 int regno2;
19878 /* Find the register to pair with this one. */
19879 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19880 regno2++)
19883 reg1 = gen_rtx_REG (SImode, regno);
19884 reg2 = gen_rtx_REG (SImode, regno2);
19886 if (first)
19888 rtx insn;
19890 first = false;
19891 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19892 stack_pointer_rtx,
19893 -4 * num_regs));
19894 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 -4 * (num_regs - 1)));
19897 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19898 plus_constant (Pmode, stack_pointer_rtx,
19899 -4 * (num_regs)));
19900 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19901 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19902 RTX_FRAME_RELATED_P (tmp0) = 1;
19903 RTX_FRAME_RELATED_P (tmp1) = 1;
19904 RTX_FRAME_RELATED_P (tmp2) = 1;
19905 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19906 XVECEXP (par, 0, 0) = tmp0;
19907 XVECEXP (par, 0, 1) = tmp1;
19908 XVECEXP (par, 0, 2) = tmp2;
19909 insn = emit_insn (par);
19910 RTX_FRAME_RELATED_P (insn) = 1;
19911 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19913 else
19915 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19916 stack_pointer_rtx,
19917 4 * i));
19918 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19919 stack_pointer_rtx,
19920 4 * (i + 1)));
19921 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19922 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19923 RTX_FRAME_RELATED_P (tmp1) = 1;
19924 RTX_FRAME_RELATED_P (tmp2) = 1;
19925 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19926 XVECEXP (par, 0, 0) = tmp1;
19927 XVECEXP (par, 0, 1) = tmp2;
19928 emit_insn (par);
19931 /* Create unwind information. This is an approximation. */
19932 tmp1 = gen_rtx_SET (VOIDmode,
19933 gen_frame_mem (Pmode,
19934 plus_constant (Pmode,
19935 stack_pointer_rtx,
19936 4 * i)),
19937 reg1);
19938 tmp2 = gen_rtx_SET (VOIDmode,
19939 gen_frame_mem (Pmode,
19940 plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * (i + 1))),
19943 reg2);
19945 RTX_FRAME_RELATED_P (tmp1) = 1;
19946 RTX_FRAME_RELATED_P (tmp2) = 1;
19947 XVECEXP (dwarf, 0, i + 1) = tmp1;
19948 XVECEXP (dwarf, 0, i + 2) = tmp2;
19949 i += 2;
19950 regno = regno2 + 1;
19952 else
19953 regno++;
19955 return;
19958 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19959 whenever possible, otherwise it emits single-word stores. The first store
19960 also allocates stack space for all saved registers, using writeback with
19961 post-addressing mode. All other stores use offset addressing. If no STRD
19962 can be emitted, this function emits a sequence of single-word stores,
19963 and not an STM as before, because single-word stores provide more
19964 scheduling freedom and can be turned into an STM by peephole optimizations. */
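/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, r7}, where only r4/r5 form the even/odd consecutive pair that
   ARM-mode STRD requires, this emits roughly

     strd  r4, r5, [sp, #-12]!   @ first store allocates all 12 bytes
     str   r7, [sp, #8]  */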
19965 static void
19966 arm_emit_strd_push (unsigned long saved_regs_mask)
19968 int num_regs = 0;
19969 int i, j, dwarf_index = 0;
19970 int offset = 0;
19971 rtx dwarf = NULL_RTX;
19972 rtx insn = NULL_RTX;
19973 rtx tmp, mem;
19975 /* TODO: More efficient code can be emitted by changing the
19976 layout, e.g., first push all pairs that can use STRD to keep the
19977 stack aligned, and then push all other registers. */
19978 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19979 if (saved_regs_mask & (1 << i))
19980 num_regs++;
19982 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19983 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19984 gcc_assert (num_regs > 0);
19986 /* Create sequence for DWARF info. */
19987 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19989 /* For dwarf info, we generate explicit stack update. */
19990 tmp = gen_rtx_SET (VOIDmode,
19991 stack_pointer_rtx,
19992 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19993 RTX_FRAME_RELATED_P (tmp) = 1;
19994 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19996 /* Save registers. */
19997 offset = - 4 * num_regs;
19998 j = 0;
19999 while (j <= LAST_ARM_REGNUM)
20000 if (saved_regs_mask & (1 << j))
20002 if ((j % 2 == 0)
20003 && (saved_regs_mask & (1 << (j + 1))))
20005 /* The current register and the next register form a register pair
20006 for which STRD can be generated.
20007 if (offset < 0)
20009 /* Allocate stack space for all saved registers. */
20010 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20011 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20012 mem = gen_frame_mem (DImode, tmp);
20013 offset = 0;
20015 else if (offset > 0)
20016 mem = gen_frame_mem (DImode,
20017 plus_constant (Pmode,
20018 stack_pointer_rtx,
20019 offset));
20020 else
20021 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20023 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
20024 RTX_FRAME_RELATED_P (tmp) = 1;
20025 tmp = emit_insn (tmp);
20027 /* Record the first store insn. */
20028 if (dwarf_index == 1)
20029 insn = tmp;
20031 /* Generate dwarf info. */
20032 mem = gen_frame_mem (SImode,
20033 plus_constant (Pmode,
20034 stack_pointer_rtx,
20035 offset));
20036 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20040 mem = gen_frame_mem (SImode,
20041 plus_constant (Pmode,
20042 stack_pointer_rtx,
20043 offset + 4));
20044 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20045 RTX_FRAME_RELATED_P (tmp) = 1;
20046 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20048 offset += 8;
20049 j += 2;
20051 else
20053 /* Emit a single word store. */
20054 if (offset < 0)
20056 /* Allocate stack space for all saved registers. */
20057 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20058 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20059 mem = gen_frame_mem (SImode, tmp);
20060 offset = 0;
20062 else if (offset > 0)
20063 mem = gen_frame_mem (SImode,
20064 plus_constant (Pmode,
20065 stack_pointer_rtx,
20066 offset));
20067 else
20068 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20070 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20071 RTX_FRAME_RELATED_P (tmp) = 1;
20072 tmp = emit_insn (tmp);
20074 /* Record the first store insn. */
20075 if (dwarf_index == 1)
20076 insn = tmp;
20078 /* Generate dwarf info. */
20079 mem = gen_frame_mem (SImode,
20080 plus_constant(Pmode,
20081 stack_pointer_rtx,
20082 offset));
20083 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20084 RTX_FRAME_RELATED_P (tmp) = 1;
20085 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20087 offset += 4;
20088 j += 1;
20091 else
20092 j++;
20094 /* Attach dwarf info to the first insn we generate. */
20095 gcc_assert (insn != NULL_RTX);
20096 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20097 RTX_FRAME_RELATED_P (insn) = 1;
20100 /* Generate and emit an insn that we will recognize as a push_multi.
20101 Unfortunately, since this insn does not reflect very well the actual
20102 semantics of the operation, we need to annotate the insn for the benefit
20103 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20104 MASK for registers that should be annotated for DWARF2 frame unwind
20105 information. */
20106 static rtx
20107 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20109 int num_regs = 0;
20110 int num_dwarf_regs = 0;
20111 int i, j;
20112 rtx par;
20113 rtx dwarf;
20114 int dwarf_par_index;
20115 rtx tmp, reg;
20117 /* We don't record the PC in the dwarf frame information. */
20118 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20120 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20122 if (mask & (1 << i))
20123 num_regs++;
20124 if (dwarf_regs_mask & (1 << i))
20125 num_dwarf_regs++;
20128 gcc_assert (num_regs && num_regs <= 16);
20129 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20131 /* For the body of the insn we are going to generate an UNSPEC in
20132 parallel with several USEs. This allows the insn to be recognized
20133 by the push_multi pattern in the arm.md file.
20135 The body of the insn looks something like this:
20137 (parallel [
20138 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20139 (const_int:SI <num>)))
20140 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20141 (use (reg:SI XX))
20142 (use (reg:SI YY))
20146 For the frame note however, we try to be more explicit and actually
20147 show each register being stored into the stack frame, plus a (single)
20148 decrement of the stack pointer. We do it this way in order to be
20149 friendly to the stack unwinding code, which only wants to see a single
20150 stack decrement per instruction. The RTL we generate for the note looks
20151 something like this:
20153 (sequence [
20154 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20155 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20156 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20157 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20161 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20162 instead we'd have a parallel expression detailing all
20163 the stores to the various memory addresses so that debug
20164 information is more up-to-date. Remember however while writing
20165 this to take care of the constraints with the push instruction.
20167 Note also that this has to be taken care of for the VFP registers.
20169 For more see PR43399. */
20171 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20172 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20173 dwarf_par_index = 1;
20175 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20177 if (mask & (1 << i))
20179 reg = gen_rtx_REG (SImode, i);
20181 XVECEXP (par, 0, 0)
20182 = gen_rtx_SET (VOIDmode,
20183 gen_frame_mem
20184 (BLKmode,
20185 gen_rtx_PRE_MODIFY (Pmode,
20186 stack_pointer_rtx,
20187 plus_constant
20188 (Pmode, stack_pointer_rtx,
20189 -4 * num_regs))
20191 gen_rtx_UNSPEC (BLKmode,
20192 gen_rtvec (1, reg),
20193 UNSPEC_PUSH_MULT));
20195 if (dwarf_regs_mask & (1 << i))
20197 tmp = gen_rtx_SET (VOIDmode,
20198 gen_frame_mem (SImode, stack_pointer_rtx),
20199 reg);
20200 RTX_FRAME_RELATED_P (tmp) = 1;
20201 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20204 break;
20208 for (j = 1, i++; j < num_regs; i++)
20210 if (mask & (1 << i))
20212 reg = gen_rtx_REG (SImode, i);
20214 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20216 if (dwarf_regs_mask & (1 << i))
20219 = gen_rtx_SET (VOIDmode,
20220 gen_frame_mem
20221 (SImode,
20222 plus_constant (Pmode, stack_pointer_rtx,
20223 4 * j)),
20224 reg);
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20229 j++;
20233 par = emit_insn (par);
20235 tmp = gen_rtx_SET (VOIDmode,
20236 stack_pointer_rtx,
20237 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20238 RTX_FRAME_RELATED_P (tmp) = 1;
20239 XVECEXP (dwarf, 0, 0) = tmp;
20241 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20243 return par;
20246 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20247 SIZE is the offset to be adjusted.
20248 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20249 static void
20250 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20252 rtx dwarf;
20254 RTX_FRAME_RELATED_P (insn) = 1;
20255 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20256 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20259 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20260 SAVED_REGS_MASK shows which registers need to be restored.
20262 Unfortunately, since this insn does not reflect very well the actual
20263 semantics of the operation, we need to annotate the insn for the benefit
20264 of DWARF2 frame unwind information. */
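/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, pc} the parallel built below matches the pop_multi pattern and
   assembles to roughly "ldmfd sp!, {r4, r5, pc}"; a mask with a single
   register and no PC instead takes the early "ldr rN, [sp], #4" path.  */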
20265 static void
20266 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20268 int num_regs = 0;
20269 int i, j;
20270 rtx par;
20271 rtx dwarf = NULL_RTX;
20272 rtx tmp, reg;
20273 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20274 int offset_adj;
20275 int emit_update;
20277 offset_adj = return_in_pc ? 1 : 0;
20278 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20279 if (saved_regs_mask & (1 << i))
20280 num_regs++;
20282 gcc_assert (num_regs && num_regs <= 16);
20284 /* If SP is in the reglist, then we don't emit an SP update insn. */
20285 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20287 /* The parallel needs to hold num_regs SETs
20288 and one SET for the stack update. */
20289 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20291 if (return_in_pc)
20292 XVECEXP (par, 0, 0) = ret_rtx;
20294 if (emit_update)
20296 /* Increment the stack pointer, based on there being
20297 num_regs 4-byte registers to restore. */
20298 tmp = gen_rtx_SET (VOIDmode,
20299 stack_pointer_rtx,
20300 plus_constant (Pmode,
20301 stack_pointer_rtx,
20302 4 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (par, 0, offset_adj) = tmp;
20307 /* Now restore every reg, which may include PC. */
20308 for (j = 0, i = 0; j < num_regs; i++)
20309 if (saved_regs_mask & (1 << i))
20311 reg = gen_rtx_REG (SImode, i);
20312 if ((num_regs == 1) && emit_update && !return_in_pc)
20314 /* Emit single load with writeback. */
20315 tmp = gen_frame_mem (SImode,
20316 gen_rtx_POST_INC (Pmode,
20317 stack_pointer_rtx));
20318 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20319 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20320 return;
20323 tmp = gen_rtx_SET (VOIDmode,
20324 reg,
20325 gen_frame_mem
20326 (SImode,
20327 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20328 RTX_FRAME_RELATED_P (tmp) = 1;
20329 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20331 /* We need to maintain a sequence for DWARF info too. As dwarf info
20332 should not have PC, skip PC. */
20333 if (i != PC_REGNUM)
20334 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20336 j++;
20339 if (return_in_pc)
20340 par = emit_jump_insn (par);
20341 else
20342 par = emit_insn (par);
20344 REG_NOTES (par) = dwarf;
20345 if (!return_in_pc)
20346 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20347 stack_pointer_rtx, stack_pointer_rtx);
20350 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20351 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20353 Unfortunately, since this insn does not reflect very well the actual
20354 semantics of the operation, we need to annotate the insn for the benefit
20355 of DWARF2 frame unwind information. */
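/* Illustrative arithmetic only: a single pop_multi can restore at most 16
   D-registers, so a request for, say, 20 registers starting at FIRST_REG is
   split below into two calls covering 16 and then 4 registers.  */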
20356 static void
20357 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20359 int i, j;
20360 rtx par;
20361 rtx dwarf = NULL_RTX;
20362 rtx tmp, reg;
20364 gcc_assert (num_regs && num_regs <= 32);
20366 /* Workaround ARM10 VFPr1 bug. */
20367 if (num_regs == 2 && !arm_arch6)
20369 if (first_reg == 15)
20370 first_reg--;
20372 num_regs++;
20375 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20376 there could be up to 32 D-registers to restore.
20377 If there are more than 16 D-registers, make two recursive calls,
20378 each of which emits one pop_multi instruction. */
20379 if (num_regs > 16)
20381 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20382 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20383 return;
20386 /* The parallel needs to hold num_regs SETs
20387 and one SET for the stack update. */
20388 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20390 /* Increment the stack pointer, based on there being
20391 num_regs 8-byte registers to restore. */
20392 tmp = gen_rtx_SET (VOIDmode,
20393 base_reg,
20394 plus_constant (Pmode, base_reg, 8 * num_regs));
20395 RTX_FRAME_RELATED_P (tmp) = 1;
20396 XVECEXP (par, 0, 0) = tmp;
20398 /* Now show every reg that will be restored, using a SET for each. */
20399 for (j = 0, i=first_reg; j < num_regs; i += 2)
20401 reg = gen_rtx_REG (DFmode, i);
20403 tmp = gen_rtx_SET (VOIDmode,
20404 reg,
20405 gen_frame_mem
20406 (DFmode,
20407 plus_constant (Pmode, base_reg, 8 * j)));
20408 RTX_FRAME_RELATED_P (tmp) = 1;
20409 XVECEXP (par, 0, j + 1) = tmp;
20411 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20413 j++;
20416 par = emit_insn (par);
20417 REG_NOTES (par) = dwarf;
20419 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20420 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20422 RTX_FRAME_RELATED_P (par) = 1;
20423 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20425 else
20426 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20427 base_reg, base_reg);
20430 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20431 even number of registers is being popped, an LDRD pattern is created for
20432 each register pair. If an odd number of registers is popped, the last
20433 register is loaded using an LDR pattern. */
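/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, r6} this emits roughly

     ldrd  r4, r5, [sp]
     add   sp, sp, #8
     ldr   r6, [sp], #4

   and when PC is also in the mask the final single-word load becomes the
   return itself, or the remaining registers are popped together with PC by
   arm_emit_multi_reg_pop.  */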
20434 static void
20435 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20437 int num_regs = 0;
20438 int i, j;
20439 rtx par = NULL_RTX;
20440 rtx dwarf = NULL_RTX;
20441 rtx tmp, reg, tmp1;
20442 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20445 if (saved_regs_mask & (1 << i))
20446 num_regs++;
20448 gcc_assert (num_regs && num_regs <= 16);
20450 /* We cannot generate LDRD for PC, so reduce the count if PC is
20451 to be popped. If num_regs was even it now becomes odd,
20452 and we can generate a pop with PC. If num_regs was odd, it is
20453 now even, and an LDR with return can be generated for PC. */
20454 if (return_in_pc)
20455 num_regs--;
20457 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20459 /* Var j iterates over all the registers in saved_regs_mask, while var i
20460 gives the index of each saved register in the stack frame.
20461 A PARALLEL RTX for each register pair is created here, so that the
20462 LDRD pattern can be matched. As PC is always the last register to be
20463 popped, and we have already decremented num_regs if PC is present,
20464 we don't have to worry about PC in this loop. */
20465 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20466 if (saved_regs_mask & (1 << j))
20468 /* Create RTX for memory load. */
20469 reg = gen_rtx_REG (SImode, j);
20470 tmp = gen_rtx_SET (SImode,
20471 reg,
20472 gen_frame_mem (SImode,
20473 plus_constant (Pmode,
20474 stack_pointer_rtx, 4 * i)));
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20477 if (i % 2 == 0)
20479 /* When saved-register index (i) is even, the RTX to be emitted is
20480 yet to be created. Hence create it first. The LDRD pattern we
20481 are generating is :
20482 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20483 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20484 where target registers need not be consecutive. */
20485 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20486 dwarf = NULL_RTX;
20489 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20490 added as the 0th element; if i is odd, reg_i is added as the 1st element
20491 of the LDRD pattern shown above. */
20492 XVECEXP (par, 0, (i % 2)) = tmp;
20493 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20495 if ((i % 2) == 1)
20497 /* When saved-register index (i) is odd, RTXs for both the registers
20498 to be loaded are generated in above given LDRD pattern, and the
20499 pattern can be emitted now. */
20500 par = emit_insn (par);
20501 REG_NOTES (par) = dwarf;
20502 RTX_FRAME_RELATED_P (par) = 1;
20505 i++;
20508 /* If the number of registers popped is odd and return_in_pc is false, or
20509 the number of registers is even and return_in_pc is true, the last register
20510 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20511 and then use LDR with post-increment. */
20513 /* Increment the stack pointer, based on there being
20514 num_regs 4-byte registers to restore. */
20515 tmp = gen_rtx_SET (VOIDmode,
20516 stack_pointer_rtx,
20517 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20518 RTX_FRAME_RELATED_P (tmp) = 1;
20519 tmp = emit_insn (tmp);
20520 if (!return_in_pc)
20522 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20523 stack_pointer_rtx, stack_pointer_rtx);
20526 dwarf = NULL_RTX;
20528 if (((num_regs % 2) == 1 && !return_in_pc)
20529 || ((num_regs % 2) == 0 && return_in_pc))
20531 /* Scan for the single register to be popped. Skip until the saved
20532 register is found. */
20533 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20535 /* Gen LDR with post increment here. */
20536 tmp1 = gen_rtx_MEM (SImode,
20537 gen_rtx_POST_INC (SImode,
20538 stack_pointer_rtx));
20539 set_mem_alias_set (tmp1, get_frame_alias_set ());
20541 reg = gen_rtx_REG (SImode, j);
20542 tmp = gen_rtx_SET (SImode, reg, tmp1);
20543 RTX_FRAME_RELATED_P (tmp) = 1;
20544 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20546 if (return_in_pc)
20548 /* If return_in_pc, j must be PC_REGNUM. */
20549 gcc_assert (j == PC_REGNUM);
20550 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20551 XVECEXP (par, 0, 0) = ret_rtx;
20552 XVECEXP (par, 0, 1) = tmp;
20553 par = emit_jump_insn (par);
20555 else
20557 par = emit_insn (tmp);
20558 REG_NOTES (par) = dwarf;
20559 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20560 stack_pointer_rtx, stack_pointer_rtx);
20564 else if ((num_regs % 2) == 1 && return_in_pc)
20566 /* There are 2 registers to be popped. So, generate the pattern
20567 pop_multiple_with_stack_update_and_return to pop in PC. */
20568 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20571 return;
20574 /* LDRD in ARM mode needs consecutive registers as operands. This function
20575 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20576 offset addressing and then generates one separate stack update. This provides
20577 more scheduling freedom, compared to writeback on every load. However,
20578 if the function returns using load into PC directly
20579 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20580 before the last load. TODO: Add a peephole optimization to recognize
20581 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20582 peephole optimization to merge the load at stack-offset zero
20583 with the stack update instruction using load with writeback
20584 in post-index addressing mode. */
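/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, r7} this emits roughly

     ldrd  r4, r5, [sp]
     ldr   r7, [sp, #8]
     add   sp, sp, #12

   and if PC is in the mask the stack update is emitted first, followed by a
   final "ldr pc, [sp], #4" that doubles as the return.  */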
20585 static void
20586 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20588 int j = 0;
20589 int offset = 0;
20590 rtx par = NULL_RTX;
20591 rtx dwarf = NULL_RTX;
20592 rtx tmp, mem;
20594 /* Restore saved registers. */
20595 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20596 j = 0;
20597 while (j <= LAST_ARM_REGNUM)
20598 if (saved_regs_mask & (1 << j))
20600 if ((j % 2) == 0
20601 && (saved_regs_mask & (1 << (j + 1)))
20602 && (j + 1) != PC_REGNUM)
20604 /* Current register and next register form register pair for which
20605 LDRD can be generated. PC is always the last register popped, and
20606 we handle it separately. */
20607 if (offset > 0)
20608 mem = gen_frame_mem (DImode,
20609 plus_constant (Pmode,
20610 stack_pointer_rtx,
20611 offset));
20612 else
20613 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20615 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20616 tmp = emit_insn (tmp);
20617 RTX_FRAME_RELATED_P (tmp) = 1;
20619 /* Generate dwarf info. */
20621 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20622 gen_rtx_REG (SImode, j),
20623 NULL_RTX);
20624 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20625 gen_rtx_REG (SImode, j + 1),
20626 dwarf);
20628 REG_NOTES (tmp) = dwarf;
20630 offset += 8;
20631 j += 2;
20633 else if (j != PC_REGNUM)
20635 /* Emit a single word load. */
20636 if (offset > 0)
20637 mem = gen_frame_mem (SImode,
20638 plus_constant (Pmode,
20639 stack_pointer_rtx,
20640 offset));
20641 else
20642 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20644 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20645 tmp = emit_insn (tmp);
20646 RTX_FRAME_RELATED_P (tmp) = 1;
20648 /* Generate dwarf info. */
20649 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20650 gen_rtx_REG (SImode, j),
20651 NULL_RTX);
20653 offset += 4;
20654 j += 1;
20656 else /* j == PC_REGNUM */
20657 j++;
20659 else
20660 j++;
20662 /* Update the stack. */
20663 if (offset > 0)
20665 tmp = gen_rtx_SET (Pmode,
20666 stack_pointer_rtx,
20667 plus_constant (Pmode,
20668 stack_pointer_rtx,
20669 offset));
20670 tmp = emit_insn (tmp);
20671 arm_add_cfa_adjust_cfa_note (tmp, offset,
20672 stack_pointer_rtx, stack_pointer_rtx);
20673 offset = 0;
20676 if (saved_regs_mask & (1 << PC_REGNUM))
20678 /* Only PC is to be popped. */
20679 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20680 XVECEXP (par, 0, 0) = ret_rtx;
20681 tmp = gen_rtx_SET (SImode,
20682 gen_rtx_REG (SImode, PC_REGNUM),
20683 gen_frame_mem (SImode,
20684 gen_rtx_POST_INC (SImode,
20685 stack_pointer_rtx)));
20686 RTX_FRAME_RELATED_P (tmp) = 1;
20687 XVECEXP (par, 0, 1) = tmp;
20688 par = emit_jump_insn (par);
20690 /* Generate dwarf info. */
20691 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20692 gen_rtx_REG (SImode, PC_REGNUM),
20693 NULL_RTX);
20694 REG_NOTES (par) = dwarf;
20695 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20696 stack_pointer_rtx, stack_pointer_rtx);
20700 /* Calculate the size of the return value that is passed in registers. */
20701 static unsigned
20702 arm_size_return_regs (void)
20704 machine_mode mode;
20706 if (crtl->return_rtx != 0)
20707 mode = GET_MODE (crtl->return_rtx);
20708 else
20709 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20711 return GET_MODE_SIZE (mode);
20714 /* Return true if the current function needs to save/restore LR. */
20715 static bool
20716 thumb_force_lr_save (void)
20718 return !cfun->machine->lr_save_eliminated
20719 && (!leaf_function_p ()
20720 || thumb_far_jump_used_p ()
20721 || df_regs_ever_live_p (LR_REGNUM));
20724 /* We do not know if r3 will be available, because
20725 an indirect tailcall is happening in this
20726 particular case. */
20727 static bool
20728 is_indirect_tailcall_p (rtx call)
20730 rtx pat = PATTERN (call);
20732 /* Indirect tail call. */
20733 pat = XVECEXP (pat, 0, 0);
20734 if (GET_CODE (pat) == SET)
20735 pat = SET_SRC (pat);
20737 pat = XEXP (XEXP (pat, 0), 0);
20738 return REG_P (pat);
20741 /* Return true if r3 is used by any of the tail call insns in the
20742 current function. */
20743 static bool
20744 any_sibcall_could_use_r3 (void)
20746 edge_iterator ei;
20747 edge e;
20749 if (!crtl->tail_call_emit)
20750 return false;
20751 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20752 if (e->flags & EDGE_SIBCALL)
20754 rtx call = BB_END (e->src);
20755 if (!CALL_P (call))
20756 call = prev_nonnote_nondebug_insn (call);
20757 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20758 if (find_regno_fusage (call, USE, 3)
20759 || is_indirect_tailcall_p (call))
20760 return true;
20762 return false;
20766 /* Compute the distance from register FROM to register TO.
20767 These can be the arg pointer (26), the soft frame pointer (25),
20768 the stack pointer (13) or the hard frame pointer (11).
20769 In thumb mode r7 is used as the soft frame pointer, if needed.
20770 Typical stack layout looks like this:
20772 old stack pointer -> | |
20773 ----
20774 | | \
20775 | | saved arguments for
20776 | | vararg functions
20777 | | /
20779 hard FP & arg pointer -> | | \
20780 | | stack
20781 | | frame
20782 | | /
20784 | | \
20785 | | call saved
20786 | | registers
20787 soft frame pointer -> | | /
20789 | | \
20790 | | local
20791 | | variables
20792 locals base pointer -> | | /
20794 | | \
20795 | | outgoing
20796 | | arguments
20797 current stack pointer -> | | /
20800 For a given function some or all of these stack components
20801 may not be needed, giving rise to the possibility of
20802 eliminating some of the registers.
20804 The values returned by this function must reflect the behavior
20805 of arm_expand_prologue() and arm_compute_save_reg_mask().
20807 The sign of the number returned reflects the direction of stack
20808 growth, so the values are positive for all eliminations except
20809 from the soft frame pointer to the hard frame pointer.
20811 SFP may point just inside the local variables block to ensure correct
20812 alignment. */
20815 /* Calculate stack offsets. These are used to calculate register elimination
20816 offsets and in prologue/epilogue code. Also calculates which registers
20817 should be saved. */
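/* Illustrative example, under assumptions not taken from the source: for an
   ARM-mode non-leaf function with no pretend args, no static chain, a
   16-byte (word-rounded) frame, 8 bytes of outgoing arguments, no caller
   interworking slot and saved_regs_mask covering {r4, lr}, the values
   computed below are saved_args = 0, saved_regs = 8, soft_frame = 8,
   locals_base = 24 and outgoing_args = 32, which already satisfies the
   doubleword-alignment checks.  */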
20819 static arm_stack_offsets *
20820 arm_get_frame_offsets (void)
20822 struct arm_stack_offsets *offsets;
20823 unsigned long func_type;
20824 int leaf;
20825 int saved;
20826 int core_saved;
20827 HOST_WIDE_INT frame_size;
20828 int i;
20830 offsets = &cfun->machine->stack_offsets;
20832 /* We need to know if we are a leaf function. Unfortunately, it
20833 is possible to be called after start_sequence has been called,
20834 which causes get_insns to return the insns for the sequence,
20835 not the function, which will cause leaf_function_p to return
20836 the incorrect result.
20838 However, we only need to know about leaf functions once reload has completed, and the
20839 frame size cannot be changed after that time, so we can safely
20840 use the cached value. */
20842 if (reload_completed)
20843 return offsets;
20845 /* Initially this is the size of the local variables. It will be translated
20846 into an offset once we have determined the size of preceding data. */
20847 frame_size = ROUND_UP_WORD (get_frame_size ());
20849 leaf = leaf_function_p ();
20851 /* Space for variadic functions. */
20852 offsets->saved_args = crtl->args.pretend_args_size;
20854 /* In Thumb mode this is incorrect, but never used. */
20855 offsets->frame
20856 = (offsets->saved_args
20857 + arm_compute_static_chain_stack_bytes ()
20858 + (frame_pointer_needed ? 4 : 0));
20860 if (TARGET_32BIT)
20862 unsigned int regno;
20864 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20865 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20866 saved = core_saved;
20868 /* We know that SP will be doubleword aligned on entry, and we must
20869 preserve that condition at any subroutine call. We also require the
20870 soft frame pointer to be doubleword aligned. */
20872 if (TARGET_REALLY_IWMMXT)
20874 /* Check for the call-saved iWMMXt registers. */
20875 for (regno = FIRST_IWMMXT_REGNUM;
20876 regno <= LAST_IWMMXT_REGNUM;
20877 regno++)
20878 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20879 saved += 8;
20882 func_type = arm_current_func_type ();
20883 /* Space for saved VFP registers. */
20884 if (! IS_VOLATILE (func_type)
20885 && TARGET_HARD_FLOAT && TARGET_VFP)
20886 saved += arm_get_vfp_saved_size ();
20888 else /* TARGET_THUMB1 */
20890 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20891 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20892 saved = core_saved;
20893 if (TARGET_BACKTRACE)
20894 saved += 16;
20897 /* Saved registers include the stack frame. */
20898 offsets->saved_regs
20899 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20900 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20902 /* A leaf function does not need any stack alignment if it has nothing
20903 on the stack. */
20904 if (leaf && frame_size == 0
20905 /* However if it calls alloca(), we have a dynamically allocated
20906 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20907 && ! cfun->calls_alloca)
20909 offsets->outgoing_args = offsets->soft_frame;
20910 offsets->locals_base = offsets->soft_frame;
20911 return offsets;
20914 /* Ensure SFP has the correct alignment. */
20915 if (ARM_DOUBLEWORD_ALIGN
20916 && (offsets->soft_frame & 7))
20918 offsets->soft_frame += 4;
20919 /* Try to align stack by pushing an extra reg. Don't bother doing this
20920 when there is a stack frame as the alignment will be rolled into
20921 the normal stack adjustment. */
20922 if (frame_size + crtl->outgoing_args_size == 0)
20924 int reg = -1;
20926 /* Register r3 is caller-saved. Normally it does not need to be
20927 saved on entry by the prologue. However if we choose to save
20928 it for padding then we may confuse the compiler into thinking
20929 a prologue sequence is required when in fact it is not. This
20930 will occur when shrink-wrapping if r3 is used as a scratch
20931 register and there are no other callee-saved writes.
20933 This situation can be avoided when other callee-saved registers
20934 are available: since r3 is not mandatory, we can choose a callee-saved
20935 register for padding instead. */
20936 bool prefer_callee_reg_p = false;
20938 /* If it is safe to use r3, then do so. This sometimes
20939 generates better code on Thumb-2 by avoiding the need to
20940 use 32-bit push/pop instructions. */
20941 if (! any_sibcall_could_use_r3 ()
20942 && arm_size_return_regs () <= 12
20943 && (offsets->saved_regs_mask & (1 << 3)) == 0
20944 && (TARGET_THUMB2
20945 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20947 reg = 3;
20948 if (!TARGET_THUMB2)
20949 prefer_callee_reg_p = true;
20951 if (reg == -1
20952 || prefer_callee_reg_p)
20954 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20956 /* Avoid fixed registers; they may be changed at
20957 arbitrary times so it's unsafe to restore them
20958 during the epilogue. */
20959 if (!fixed_regs[i]
20960 && (offsets->saved_regs_mask & (1 << i)) == 0)
20962 reg = i;
20963 break;
20968 if (reg != -1)
20970 offsets->saved_regs += 4;
20971 offsets->saved_regs_mask |= (1 << reg);
20976 offsets->locals_base = offsets->soft_frame + frame_size;
20977 offsets->outgoing_args = (offsets->locals_base
20978 + crtl->outgoing_args_size);
20980 if (ARM_DOUBLEWORD_ALIGN)
20982 /* Ensure SP remains doubleword aligned. */
20983 if (offsets->outgoing_args & 7)
20984 offsets->outgoing_args += 4;
20985 gcc_assert (!(offsets->outgoing_args & 7));
20988 return offsets;
20992 /* Calculate the relative offsets for the different stack pointers. Positive
20993 offsets are in the direction of stack growth. */
20995 HOST_WIDE_INT
20996 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20998 arm_stack_offsets *offsets;
21000 offsets = arm_get_frame_offsets ();
21002 /* OK, now we have enough information to compute the distances.
21003 There must be an entry in these switch tables for each pair
21004 of registers in ELIMINABLE_REGS, even if some of the entries
21005 seem to be redundant or useless. */
21006 switch (from)
21008 case ARG_POINTER_REGNUM:
21009 switch (to)
21011 case THUMB_HARD_FRAME_POINTER_REGNUM:
21012 return 0;
21014 case FRAME_POINTER_REGNUM:
21015 /* This is the reverse of the soft frame pointer
21016 to hard frame pointer elimination below. */
21017 return offsets->soft_frame - offsets->saved_args;
21019 case ARM_HARD_FRAME_POINTER_REGNUM:
21020 /* This is only non-zero in the case where the static chain register
21021 is stored above the frame. */
21022 return offsets->frame - offsets->saved_args - 4;
21024 case STACK_POINTER_REGNUM:
21025 /* If nothing has been pushed on the stack at all
21026 then this will return -4. This *is* correct! */
21027 return offsets->outgoing_args - (offsets->saved_args + 4);
21029 default:
21030 gcc_unreachable ();
21032 gcc_unreachable ();
21034 case FRAME_POINTER_REGNUM:
21035 switch (to)
21037 case THUMB_HARD_FRAME_POINTER_REGNUM:
21038 return 0;
21040 case ARM_HARD_FRAME_POINTER_REGNUM:
21041 /* The hard frame pointer points to the top entry in the
21042 stack frame. The soft frame pointer to the bottom entry
21043 in the stack frame. If there is no stack frame at all,
21044 then they are identical. */
21046 return offsets->frame - offsets->soft_frame;
21048 case STACK_POINTER_REGNUM:
21049 return offsets->outgoing_args - offsets->soft_frame;
21051 default:
21052 gcc_unreachable ();
21054 gcc_unreachable ();
21056 default:
21057 /* You cannot eliminate from the stack pointer.
21058 In theory you could eliminate from the hard frame
21059 pointer to the stack pointer, but this will never
21060 happen, since if a stack frame is not needed the
21061 hard frame pointer will never be used. */
21062 gcc_unreachable ();
21066 /* Given FROM and TO register numbers, say whether this elimination is
21067 allowed. Frame pointer elimination is automatically handled.
21069 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21070 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21071 pointer, we must eliminate FRAME_POINTER_REGNUM into
21072 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21073 ARG_POINTER_REGNUM. */
21075 bool
21076 arm_can_eliminate (const int from, const int to)
21078 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21079 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21080 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21081 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21082 true);
21085 /* Emit RTL to save coprocessor registers on function entry. Returns the
21086 number of bytes pushed. */
21088 static int
21089 arm_save_coproc_regs(void)
21091 int saved_size = 0;
21092 unsigned reg;
21093 unsigned start_reg;
21094 rtx insn;
21096 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21097 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21099 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21100 insn = gen_rtx_MEM (V2SImode, insn);
21101 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21102 RTX_FRAME_RELATED_P (insn) = 1;
21103 saved_size += 8;
21106 if (TARGET_HARD_FLOAT && TARGET_VFP)
21108 start_reg = FIRST_VFP_REGNUM;
21110 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21112 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21113 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21115 if (start_reg != reg)
21116 saved_size += vfp_emit_fstmd (start_reg,
21117 (reg - start_reg) / 2);
21118 start_reg = reg + 2;
21121 if (start_reg != reg)
21122 saved_size += vfp_emit_fstmd (start_reg,
21123 (reg - start_reg) / 2);
21125 return saved_size;
21129 /* Set the Thumb frame pointer from the stack pointer. */
21131 static void
21132 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21134 HOST_WIDE_INT amount;
21135 rtx insn, dwarf;
21137 amount = offsets->outgoing_args - offsets->locals_base;
21138 if (amount < 1024)
21139 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21140 stack_pointer_rtx, GEN_INT (amount)));
21141 else
21143 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21144 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21145 expects the first two operands to be the same. */
21146 if (TARGET_THUMB2)
21148 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21149 stack_pointer_rtx,
21150 hard_frame_pointer_rtx));
21152 else
21154 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21155 hard_frame_pointer_rtx,
21156 stack_pointer_rtx));
21158 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21159 plus_constant (Pmode, stack_pointer_rtx, amount));
21160 RTX_FRAME_RELATED_P (dwarf) = 1;
21161 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21164 RTX_FRAME_RELATED_P (insn) = 1;
21167 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21168 function. */
21169 void
21170 arm_expand_prologue (void)
21172 rtx amount;
21173 rtx insn;
21174 rtx ip_rtx;
21175 unsigned long live_regs_mask;
21176 unsigned long func_type;
21177 int fp_offset = 0;
21178 int saved_pretend_args = 0;
21179 int saved_regs = 0;
21180 unsigned HOST_WIDE_INT args_to_push;
21181 arm_stack_offsets *offsets;
21183 func_type = arm_current_func_type ();
21185 /* Naked functions don't have prologues. */
21186 if (IS_NAKED (func_type))
21187 return;
21189 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21190 args_to_push = crtl->args.pretend_args_size;
21192 /* Compute which register we will have to save onto the stack. */
21193 offsets = arm_get_frame_offsets ();
21194 live_regs_mask = offsets->saved_regs_mask;
21196 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21198 if (IS_STACKALIGN (func_type))
21200 rtx r0, r1;
21202 /* Handle a word-aligned stack pointer. We generate the following:
21204 mov r0, sp
21205 bic r1, r0, #7
21206 mov sp, r1
21207 <save and restore r0 in normal prologue/epilogue>
21208 mov sp, r0
21209 bx lr
21211 The unwinder doesn't need to know about the stack realignment.
21212 Just tell it we saved SP in r0. */
21213 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21215 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21216 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21218 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21219 RTX_FRAME_RELATED_P (insn) = 1;
21220 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21222 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21224 /* ??? The CFA changes here, which may cause GDB to conclude that it
21225 has entered a different function. That said, the unwind info is
21226 correct, individually, before and after this instruction because
21227 we've described the save of SP, which will override the default
21228 handling of SP as restoring from the CFA. */
21229 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21232 /* For APCS frames, if IP register is clobbered
21233 when creating frame, save that register in a special
21234 way. */
21235 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21237 if (IS_INTERRUPT (func_type))
21239 /* Interrupt functions must not corrupt any registers.
21240 Creating a frame pointer however, corrupts the IP
21241 register, so we must push it first. */
21242 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21244 /* Do not set RTX_FRAME_RELATED_P on this insn.
21245 The dwarf stack unwinding code only wants to see one
21246 stack decrement per function, and this is not it. If
21247 this instruction is labeled as being part of the frame
21248 creation sequence then dwarf2out_frame_debug_expr will
21249 die when it encounters the assignment of IP to FP
21250 later on, since the use of SP here establishes SP as
21251 the CFA register and not IP.
21253 Anyway this instruction is not really part of the stack
21254 frame creation although it is part of the prologue. */
21256 else if (IS_NESTED (func_type))
21258 /* The static chain register is the same as the IP register
21259 used as a scratch register during stack frame creation.
21260 To get around this we need to find somewhere to store IP
21261 whilst the frame is being created. We try the following
21262 places in order:
21264 1. The last argument register r3 if it is available.
21265 2. A slot on the stack above the frame if there are no
21266 arguments to push onto the stack.
21267 3. Register r3 again, after pushing the argument registers
21268 onto the stack, if this is a varargs function.
21269 4. The last slot on the stack created for the arguments to
21270 push, if this isn't a varargs function.
21272 Note - we only need to tell the dwarf2 backend about the SP
21273 adjustment in the second variant; the static chain register
21274 doesn't need to be unwound, as it doesn't contain a value
21275 inherited from the caller. */
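	 /* Illustrative sketch of option 2 above (not literal output): with no
	    arguments to push and r3 unavailable, IP is spilled with
	    "str ip, [sp, #-4]!" and recovered later in this function from the
	    slot at [fp, #4] once the APCS frame pointer has been set up.  */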
21277 if (!arm_r3_live_at_start_p ())
21278 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21279 else if (args_to_push == 0)
21281 rtx addr, dwarf;
21283 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21284 saved_regs += 4;
21286 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21287 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21288 fp_offset = 4;
21290 /* Just tell the dwarf backend that we adjusted SP. */
21291 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21292 plus_constant (Pmode, stack_pointer_rtx,
21293 -fp_offset));
21294 RTX_FRAME_RELATED_P (insn) = 1;
21295 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21297 else
21299 /* Store the args on the stack. */
21300 if (cfun->machine->uses_anonymous_args)
21302 insn
21303 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21304 (0xf0 >> (args_to_push / 4)) & 0xf);
21305 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21306 saved_pretend_args = 1;
21308 else
21310 rtx addr, dwarf;
21312 if (args_to_push == 4)
21313 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21314 else
21315 addr
21316 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21317 plus_constant (Pmode,
21318 stack_pointer_rtx,
21319 -args_to_push));
21321 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21323 /* Just tell the dwarf backend that we adjusted SP. */
21324 dwarf
21325 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21326 plus_constant (Pmode, stack_pointer_rtx,
21327 -args_to_push));
21328 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21331 RTX_FRAME_RELATED_P (insn) = 1;
21332 fp_offset = args_to_push;
21333 args_to_push = 0;
21337 insn = emit_set_insn (ip_rtx,
21338 plus_constant (Pmode, stack_pointer_rtx,
21339 fp_offset));
21340 RTX_FRAME_RELATED_P (insn) = 1;
21343 if (args_to_push)
21345 /* Push the argument registers, or reserve space for them. */
21346 if (cfun->machine->uses_anonymous_args)
21347 insn = emit_multi_reg_push
21348 ((0xf0 >> (args_to_push / 4)) & 0xf,
21349 (0xf0 >> (args_to_push / 4)) & 0xf);
21350 else
21351 insn = emit_insn
21352 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21353 GEN_INT (- args_to_push)));
21354 RTX_FRAME_RELATED_P (insn) = 1;
21357 /* If this is an interrupt service routine, and the link register
21358 is going to be pushed, and we're not generating the extra
21359 push of IP (needed when a frame is needed and the frame layout is APCS),
21360 subtracting four from LR now will mean that the function return
21361 can be done with a single instruction. */
21362 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21363 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21364 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21365 && TARGET_ARM)
21367 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21369 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21372 if (live_regs_mask)
21374 unsigned long dwarf_regs_mask = live_regs_mask;
21376 saved_regs += bit_count (live_regs_mask) * 4;
21377 if (optimize_size && !frame_pointer_needed
21378 && saved_regs == offsets->saved_regs - offsets->saved_args)
21380 /* If no coprocessor registers are being pushed and we don't have
21381 to worry about a frame pointer then push extra registers to
21382 create the stack frame. This is done is a way that does not
21383 alter the frame layout, so is independent of the epilogue. */
21384 int n;
21385 int frame;
21386 n = 0;
21387 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21388 n++;
21389 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21390 if (frame && n * 4 >= frame)
21392 n = frame / 4;
21393 live_regs_mask |= (1 << n) - 1;
21394 saved_regs += frame;
21398 if (TARGET_LDRD
21399 && current_tune->prefer_ldrd_strd
21400 && !optimize_function_for_size_p (cfun))
21402 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21403 if (TARGET_THUMB2)
21404 thumb2_emit_strd_push (live_regs_mask);
21405 else if (TARGET_ARM
21406 && !TARGET_APCS_FRAME
21407 && !IS_INTERRUPT (func_type))
21408 arm_emit_strd_push (live_regs_mask);
21409 else
21411 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21412 RTX_FRAME_RELATED_P (insn) = 1;
21415 else
21417 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21418 RTX_FRAME_RELATED_P (insn) = 1;
21422 if (! IS_VOLATILE (func_type))
21423 saved_regs += arm_save_coproc_regs ();
21425 if (frame_pointer_needed && TARGET_ARM)
21427 /* Create the new frame pointer. */
21428 if (TARGET_APCS_FRAME)
21430 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21431 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21432 RTX_FRAME_RELATED_P (insn) = 1;
21434 if (IS_NESTED (func_type))
21436 /* Recover the static chain register. */
21437 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21438 insn = gen_rtx_REG (SImode, 3);
21439 else
21441 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21442 insn = gen_frame_mem (SImode, insn);
21444 emit_set_insn (ip_rtx, insn);
21445 /* Add a USE to stop propagate_one_insn() from barfing. */
21446 emit_insn (gen_force_register_use (ip_rtx));
21449 else
21451 insn = GEN_INT (saved_regs - 4);
21452 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21453 stack_pointer_rtx, insn));
21454 RTX_FRAME_RELATED_P (insn) = 1;
21458 if (flag_stack_usage_info)
21459 current_function_static_stack_size
21460 = offsets->outgoing_args - offsets->saved_args;
21462 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21464 /* This add can produce multiple insns for a large constant, so we
21465 need to get tricky. */
21466 rtx_insn *last = get_last_insn ();
21468 amount = GEN_INT (offsets->saved_args + saved_regs
21469 - offsets->outgoing_args);
21471 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21472 amount));
21475 last = last ? NEXT_INSN (last) : get_insns ();
21476 RTX_FRAME_RELATED_P (last) = 1;
21478 while (last != insn);
21480 /* If the frame pointer is needed, emit a special barrier that
21481 will prevent the scheduler from moving stores to the frame
21482 before the stack adjustment. */
21483 if (frame_pointer_needed)
21484 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21485 hard_frame_pointer_rtx));
21489 if (frame_pointer_needed && TARGET_THUMB2)
21490 thumb_set_frame_pointer (offsets);
21492 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21494 unsigned long mask;
21496 mask = live_regs_mask;
21497 mask &= THUMB2_WORK_REGS;
21498 if (!IS_NESTED (func_type))
21499 mask |= (1 << IP_REGNUM);
21500 arm_load_pic_register (mask);
21503 /* If we are profiling, make sure no instructions are scheduled before
21504 the call to mcount. Similarly if the user has requested no
21505 scheduling in the prolog. Similarly if we want non-call exceptions
21506 using the EABI unwinder, to prevent faulting instructions from being
21507 swapped with a stack adjustment. */
21508 if (crtl->profile || !TARGET_SCHED_PROLOG
21509 || (arm_except_unwind_info (&global_options) == UI_TARGET
21510 && cfun->can_throw_non_call_exceptions))
21511 emit_insn (gen_blockage ());
21513 /* If the link register is being kept alive, with the return address in it,
21514 then make sure that it does not get reused by the ce2 pass. */
21515 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21516 cfun->machine->lr_save_eliminated = 1;
21519 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21520 static void
21521 arm_print_condition (FILE *stream)
21523 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21525 /* Branch conversion is not implemented for Thumb-2. */
21526 if (TARGET_THUMB)
21528 output_operand_lossage ("predicated Thumb instruction");
21529 return;
21531 if (current_insn_predicate != NULL)
21533 output_operand_lossage
21534 ("predicated instruction in conditional sequence");
21535 return;
21538 fputs (arm_condition_codes[arm_current_cc], stream);
21540 else if (current_insn_predicate)
21542 enum arm_cond_code code;
21544 if (TARGET_THUMB1)
21546 output_operand_lossage ("predicated Thumb instruction");
21547 return;
21550 code = get_arm_condition_code (current_insn_predicate);
21551 fputs (arm_condition_codes[code], stream);
21556 /* Globally reserved letters: acln
21557 Punctuation letters currently used: @_|?().!#
21558 Lower case letters currently used: bcdefhimpqtvwxyz
21559 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21560 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21562 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21564 If CODE is 'd', then the X is a condition operand and the instruction
21565 should only be executed if the condition is true.
21566 If CODE is 'D', then the X is a condition operand and the instruction
21567 should only be executed if the condition is false: however, if the mode
21568 of the comparison is CCFPEmode, then always execute the instruction -- we
21569 do this because in these circumstances !GE does not necessarily imply LT;
21570 in these cases the instruction pattern will take care to make sure that
21571 an instruction containing %d will follow, thereby undoing the effects of
21572 doing this instruction unconditionally.
21573 If CODE is 'N' then X is a floating point operand that must be negated
21574 before output.
21575 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21576 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
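/* A stand-alone sketch (host-side C only, not compiled as part of the
   backend) of the values the 'B' and 'b' codes print for small CONST_INTs,
   assuming a 64-bit HOST_WIDE_INT.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long long x = 5;
  /* 'B': bitwise inverse, sign-extended from 32 bits, so ~5 prints -6.  */
  long long b = (long long) (int) (unsigned int) ~x;
  printf ("%%B of %lld prints %lld\n", x, b);

  /* 'b': log2 of a power-of-two constant, so 8 prints #3.  */
  long long y = 8;
  printf ("%%b of %lld prints #%d\n", y, __builtin_ctzll (y & 0xffffffff));
  return 0;
}
#endif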
21577 static void
21578 arm_print_operand (FILE *stream, rtx x, int code)
21580 switch (code)
21582 case '@':
21583 fputs (ASM_COMMENT_START, stream);
21584 return;
21586 case '_':
21587 fputs (user_label_prefix, stream);
21588 return;
21590 case '|':
21591 fputs (REGISTER_PREFIX, stream);
21592 return;
21594 case '?':
21595 arm_print_condition (stream);
21596 return;
21598 case '(':
21599 /* Nothing in unified syntax, otherwise the current condition code. */
21600 if (!TARGET_UNIFIED_ASM)
21601 arm_print_condition (stream);
21602 break;
21604 case ')':
21605 /* The current condition code in unified syntax, otherwise nothing. */
21606 if (TARGET_UNIFIED_ASM)
21607 arm_print_condition (stream);
21608 break;
21610 case '.':
21611 /* The current condition code for a condition code setting instruction.
21612 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21613 if (TARGET_UNIFIED_ASM)
21615 fputc('s', stream);
21616 arm_print_condition (stream);
21618 else
21620 arm_print_condition (stream);
21621 fputc('s', stream);
21623 return;
21625 case '!':
21626 /* If the instruction is conditionally executed then print
21627 the current condition code, otherwise print 's'. */
21628 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21629 if (current_insn_predicate)
21630 arm_print_condition (stream);
21631 else
21632 fputc('s', stream);
21633 break;
21635 /* %# is a "break" sequence. It doesn't output anything, but is used to
21636 separate e.g. operand numbers from following text, if that text consists
21637 of further digits which we don't want to be part of the operand
21638 number. */
21639 case '#':
21640 return;
21642 case 'N':
21644 REAL_VALUE_TYPE r;
21645 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21646 r = real_value_negate (&r);
21647 fprintf (stream, "%s", fp_const_from_val (&r));
21649 return;
21651 /* An integer or symbol address without a preceding # sign. */
21652 case 'c':
21653 switch (GET_CODE (x))
21655 case CONST_INT:
21656 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21657 break;
21659 case SYMBOL_REF:
21660 output_addr_const (stream, x);
21661 break;
21663 case CONST:
21664 if (GET_CODE (XEXP (x, 0)) == PLUS
21665 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21667 output_addr_const (stream, x);
21668 break;
21670 /* Fall through. */
21672 default:
21673 output_operand_lossage ("Unsupported operand for code '%c'", code);
21675 return;
21677 /* An integer that we want to print in HEX. */
21678 case 'x':
21679 switch (GET_CODE (x))
21681 case CONST_INT:
21682 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21683 break;
21685 default:
21686 output_operand_lossage ("Unsupported operand for code '%c'", code);
21688 return;
21690 case 'B':
21691 if (CONST_INT_P (x))
21693 HOST_WIDE_INT val;
21694 val = ARM_SIGN_EXTEND (~INTVAL (x));
21695 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21697 else
21699 putc ('~', stream);
21700 output_addr_const (stream, x);
21702 return;
21704 case 'b':
21705 /* Print the log2 of a CONST_INT. */
21707 HOST_WIDE_INT val;
21709 if (!CONST_INT_P (x)
21710 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21711 output_operand_lossage ("Unsupported operand for code '%c'", code);
21712 else
21713 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21715 return;
21717 case 'L':
21718 /* The low 16 bits of an immediate constant. */
21719 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21720 return;
21722 case 'i':
21723 fprintf (stream, "%s", arithmetic_instr (x, 1));
21724 return;
21726 case 'I':
21727 fprintf (stream, "%s", arithmetic_instr (x, 0));
21728 return;
21730 case 'S':
21732 HOST_WIDE_INT val;
21733 const char *shift;
21735 shift = shift_op (x, &val);
21737 if (shift)
21739 fprintf (stream, ", %s ", shift);
21740 if (val == -1)
21741 arm_print_operand (stream, XEXP (x, 1), 0);
21742 else
21743 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21746 return;
21748 /* An explanation of the 'Q', 'R' and 'H' register operands:
21750 In a pair of registers containing a DI or DF value the 'Q'
21751 operand returns the register number of the register containing
21752 the least significant part of the value. The 'R' operand returns
21753 the register number of the register containing the most
21754 significant part of the value.
21756 The 'H' operand returns the higher of the two register numbers.
21757 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21758 same as the 'Q' operand, since the most significant part of the
21759 value is held in the lower number register. The reverse is true
21760 on systems where WORDS_BIG_ENDIAN is false.
21762 The purpose of these operands is to distinguish between cases
21763 where the endian-ness of the values is important (for example
21764 when they are added together), and cases where the endian-ness
21765 is irrelevant, but the order of register operations is important.
21766 For example when loading a value from memory into a register
21767 pair, the endian-ness does not matter. Provided that the value
21768 from the lower memory address is put into the lower numbered
21769 register, and the value from the higher address is put into the
21770 higher numbered register, the load will work regardless of whether
21771 the value being loaded is big-wordian or little-wordian. The
21772 order of the two register loads can matter however, if the address
21773 of the memory location is actually held in one of the registers
21774 being overwritten by the load.
21776 The 'Q' and 'R' constraints are also available for 64-bit
21777 constants. */
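/* A stand-alone sketch (not compiled here) of how 'Q', 'R' and 'H' resolve
   for a doubleword value held in the register pair {rN, rN+1}, mirroring the
   WORDS_BIG_ENDIAN adjustments in the cases below.  */
#if 0
#include <stdio.h>

static void
show (int regno, int words_big_endian)
{
  int q = regno + (words_big_endian ? 1 : 0);	/* least significant word */
  int r = regno + (words_big_endian ? 0 : 1);	/* most significant word */
  int h = regno + 1;				/* higher-numbered register */
  printf ("WORDS_BIG_ENDIAN=%d: %%Q=r%d %%R=r%d %%H=r%d\n",
	  words_big_endian, q, r, h);
}

int
main (void)
{
  show (0, 0);	/* little-wordian: %Q=r0 %R=r1 %H=r1 */
  show (0, 1);	/* big-wordian:    %Q=r1 %R=r0 %H=r1 */
  return 0;
}
#endif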
21778 case 'Q':
21779 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21781 rtx part = gen_lowpart (SImode, x);
21782 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21783 return;
21786 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21788 output_operand_lossage ("invalid operand for code '%c'", code);
21789 return;
21792 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21793 return;
21795 case 'R':
21796 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21798 machine_mode mode = GET_MODE (x);
21799 rtx part;
21801 if (mode == VOIDmode)
21802 mode = DImode;
21803 part = gen_highpart_mode (SImode, mode, x);
21804 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21805 return;
21808 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21810 output_operand_lossage ("invalid operand for code '%c'", code);
21811 return;
21814 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21815 return;
21817 case 'H':
21818 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21820 output_operand_lossage ("invalid operand for code '%c'", code);
21821 return;
21824 asm_fprintf (stream, "%r", REGNO (x) + 1);
21825 return;
21827 case 'J':
21828 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21830 output_operand_lossage ("invalid operand for code '%c'", code);
21831 return;
21834 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21835 return;
21837 case 'K':
21838 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21840 output_operand_lossage ("invalid operand for code '%c'", code);
21841 return;
21844 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21845 return;
21847 case 'm':
21848 asm_fprintf (stream, "%r",
21849 REG_P (XEXP (x, 0))
21850 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21851 return;
21853 case 'M':
21854 asm_fprintf (stream, "{%r-%r}",
21855 REGNO (x),
21856 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21857 return;
21859 /* Like 'M', but writing doubleword vector registers, for use by Neon
21860 insns. */
21861 case 'h':
21863 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21864 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21865 if (numregs == 1)
21866 asm_fprintf (stream, "{d%d}", regno);
21867 else
21868 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21870 return;
21872 case 'd':
21873 /* CONST_TRUE_RTX means always -- that's the default. */
21874 if (x == const_true_rtx)
21875 return;
21877 if (!COMPARISON_P (x))
21879 output_operand_lossage ("invalid operand for code '%c'", code);
21880 return;
21883 fputs (arm_condition_codes[get_arm_condition_code (x)],
21884 stream);
21885 return;
21887 case 'D':
21888 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21889 want to do that. */
21890 if (x == const_true_rtx)
21892 output_operand_lossage ("instruction never executed");
21893 return;
21895 if (!COMPARISON_P (x))
21897 output_operand_lossage ("invalid operand for code '%c'", code);
21898 return;
21901 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21902 (get_arm_condition_code (x))],
21903 stream);
21904 return;
21906 case 's':
21907 case 'V':
21908 case 'W':
21909 case 'X':
21910 case 'Y':
21911 case 'Z':
21912 /* Former Maverick support, removed after GCC-4.7. */
21913 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21914 return;
21916 case 'U':
21917 if (!REG_P (x)
21918 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21919 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21920 /* Bad value for wCG register number. */
21922 output_operand_lossage ("invalid operand for code '%c'", code);
21923 return;
21926 else
21927 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21928 return;
21930 /* Print an iWMMXt control register name. */
21931 case 'w':
21932 if (!CONST_INT_P (x)
21933 || INTVAL (x) < 0
21934 || INTVAL (x) >= 16)
21935 /* Bad value for wC register number. */
21937 output_operand_lossage ("invalid operand for code '%c'", code);
21938 return;
21941 else
21943 static const char * wc_reg_names [16] =
21945 "wCID", "wCon", "wCSSF", "wCASF",
21946 "wC4", "wC5", "wC6", "wC7",
21947 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21948 "wC12", "wC13", "wC14", "wC15"
21951 fputs (wc_reg_names [INTVAL (x)], stream);
21953 return;
21955 /* Print the high single-precision register of a VFP double-precision
21956 register. */
21957 case 'p':
21959 machine_mode mode = GET_MODE (x);
21960 int regno;
21962 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21964 output_operand_lossage ("invalid operand for code '%c'", code);
21965 return;
21968 regno = REGNO (x);
21969 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21971 output_operand_lossage ("invalid operand for code '%c'", code);
21972 return;
21975 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21977 return;
21979 /* Print a VFP/Neon double precision or quad precision register name. */
21980 case 'P':
21981 case 'q':
21983 machine_mode mode = GET_MODE (x);
21984 int is_quad = (code == 'q');
21985 int regno;
21987 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21989 output_operand_lossage ("invalid operand for code '%c'", code);
21990 return;
21993 if (!REG_P (x)
21994 || !IS_VFP_REGNUM (REGNO (x)))
21996 output_operand_lossage ("invalid operand for code '%c'", code);
21997 return;
22000 regno = REGNO (x);
22001 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22002 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22004 output_operand_lossage ("invalid operand for code '%c'", code);
22005 return;
22008 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22009 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22011 return;
22013 /* These two codes print the low/high doubleword register of a Neon quad
22014 register, respectively. For pair-structure types, can also print
22015 low/high quadword registers. */
22016 case 'e':
22017 case 'f':
22019 machine_mode mode = GET_MODE (x);
22020 int regno;
22022 if ((GET_MODE_SIZE (mode) != 16
22023 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22025 output_operand_lossage ("invalid operand for code '%c'", code);
22026 return;
22029 regno = REGNO (x);
22030 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22032 output_operand_lossage ("invalid operand for code '%c'", code);
22033 return;
22036 if (GET_MODE_SIZE (mode) == 16)
22037 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22038 + (code == 'f' ? 1 : 0));
22039 else
22040 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22041 + (code == 'f' ? 1 : 0));
22043 return;
22045 /* Print a VFPv3 floating-point constant, represented as an integer
22046 index. */
22047 case 'G':
22049 int index = vfp3_const_double_index (x);
22050 gcc_assert (index != -1);
22051 fprintf (stream, "%d", index);
22053 return;
22055 /* Print bits representing opcode features for Neon.
22057 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22058 and polynomials as unsigned.
22060 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22062 Bit 2 is 1 for rounding functions, 0 otherwise. */
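/* A stand-alone sketch (not compiled here) showing how these feature bits map
   onto the letters printed by the 'T', 'F', 't' and 'O' codes below.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int bits;
  for (bits = 0; bits < 8; bits++)
    printf ("bits=%d  T=%c  F=%c  t=%c  O=%s\n",
	    bits,
	    "uspf"[bits & 3],		/* 'T': u/s/p/f */
	    "iipf"[bits & 3],		/* 'F': signedness folded to 'i' */
	    "usuf"[bits & 3],		/* 't': 'p' emitted as 'u' */
	    (bits & 4) != 0 ? "r" : "");	/* 'O': rounding suffix */
  return 0;
}
#endif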
22064 /* Identify the type as 's', 'u', 'p' or 'f'. */
22065 case 'T':
22067 HOST_WIDE_INT bits = INTVAL (x);
22068 fputc ("uspf"[bits & 3], stream);
22070 return;
22072 /* Likewise, but signed and unsigned integers are both 'i'. */
22073 case 'F':
22075 HOST_WIDE_INT bits = INTVAL (x);
22076 fputc ("iipf"[bits & 3], stream);
22078 return;
22080 /* As for 'T', but emit 'u' instead of 'p'. */
22081 case 't':
22083 HOST_WIDE_INT bits = INTVAL (x);
22084 fputc ("usuf"[bits & 3], stream);
22086 return;
22088 /* Bit 2: rounding (vs none). */
22089 case 'O':
22091 HOST_WIDE_INT bits = INTVAL (x);
22092 fputs ((bits & 4) != 0 ? "r" : "", stream);
22094 return;
22096 /* Memory operand for vld1/vst1 instruction. */
22097 case 'A':
22099 rtx addr;
22100 bool postinc = FALSE;
22101 rtx postinc_reg = NULL;
22102 unsigned align, memsize, align_bits;
22104 gcc_assert (MEM_P (x));
22105 addr = XEXP (x, 0);
22106 if (GET_CODE (addr) == POST_INC)
22108 postinc = 1;
22109 addr = XEXP (addr, 0);
22111 if (GET_CODE (addr) == POST_MODIFY)
22113 postinc_reg = XEXP( XEXP (addr, 1), 1);
22114 addr = XEXP (addr, 0);
22116 asm_fprintf (stream, "[%r", REGNO (addr));
22118 /* We know the alignment of this access, so we can emit a hint in the
22119 instruction (for some alignments) as an aid to the memory subsystem
22120 of the target. */
22121 align = MEM_ALIGN (x) >> 3;
22122 memsize = MEM_SIZE (x);
22124 /* Only certain alignment specifiers are supported by the hardware. */
22125 if (memsize == 32 && (align % 32) == 0)
22126 align_bits = 256;
22127 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22128 align_bits = 128;
22129 else if (memsize >= 8 && (align % 8) == 0)
22130 align_bits = 64;
22131 else
22132 align_bits = 0;
22134 if (align_bits != 0)
22135 asm_fprintf (stream, ":%d", align_bits);
22137 asm_fprintf (stream, "]");
22139 if (postinc)
22140 fputs("!", stream);
22141 if (postinc_reg)
22142 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22144 return;
22146 case 'C':
22148 rtx addr;
22150 gcc_assert (MEM_P (x));
22151 addr = XEXP (x, 0);
22152 gcc_assert (REG_P (addr));
22153 asm_fprintf (stream, "[%r]", REGNO (addr));
22155 return;
22157 /* Translate an S register number into a D register number and element index. */
22158 case 'y':
22160 machine_mode mode = GET_MODE (x);
22161 int regno;
22163 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22165 output_operand_lossage ("invalid operand for code '%c'", code);
22166 return;
22169 regno = REGNO (x);
22170 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22172 output_operand_lossage ("invalid operand for code '%c'", code);
22173 return;
22176 regno = regno - FIRST_VFP_REGNUM;
22177 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22179 return;
22181 case 'v':
22182 gcc_assert (CONST_DOUBLE_P (x));
22183 int result;
22184 result = vfp3_const_double_for_fract_bits (x);
22185 if (result == 0)
22186 result = vfp3_const_double_for_bits (x);
22187 fprintf (stream, "#%d", result);
22188 return;
22190 /* Register specifier for vld1.16/vst1.16. Translate the S register
22191 number into a D register number and element index. */
22192 case 'z':
22194 machine_mode mode = GET_MODE (x);
22195 int regno;
22197 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22199 output_operand_lossage ("invalid operand for code '%c'", code);
22200 return;
22203 regno = REGNO (x);
22204 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22206 output_operand_lossage ("invalid operand for code '%c'", code);
22207 return;
22210 regno = regno - FIRST_VFP_REGNUM;
22211 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22213 return;
22215 default:
22216 if (x == 0)
22218 output_operand_lossage ("missing operand");
22219 return;
22222 switch (GET_CODE (x))
22224 case REG:
22225 asm_fprintf (stream, "%r", REGNO (x));
22226 break;
22228 case MEM:
22229 output_memory_reference_mode = GET_MODE (x);
22230 output_address (XEXP (x, 0));
22231 break;
22233 case CONST_DOUBLE:
22235 char fpstr[20];
22236 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22237 sizeof (fpstr), 0, 1);
22238 fprintf (stream, "#%s", fpstr);
22240 break;
22242 default:
22243 gcc_assert (GET_CODE (x) != NEG);
22244 fputc ('#', stream);
22245 if (GET_CODE (x) == HIGH)
22247 fputs (":lower16:", stream);
22248 x = XEXP (x, 0);
22251 output_addr_const (stream, x);
22252 break;
22257 /* Target hook for printing a memory address. */
22258 static void
22259 arm_print_operand_address (FILE *stream, rtx x)
22261 if (TARGET_32BIT)
22263 int is_minus = GET_CODE (x) == MINUS;
22265 if (REG_P (x))
22266 asm_fprintf (stream, "[%r]", REGNO (x));
22267 else if (GET_CODE (x) == PLUS || is_minus)
22269 rtx base = XEXP (x, 0);
22270 rtx index = XEXP (x, 1);
22271 HOST_WIDE_INT offset = 0;
22272 if (!REG_P (base)
22273 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22275 /* Ensure that BASE is a register. */
22276 /* (one of them must be). */
22277 /* Also ensure the SP is not used as an index register. */
22278 std::swap (base, index);
22280 switch (GET_CODE (index))
22282 case CONST_INT:
22283 offset = INTVAL (index);
22284 if (is_minus)
22285 offset = -offset;
22286 asm_fprintf (stream, "[%r, #%wd]",
22287 REGNO (base), offset);
22288 break;
22290 case REG:
22291 asm_fprintf (stream, "[%r, %s%r]",
22292 REGNO (base), is_minus ? "-" : "",
22293 REGNO (index));
22294 break;
22296 case MULT:
22297 case ASHIFTRT:
22298 case LSHIFTRT:
22299 case ASHIFT:
22300 case ROTATERT:
22302 asm_fprintf (stream, "[%r, %s%r",
22303 REGNO (base), is_minus ? "-" : "",
22304 REGNO (XEXP (index, 0)));
22305 arm_print_operand (stream, index, 'S');
22306 fputs ("]", stream);
22307 break;
22310 default:
22311 gcc_unreachable ();
22314 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22315 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22317 extern machine_mode output_memory_reference_mode;
22319 gcc_assert (REG_P (XEXP (x, 0)));
22321 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22322 asm_fprintf (stream, "[%r, #%s%d]!",
22323 REGNO (XEXP (x, 0)),
22324 GET_CODE (x) == PRE_DEC ? "-" : "",
22325 GET_MODE_SIZE (output_memory_reference_mode));
22326 else
22327 asm_fprintf (stream, "[%r], #%s%d",
22328 REGNO (XEXP (x, 0)),
22329 GET_CODE (x) == POST_DEC ? "-" : "",
22330 GET_MODE_SIZE (output_memory_reference_mode));
22332 else if (GET_CODE (x) == PRE_MODIFY)
22334 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22335 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22336 asm_fprintf (stream, "#%wd]!",
22337 INTVAL (XEXP (XEXP (x, 1), 1)));
22338 else
22339 asm_fprintf (stream, "%r]!",
22340 REGNO (XEXP (XEXP (x, 1), 1)));
22342 else if (GET_CODE (x) == POST_MODIFY)
22344 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22345 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22346 asm_fprintf (stream, "#%wd",
22347 INTVAL (XEXP (XEXP (x, 1), 1)));
22348 else
22349 asm_fprintf (stream, "%r",
22350 REGNO (XEXP (XEXP (x, 1), 1)));
22352 else output_addr_const (stream, x);
22354 else
22356 if (REG_P (x))
22357 asm_fprintf (stream, "[%r]", REGNO (x));
22358 else if (GET_CODE (x) == POST_INC)
22359 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22360 else if (GET_CODE (x) == PLUS)
22362 gcc_assert (REG_P (XEXP (x, 0)));
22363 if (CONST_INT_P (XEXP (x, 1)))
22364 asm_fprintf (stream, "[%r, #%wd]",
22365 REGNO (XEXP (x, 0)),
22366 INTVAL (XEXP (x, 1)));
22367 else
22368 asm_fprintf (stream, "[%r, %r]",
22369 REGNO (XEXP (x, 0)),
22370 REGNO (XEXP (x, 1)));
22372 else
22373 output_addr_const (stream, x);
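/* Illustrative summary (not taken from the sources): the 32-bit assembler
   syntax the routine above produces for the common address forms, assuming a
   word access with base r0 and index r1, and unified syntax for the shifted
   form.

     (reg r0)                          ->  [r0]
     (plus r0 (const_int 4))           ->  [r0, #4]
     (minus r0 r1)                     ->  [r0, -r1]
     (plus r0 (mult r1 (const_int 4))) ->  [r0, r1, lsl #2]
     (pre_inc r0)                      ->  [r0, #4]!
     (post_dec r0)                     ->  [r0], #-4
     (pre_modify r0 (plus r0 r1))      ->  [r0, r1]!
     (post_modify r0 (plus r0 r1))     ->  [r0], r1  */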
22377 /* Target hook for indicating whether a punctuation character for
22378 TARGET_PRINT_OPERAND is valid. */
22379 static bool
22380 arm_print_operand_punct_valid_p (unsigned char code)
22382 return (code == '@' || code == '|' || code == '.'
22383 || code == '(' || code == ')' || code == '#'
22384 || (TARGET_32BIT && (code == '?'))
22385 || (TARGET_THUMB2 && (code == '!'))
22386 || (TARGET_THUMB && (code == '_')));
22389 /* Target hook for assembling integer objects. The ARM version needs to
22390 handle word-sized values specially. */
22391 static bool
22392 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22394 machine_mode mode;
22396 if (size == UNITS_PER_WORD && aligned_p)
22398 fputs ("\t.word\t", asm_out_file);
22399 output_addr_const (asm_out_file, x);
22401 /* Mark symbols as position independent. We only do this in the
22402 .text segment, not in the .data segment. */
22403 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22404 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22406 /* See legitimize_pic_address for an explanation of the
22407 TARGET_VXWORKS_RTP check. */
22408 if (!arm_pic_data_is_text_relative
22409 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22410 fputs ("(GOT)", asm_out_file);
22411 else
22412 fputs ("(GOTOFF)", asm_out_file);
22414 fputc ('\n', asm_out_file);
22415 return true;
22418 mode = GET_MODE (x);
22420 if (arm_vector_mode_supported_p (mode))
22422 int i, units;
22424 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22426 units = CONST_VECTOR_NUNITS (x);
22427 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22429 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22430 for (i = 0; i < units; i++)
22432 rtx elt = CONST_VECTOR_ELT (x, i);
22433 assemble_integer
22434 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22436 else
22437 for (i = 0; i < units; i++)
22439 rtx elt = CONST_VECTOR_ELT (x, i);
22440 REAL_VALUE_TYPE rval;
22442 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22444 assemble_real
22445 (rval, GET_MODE_INNER (mode),
22446 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22449 return true;
22452 return default_assemble_integer (x, size, aligned_p);
22455 static void
22456 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22458 section *s;
22460 if (!TARGET_AAPCS_BASED)
22462 (is_ctor ?
22463 default_named_section_asm_out_constructor
22464 : default_named_section_asm_out_destructor) (symbol, priority);
22465 return;
22468 /* Put these in the .init_array section, using a special relocation. */
22469 if (priority != DEFAULT_INIT_PRIORITY)
22471 char buf[18];
22472 sprintf (buf, "%s.%.5u",
22473 is_ctor ? ".init_array" : ".fini_array",
22474 priority);
22475 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22477 else if (is_ctor)
22478 s = ctors_section;
22479 else
22480 s = dtors_section;
22482 switch_to_section (s);
22483 assemble_align (POINTER_SIZE);
22484 fputs ("\t.word\t", asm_out_file);
22485 output_addr_const (asm_out_file, symbol);
22486 fputs ("(target1)\n", asm_out_file);
22489 /* Add a function to the list of static constructors. */
22491 static void
22492 arm_elf_asm_constructor (rtx symbol, int priority)
22494 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22497 /* Add a function to the list of static destructors. */
22499 static void
22500 arm_elf_asm_destructor (rtx symbol, int priority)
22502 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22505 /* A finite state machine takes care of noticing whether or not instructions
22506 can be conditionally executed, and thus decrease execution time and code
22507 size by deleting branch instructions. The fsm is controlled by
22508 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22510 /* The states of the fsm controlling condition codes are:
22511 0: normal, do nothing special
22512 1: make ASM_OUTPUT_OPCODE not output this instruction
22513 2: make ASM_OUTPUT_OPCODE not output this instruction
22514 3: make instructions conditional
22515 4: make instructions conditional
22517 State transitions (state->state by whom under condition):
22518 0 -> 1 final_prescan_insn if the `target' is a label
22519 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22520 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22521 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22522 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22523 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22524 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22525 (the target insn is arm_target_insn).
22527 If the jump clobbers the conditions then we use states 2 and 4.
22529 A similar thing can be done with conditional return insns.
22531 XXX In case the `target' is an unconditional branch, this conditionalising
22532 of the instructions always reduces code size, but not always execution
22533 time. But then, I want to reduce the code size to somewhere near what
22534 /bin/cc produces. */
22536 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22537 instructions. When a COND_EXEC instruction is seen the subsequent
22538 instructions are scanned so that multiple conditional instructions can be
22539 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22540 specify the length and true/false mask for the IT block. These will be
22541 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
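/* A stand-alone sketch (not compiled here) of the state transitions described
   above; the event names are invented for the illustration.  */
#if 0
enum ccfsm_event
{
  BRANCH_OVER_LABEL,		/* the `target' is a label */
  BRANCH_OVER_UNCOND_BRANCH,	/* the `target' is an unconditional branch */
  OPCODE_SUPPRESSED,		/* ASM_OUTPUT_OPCODE swallowed the branch */
  TARGET_LABEL_REACHED,
  TARGET_BRANCH_REACHED
};

static int
ccfsm_next_state (int state, enum ccfsm_event ev)
{
  switch (state)
    {
    case 0: return (ev == BRANCH_OVER_LABEL ? 1
		    : ev == BRANCH_OVER_UNCOND_BRANCH ? 2 : 0);
    case 1: return ev == OPCODE_SUPPRESSED ? 3 : 1;
    case 2: return ev == OPCODE_SUPPRESSED ? 4 : 2;
    case 3: return ev == TARGET_LABEL_REACHED ? 0 : 3;
    case 4: return ev == TARGET_BRANCH_REACHED ? 0 : 4;
    default: return 0;
    }
}
#endif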
22543 /* Returns the index of the ARM condition code string in
22544 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22545 COMPARISON should be an rtx like `(eq (...) (...))'. */
22547 enum arm_cond_code
22548 maybe_get_arm_condition_code (rtx comparison)
22550 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22551 enum arm_cond_code code;
22552 enum rtx_code comp_code = GET_CODE (comparison);
22554 if (GET_MODE_CLASS (mode) != MODE_CC)
22555 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22556 XEXP (comparison, 1));
22558 switch (mode)
22560 case CC_DNEmode: code = ARM_NE; goto dominance;
22561 case CC_DEQmode: code = ARM_EQ; goto dominance;
22562 case CC_DGEmode: code = ARM_GE; goto dominance;
22563 case CC_DGTmode: code = ARM_GT; goto dominance;
22564 case CC_DLEmode: code = ARM_LE; goto dominance;
22565 case CC_DLTmode: code = ARM_LT; goto dominance;
22566 case CC_DGEUmode: code = ARM_CS; goto dominance;
22567 case CC_DGTUmode: code = ARM_HI; goto dominance;
22568 case CC_DLEUmode: code = ARM_LS; goto dominance;
22569 case CC_DLTUmode: code = ARM_CC;
22571 dominance:
22572 if (comp_code == EQ)
22573 return ARM_INVERSE_CONDITION_CODE (code);
22574 if (comp_code == NE)
22575 return code;
22576 return ARM_NV;
22578 case CC_NOOVmode:
22579 switch (comp_code)
22581 case NE: return ARM_NE;
22582 case EQ: return ARM_EQ;
22583 case GE: return ARM_PL;
22584 case LT: return ARM_MI;
22585 default: return ARM_NV;
22588 case CC_Zmode:
22589 switch (comp_code)
22591 case NE: return ARM_NE;
22592 case EQ: return ARM_EQ;
22593 default: return ARM_NV;
22596 case CC_Nmode:
22597 switch (comp_code)
22599 case NE: return ARM_MI;
22600 case EQ: return ARM_PL;
22601 default: return ARM_NV;
22604 case CCFPEmode:
22605 case CCFPmode:
22606 /* We can handle all cases except UNEQ and LTGT. */
22607 switch (comp_code)
22609 case GE: return ARM_GE;
22610 case GT: return ARM_GT;
22611 case LE: return ARM_LS;
22612 case LT: return ARM_MI;
22613 case NE: return ARM_NE;
22614 case EQ: return ARM_EQ;
22615 case ORDERED: return ARM_VC;
22616 case UNORDERED: return ARM_VS;
22617 case UNLT: return ARM_LT;
22618 case UNLE: return ARM_LE;
22619 case UNGT: return ARM_HI;
22620 case UNGE: return ARM_PL;
22621 /* UNEQ and LTGT do not have a representation. */
22622 case UNEQ: /* Fall through. */
22623 case LTGT: /* Fall through. */
22624 default: return ARM_NV;
22627 case CC_SWPmode:
22628 switch (comp_code)
22630 case NE: return ARM_NE;
22631 case EQ: return ARM_EQ;
22632 case GE: return ARM_LE;
22633 case GT: return ARM_LT;
22634 case LE: return ARM_GE;
22635 case LT: return ARM_GT;
22636 case GEU: return ARM_LS;
22637 case GTU: return ARM_CC;
22638 case LEU: return ARM_CS;
22639 case LTU: return ARM_HI;
22640 default: return ARM_NV;
22643 case CC_Cmode:
22644 switch (comp_code)
22646 case LTU: return ARM_CS;
22647 case GEU: return ARM_CC;
22648 default: return ARM_NV;
22651 case CC_CZmode:
22652 switch (comp_code)
22654 case NE: return ARM_NE;
22655 case EQ: return ARM_EQ;
22656 case GEU: return ARM_CS;
22657 case GTU: return ARM_HI;
22658 case LEU: return ARM_LS;
22659 case LTU: return ARM_CC;
22660 default: return ARM_NV;
22663 case CC_NCVmode:
22664 switch (comp_code)
22666 case GE: return ARM_GE;
22667 case LT: return ARM_LT;
22668 case GEU: return ARM_CS;
22669 case LTU: return ARM_CC;
22670 default: return ARM_NV;
22673 case CCmode:
22674 switch (comp_code)
22676 case NE: return ARM_NE;
22677 case EQ: return ARM_EQ;
22678 case GE: return ARM_GE;
22679 case GT: return ARM_GT;
22680 case LE: return ARM_LE;
22681 case LT: return ARM_LT;
22682 case GEU: return ARM_CS;
22683 case GTU: return ARM_HI;
22684 case LEU: return ARM_LS;
22685 case LTU: return ARM_CC;
22686 default: return ARM_NV;
22689 default: gcc_unreachable ();
22693 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22694 static enum arm_cond_code
22695 get_arm_condition_code (rtx comparison)
22697 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22698 gcc_assert (code != ARM_NV);
22699 return code;
22702 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22703 instructions. */
22704 void
22705 thumb2_final_prescan_insn (rtx_insn *insn)
22707 rtx_insn *first_insn = insn;
22708 rtx body = PATTERN (insn);
22709 rtx predicate;
22710 enum arm_cond_code code;
22711 int n;
22712 int mask;
22713 int max;
22715 /* max_insns_skipped in the tune was already taken into account in the
22716 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22717 just emit the IT blocks as best we can. It does not make sense to split
22718 the IT blocks. */
22719 max = MAX_INSN_PER_IT_BLOCK;
22721 /* Remove the previous insn from the count of insns to be output. */
22722 if (arm_condexec_count)
22723 arm_condexec_count--;
22725 /* Nothing to do if we are already inside a conditional block. */
22726 if (arm_condexec_count)
22727 return;
22729 if (GET_CODE (body) != COND_EXEC)
22730 return;
22732 /* Conditional jumps are implemented directly. */
22733 if (JUMP_P (insn))
22734 return;
22736 predicate = COND_EXEC_TEST (body);
22737 arm_current_cc = get_arm_condition_code (predicate);
22739 n = get_attr_ce_count (insn);
22740 arm_condexec_count = 1;
22741 arm_condexec_mask = (1 << n) - 1;
22742 arm_condexec_masklen = n;
22743 /* See if subsequent instructions can be combined into the same block. */
22744 for (;;)
22746 insn = next_nonnote_insn (insn);
22748 /* Jumping into the middle of an IT block is illegal, so a label or
22749 barrier terminates the block. */
22750 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22751 break;
22753 body = PATTERN (insn);
22754 /* USE and CLOBBER aren't really insns, so just skip them. */
22755 if (GET_CODE (body) == USE
22756 || GET_CODE (body) == CLOBBER)
22757 continue;
22759 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22760 if (GET_CODE (body) != COND_EXEC)
22761 break;
22762 /* Maximum number of conditionally executed instructions in a block. */
22763 n = get_attr_ce_count (insn);
22764 if (arm_condexec_masklen + n > max)
22765 break;
22767 predicate = COND_EXEC_TEST (body);
22768 code = get_arm_condition_code (predicate);
22769 mask = (1 << n) - 1;
22770 if (arm_current_cc == code)
22771 arm_condexec_mask |= (mask << arm_condexec_masklen);
22772 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22773 break;
22775 arm_condexec_count++;
22776 arm_condexec_masklen += n;
22778 /* A jump must be the last instruction in a conditional block. */
22779 if (JUMP_P (insn))
22780 break;
22782 /* Restore recog_data (getting the attributes of other insns can
22783 destroy this array, but final.c assumes that it remains intact
22784 across this call). */
22785 extract_constrain_insn_cached (first_insn);
22788 void
22789 arm_final_prescan_insn (rtx_insn *insn)
22791 /* BODY will hold the body of INSN. */
22792 rtx body = PATTERN (insn);
22794 /* This will be 1 if trying to repeat the trick, and things need to be
22795 reversed if it appears to fail. */
22796 int reverse = 0;
22798 /* If we start with a return insn, we only succeed if we find another one. */
22799 int seeking_return = 0;
22800 enum rtx_code return_code = UNKNOWN;
22802 /* START_INSN will hold the insn from where we start looking. This is the
22803 first insn after the following code_label if REVERSE is true. */
22804 rtx_insn *start_insn = insn;
22806 /* If in state 4, check if the target branch is reached, in order to
22807 change back to state 0. */
22808 if (arm_ccfsm_state == 4)
22810 if (insn == arm_target_insn)
22812 arm_target_insn = NULL;
22813 arm_ccfsm_state = 0;
22815 return;
22818 /* If in state 3, it is possible to repeat the trick, if this insn is an
22819 unconditional branch to a label, and immediately following this branch
22820 is the previous target label which is only used once, and the label this
22821 branch jumps to is not too far off. */
22822 if (arm_ccfsm_state == 3)
22824 if (simplejump_p (insn))
22826 start_insn = next_nonnote_insn (start_insn);
22827 if (BARRIER_P (start_insn))
22829 /* XXX Isn't this always a barrier? */
22830 start_insn = next_nonnote_insn (start_insn);
22832 if (LABEL_P (start_insn)
22833 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22834 && LABEL_NUSES (start_insn) == 1)
22835 reverse = TRUE;
22836 else
22837 return;
22839 else if (ANY_RETURN_P (body))
22841 start_insn = next_nonnote_insn (start_insn);
22842 if (BARRIER_P (start_insn))
22843 start_insn = next_nonnote_insn (start_insn);
22844 if (LABEL_P (start_insn)
22845 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22846 && LABEL_NUSES (start_insn) == 1)
22848 reverse = TRUE;
22849 seeking_return = 1;
22850 return_code = GET_CODE (body);
22852 else
22853 return;
22855 else
22856 return;
22859 gcc_assert (!arm_ccfsm_state || reverse);
22860 if (!JUMP_P (insn))
22861 return;
22863 /* This jump might be paralleled with a clobber of the condition codes;
22864 the jump should always come first. */
22865 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22866 body = XVECEXP (body, 0, 0);
22868 if (reverse
22869 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22870 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22872 int insns_skipped;
22873 int fail = FALSE, succeed = FALSE;
22874 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22875 int then_not_else = TRUE;
22876 rtx_insn *this_insn = start_insn;
22877 rtx label = 0;
22879 /* Register the insn jumped to. */
22880 if (reverse)
22882 if (!seeking_return)
22883 label = XEXP (SET_SRC (body), 0);
22885 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22886 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22887 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22889 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22890 then_not_else = FALSE;
22892 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22894 seeking_return = 1;
22895 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22897 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22899 seeking_return = 1;
22900 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22901 then_not_else = FALSE;
22903 else
22904 gcc_unreachable ();
22906 /* See how many insns this branch skips, and what kind of insns. If all
22907 insns are okay, and the label or unconditional branch to the same
22908 label is not too far away, succeed. */
22909 for (insns_skipped = 0;
22910 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22912 rtx scanbody;
22914 this_insn = next_nonnote_insn (this_insn);
22915 if (!this_insn)
22916 break;
22918 switch (GET_CODE (this_insn))
22920 case CODE_LABEL:
22921 /* Succeed if it is the target label, otherwise fail since
22922 control falls in from somewhere else. */
22923 if (this_insn == label)
22925 arm_ccfsm_state = 1;
22926 succeed = TRUE;
22928 else
22929 fail = TRUE;
22930 break;
22932 case BARRIER:
22933 /* Succeed if the following insn is the target label.
22934 Otherwise fail.
22935 If return insns are used then the last insn in a function
22936 will be a barrier. */
22937 this_insn = next_nonnote_insn (this_insn);
22938 if (this_insn && this_insn == label)
22940 arm_ccfsm_state = 1;
22941 succeed = TRUE;
22943 else
22944 fail = TRUE;
22945 break;
22947 case CALL_INSN:
22948 /* The AAPCS says that conditional calls should not be
22949 used since they make interworking inefficient (the
22950 linker can't transform BL<cond> into BLX). That's
22951 only a problem if the machine has BLX. */
22952 if (arm_arch5)
22954 fail = TRUE;
22955 break;
22958 /* Succeed if the following insn is the target label, or
22959 if the following two insns are a barrier and the
22960 target label. */
22961 this_insn = next_nonnote_insn (this_insn);
22962 if (this_insn && BARRIER_P (this_insn))
22963 this_insn = next_nonnote_insn (this_insn);
22965 if (this_insn && this_insn == label
22966 && insns_skipped < max_insns_skipped)
22968 arm_ccfsm_state = 1;
22969 succeed = TRUE;
22971 else
22972 fail = TRUE;
22973 break;
22975 case JUMP_INSN:
22976 /* If this is an unconditional branch to the same label, succeed.
22977 If it is to another label, do nothing. If it is conditional,
22978 fail. */
22979 /* XXX Probably, the tests for SET and the PC are
22980 unnecessary. */
22982 scanbody = PATTERN (this_insn);
22983 if (GET_CODE (scanbody) == SET
22984 && GET_CODE (SET_DEST (scanbody)) == PC)
22986 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22987 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22989 arm_ccfsm_state = 2;
22990 succeed = TRUE;
22992 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22993 fail = TRUE;
22995 /* Fail if a conditional return is undesirable (e.g. on a
22996 StrongARM), but still allow this if optimizing for size. */
22997 else if (GET_CODE (scanbody) == return_code
22998 && !use_return_insn (TRUE, NULL)
22999 && !optimize_size)
23000 fail = TRUE;
23001 else if (GET_CODE (scanbody) == return_code)
23003 arm_ccfsm_state = 2;
23004 succeed = TRUE;
23006 else if (GET_CODE (scanbody) == PARALLEL)
23008 switch (get_attr_conds (this_insn))
23010 case CONDS_NOCOND:
23011 break;
23012 default:
23013 fail = TRUE;
23014 break;
23017 else
23018 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23020 break;
23022 case INSN:
23023 /* Instructions using or affecting the condition codes make it
23024 fail. */
23025 scanbody = PATTERN (this_insn);
23026 if (!(GET_CODE (scanbody) == SET
23027 || GET_CODE (scanbody) == PARALLEL)
23028 || get_attr_conds (this_insn) != CONDS_NOCOND)
23029 fail = TRUE;
23030 break;
23032 default:
23033 break;
23036 if (succeed)
23038 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23039 arm_target_label = CODE_LABEL_NUMBER (label);
23040 else
23042 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23044 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23046 this_insn = next_nonnote_insn (this_insn);
23047 gcc_assert (!this_insn
23048 || (!BARRIER_P (this_insn)
23049 && !LABEL_P (this_insn)));
23051 if (!this_insn)
23053 /* Oh, dear! We ran off the end. Give up. */
23054 extract_constrain_insn_cached (insn);
23055 arm_ccfsm_state = 0;
23056 arm_target_insn = NULL;
23057 return;
23059 arm_target_insn = this_insn;
23062 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23063 what it was. */
23064 if (!reverse)
23065 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23067 if (reverse || then_not_else)
23068 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23071 /* Restore recog_data (getting the attributes of other insns can
23072 destroy this array, but final.c assumes that it remains intact
23073 across this call). */
23074 extract_constrain_insn_cached (insn);
23078 /* Output IT instructions. */
23079 void
23080 thumb2_asm_output_opcode (FILE * stream)
23082 char buff[5];
23083 int n;
23085 if (arm_condexec_mask)
23087 for (n = 0; n < arm_condexec_masklen; n++)
23088 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23089 buff[n] = 0;
23090 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23091 arm_condition_codes[arm_current_cc]);
23092 arm_condexec_mask = 0;
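/* A stand-alone sketch (not compiled here) of the IT-block string built
   above: bit n of the mask selects 't' (same condition) or 'e' (inverse
   condition) for the n-th instruction of the block.  */
#if 0
#include <stdio.h>

static void
show_it (unsigned mask, int len, const char *cond)
{
  char buff[5];
  int n;
  for (n = 0; n < len; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
  printf ("i%s\t%s\n", buff, cond);
}

int
main (void)
{
  show_it (0x1, 1, "eq");	/* it    eq */
  show_it (0x3, 2, "eq");	/* itt   eq */
  show_it (0x1, 2, "ne");	/* ite   ne */
  show_it (0x5, 3, "gt");	/* itet  gt */
  return 0;
}
#endif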
23096 /* Returns true if REGNO is a valid register
23097 for holding a quantity of type MODE. */
23099 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23101 if (GET_MODE_CLASS (mode) == MODE_CC)
23102 return (regno == CC_REGNUM
23103 || (TARGET_HARD_FLOAT && TARGET_VFP
23104 && regno == VFPCC_REGNUM));
23106 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23107 return false;
23109 if (TARGET_THUMB1)
23110 /* For the Thumb we only allow values bigger than SImode in
23111 registers 0 - 6, so that there is always a second low
23112 register available to hold the upper part of the value.
23113 We probably ought to ensure that the register is the
23114 start of an even numbered register pair. */
23115 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23117 if (TARGET_HARD_FLOAT && TARGET_VFP
23118 && IS_VFP_REGNUM (regno))
23120 if (mode == SFmode || mode == SImode)
23121 return VFP_REGNO_OK_FOR_SINGLE (regno);
23123 if (mode == DFmode)
23124 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23126 /* VFP registers can hold HFmode values, but there is no point in
23127 putting them there unless we have hardware conversion insns. */
23128 if (mode == HFmode)
23129 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23131 if (TARGET_NEON)
23132 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23133 || (VALID_NEON_QREG_MODE (mode)
23134 && NEON_REGNO_OK_FOR_QUAD (regno))
23135 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23136 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23137 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23138 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23139 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23141 return FALSE;
23144 if (TARGET_REALLY_IWMMXT)
23146 if (IS_IWMMXT_GR_REGNUM (regno))
23147 return mode == SImode;
23149 if (IS_IWMMXT_REGNUM (regno))
23150 return VALID_IWMMXT_REG_MODE (mode);
23153 /* We allow almost any value to be stored in the general registers.
23154 Restrict doubleword quantities to even register pairs in ARM state
23155 so that we can use ldrd. Do not allow very large Neon structure
23156 opaque modes in general registers; they would use too many. */
23157 if (regno <= LAST_ARM_REGNUM)
23159 if (ARM_NUM_REGS (mode) > 4)
23160 return FALSE;
23162 if (TARGET_THUMB2)
23163 return TRUE;
23165 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23168 if (regno == FRAME_POINTER_REGNUM
23169 || regno == ARG_POINTER_REGNUM)
23170 /* We only allow integers in the fake hard registers. */
23171 return GET_MODE_CLASS (mode) == MODE_INT;
23173 return FALSE;
23176 /* Implement MODES_TIEABLE_P. */
23178 bool
23179 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23181 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23182 return true;
23184 /* We specifically want to allow elements of "structure" modes to
23185 be tieable to the structure. This more general condition allows
23186 other rarer situations too. */
23187 if (TARGET_NEON
23188 && (VALID_NEON_DREG_MODE (mode1)
23189 || VALID_NEON_QREG_MODE (mode1)
23190 || VALID_NEON_STRUCT_MODE (mode1))
23191 && (VALID_NEON_DREG_MODE (mode2)
23192 || VALID_NEON_QREG_MODE (mode2)
23193 || VALID_NEON_STRUCT_MODE (mode2)))
23194 return true;
23196 return false;
23199 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23200 not used in arm mode. */
23202 enum reg_class
23203 arm_regno_class (int regno)
23205 if (regno == PC_REGNUM)
23206 return NO_REGS;
23208 if (TARGET_THUMB1)
23210 if (regno == STACK_POINTER_REGNUM)
23211 return STACK_REG;
23212 if (regno == CC_REGNUM)
23213 return CC_REG;
23214 if (regno < 8)
23215 return LO_REGS;
23216 return HI_REGS;
23219 if (TARGET_THUMB2 && regno < 8)
23220 return LO_REGS;
23222 if ( regno <= LAST_ARM_REGNUM
23223 || regno == FRAME_POINTER_REGNUM
23224 || regno == ARG_POINTER_REGNUM)
23225 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23227 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23228 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23230 if (IS_VFP_REGNUM (regno))
23232 if (regno <= D7_VFP_REGNUM)
23233 return VFP_D0_D7_REGS;
23234 else if (regno <= LAST_LO_VFP_REGNUM)
23235 return VFP_LO_REGS;
23236 else
23237 return VFP_HI_REGS;
23240 if (IS_IWMMXT_REGNUM (regno))
23241 return IWMMXT_REGS;
23243 if (IS_IWMMXT_GR_REGNUM (regno))
23244 return IWMMXT_GR_REGS;
23246 return NO_REGS;
23249 /* Handle a special case when computing the offset
23250 of an argument from the frame pointer. */
23252 arm_debugger_arg_offset (int value, rtx addr)
23254 rtx_insn *insn;
23256 /* We are only interested if dbxout_parms() failed to compute the offset. */
23257 if (value != 0)
23258 return 0;
23260 /* We can only cope with the case where the address is held in a register. */
23261 if (!REG_P (addr))
23262 return 0;
23264 /* If we are using the frame pointer to point at the argument, then
23265 an offset of 0 is correct. */
23266 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23267 return 0;
23269 /* If we are using the stack pointer to point at the
23270 argument, then an offset of 0 is correct. */
23271 /* ??? Check this is consistent with thumb2 frame layout. */
23272 if ((TARGET_THUMB || !frame_pointer_needed)
23273 && REGNO (addr) == SP_REGNUM)
23274 return 0;
23276 /* Oh dear. The argument is pointed to by a register rather
23277 than being held in a register, or being stored at a known
23278 offset from the frame pointer. Since GDB only understands
23279 those two kinds of argument we must translate the address
23280 held in the register into an offset from the frame pointer.
23281 We do this by searching through the insns for the function
23282 looking to see where this register gets its value. If the
23283 register is initialized from the frame pointer plus an offset
23284 then we are in luck and we can continue, otherwise we give up.
23286 This code is exercised by producing debugging information
23287 for a function with arguments like this:
23289 double func (double a, double b, int c, double d) {return d;}
23291 Without this code the stab for parameter 'd' will be set to
23292 an offset of 0 from the frame pointer, rather than 8. */
23294 /* The if() statement says:
23296 If the insn is a normal instruction
23297 and if the insn is setting the value in a register
23298 and if the register being set is the register holding the address of the argument
23299 and if the address is computed by an addition
23300 that involves adding to a register
23301 which is the frame pointer
23302 a constant integer
23304 then... */
23306 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23308 if ( NONJUMP_INSN_P (insn)
23309 && GET_CODE (PATTERN (insn)) == SET
23310 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23311 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23312 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23313 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23314 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23317 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23319 break;
23323 if (value == 0)
23325 debug_rtx (addr);
23326 warning (0, "unable to compute real location of stacked parameter");
23327 value = 8; /* XXX magic hack */
23330 return value;
23333 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23335 static const char *
23336 arm_invalid_parameter_type (const_tree t)
23338 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23339 return N_("function parameters cannot have __fp16 type");
23340 return NULL;
23343 /* Implement TARGET_INVALID_RETURN_TYPE. */
23345 static const char *
23346 arm_invalid_return_type (const_tree t)
23348 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23349 return N_("functions cannot return __fp16 type");
23350 return NULL;
23353 /* Implement TARGET_PROMOTED_TYPE. */
23355 static tree
23356 arm_promoted_type (const_tree t)
23358 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23359 return float_type_node;
23360 return NULL_TREE;
23363 /* Implement TARGET_CONVERT_TO_TYPE.
23364 Specifically, this hook implements the peculiarity of the ARM
23365 half-precision floating-point C semantics that requires conversions between
23366 __fp16 to or from double to do an intermediate conversion to float. */
23368 static tree
23369 arm_convert_to_type (tree type, tree expr)
23371 tree fromtype = TREE_TYPE (expr);
23372 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23373 return NULL_TREE;
23374 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23375 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23376 return convert (type, convert (float_type_node, expr));
23377 return NULL_TREE;
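/* A stand-alone sketch (not compiled here; requires an ARM target with
   __fp16 enabled, e.g. -mfp16-format=ieee) of the rewrites this hook
   requests for conversions between __fp16 and double.  */
#if 0
double
widen (__fp16 h)
{
  /* Converted as (double) (float) h rather than directly.  */
  return h;
}

__fp16
narrow (double d)
{
  /* Converted as (__fp16) (float) d rather than directly.  */
  return (__fp16) d;
}
#endif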
23380 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23381 This simply adds HFmode as a supported mode; even though we don't
23382 implement arithmetic on this type directly, it's supported by
23383 optabs conversions, much the way the double-word arithmetic is
23384 special-cased in the default hook. */
23386 static bool
23387 arm_scalar_mode_supported_p (machine_mode mode)
23389 if (mode == HFmode)
23390 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23391 else if (ALL_FIXED_POINT_MODE_P (mode))
23392 return true;
23393 else
23394 return default_scalar_mode_supported_p (mode);
23397 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23398 void
23399 neon_reinterpret (rtx dest, rtx src)
23401 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23404 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23405 not to early-clobber SRC registers in the process.
23407 We assume that the operands described by SRC and DEST represent a
23408 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23409 number of components into which the copy has been decomposed. */
23410 void
23411 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23413 unsigned int i;
23415 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23416 || REGNO (operands[0]) < REGNO (operands[1]))
23418 for (i = 0; i < count; i++)
23420 operands[2 * i] = dest[i];
23421 operands[2 * i + 1] = src[i];
23424 else
23426 for (i = 0; i < count; i++)
23428 operands[2 * i] = dest[count - i - 1];
23429 operands[2 * i + 1] = src[count - i - 1];
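/* Illustrative example (hypothetical register assignment): copying the
   pair {d0,d1} into {d1,d2} must move d1->d2 before d0->d1, or the first
   move would clobber the source of the second; the REGNO comparison above
   chooses between the forward and reversed orderings for exactly this
   reason.  */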
23434 /* Split operands into moves from op[1] + op[2] into op[0]. */
23436 void
23437 neon_split_vcombine (rtx operands[3])
23439 unsigned int dest = REGNO (operands[0]);
23440 unsigned int src1 = REGNO (operands[1]);
23441 unsigned int src2 = REGNO (operands[2]);
23442 machine_mode halfmode = GET_MODE (operands[1]);
23443 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23444 rtx destlo, desthi;
23446 if (src1 == dest && src2 == dest + halfregs)
23448 /* No-op move. Can't split to nothing; emit something. */
23449 emit_note (NOTE_INSN_DELETED);
23450 return;
23453 /* Preserve register attributes for variable tracking. */
23454 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23455 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23456 GET_MODE_SIZE (halfmode));
23458 /* Special case of reversed high/low parts. Use VSWP. */
23459 if (src2 == dest && src1 == dest + halfregs)
23461 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23462 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23464 return;
23467 if (!reg_overlap_mentioned_p (operands[2], destlo))
23469 /* Try to avoid unnecessary moves if part of the result
23470 is in the right place already. */
23471 if (src1 != dest)
23472 emit_move_insn (destlo, operands[1]);
23473 if (src2 != dest + halfregs)
23474 emit_move_insn (desthi, operands[2]);
23476 else
23478 if (src2 != dest + halfregs)
23479 emit_move_insn (desthi, operands[2]);
23480 if (src1 != dest)
23481 emit_move_insn (destlo, operands[1]);
23485 /* Return the number (counting from 0) of
23486 the least significant set bit in MASK. */
23488 inline static int
23489 number_of_first_bit_set (unsigned mask)
23491 return ctz_hwi (mask);
23494 /* Like emit_multi_reg_push, but allowing for a different set of
23495 registers to be described as saved. MASK is the set of registers
23496 to be saved; REAL_REGS is the set of registers to be described as
23497 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23499 static rtx_insn *
23500 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23502 unsigned long regno;
23503 rtx par[10], tmp, reg;
23504 rtx_insn *insn;
23505 int i, j;
23507 /* Build the parallel of the registers actually being stored. */
23508 for (i = 0; mask; ++i, mask &= mask - 1)
23510 regno = ctz_hwi (mask);
23511 reg = gen_rtx_REG (SImode, regno);
23513 if (i == 0)
23514 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23515 else
23516 tmp = gen_rtx_USE (VOIDmode, reg);
23518 par[i] = tmp;
23521 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23522 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23523 tmp = gen_frame_mem (BLKmode, tmp);
23524 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23525 par[0] = tmp;
23527 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23528 insn = emit_insn (tmp);
23530 /* Always build the stack adjustment note for unwind info. */
23531 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23532 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23533 par[0] = tmp;
23535 /* Build the parallel of the registers recorded as saved for unwind. */
23536 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23538 regno = ctz_hwi (real_regs);
23539 reg = gen_rtx_REG (SImode, regno);
23541 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23542 tmp = gen_frame_mem (SImode, tmp);
23543 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23544 RTX_FRAME_RELATED_P (tmp) = 1;
23545 par[j + 1] = tmp;
23548 if (j == 0)
23549 tmp = par[0];
23550 else
23552 RTX_FRAME_RELATED_P (par[0]) = 1;
23553 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23556 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23558 return insn;
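/* For instance (a hypothetical MASK): with MASK covering {r4, r5, lr}
   this emits a single "push {r4, r5, lr}" that drops SP by 12 bytes,
   while the REG_FRAME_RELATED_EXPR note built from REAL_REGS tells the
   unwinder about the SP adjustment and the individual register stores.  */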
23561 /* Emit code to push or pop registers to or from the stack. F is the
23562 assembly file. MASK is the registers to pop. */
23563 static void
23564 thumb_pop (FILE *f, unsigned long mask)
23566 int regno;
23567 int lo_mask = mask & 0xFF;
23568 int pushed_words = 0;
23570 gcc_assert (mask);
23572 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23574 /* Special case. Do not generate a POP PC statement here, do it in
23575 thumb_exit() */
23576 thumb_exit (f, -1);
23577 return;
23580 fprintf (f, "\tpop\t{");
23582 /* Look at the low registers first. */
23583 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23585 if (lo_mask & 1)
23587 asm_fprintf (f, "%r", regno);
23589 if ((lo_mask & ~1) != 0)
23590 fprintf (f, ", ");
23592 pushed_words++;
23596 if (mask & (1 << PC_REGNUM))
23598 /* Catch popping the PC. */
23599 if (TARGET_INTERWORK || TARGET_BACKTRACE
23600 || crtl->calls_eh_return)
23602 /* The PC is never popped directly; instead
23603 it is popped into r3 and then BX is used. */
23604 fprintf (f, "}\n");
23606 thumb_exit (f, -1);
23608 return;
23610 else
23612 if (mask & 0xFF)
23613 fprintf (f, ", ");
23615 asm_fprintf (f, "%r", PC_REGNUM);
23619 fprintf (f, "}\n");
23622 /* Generate code to return from a thumb function.
23623 If 'reg_containing_return_addr' is -1, then the return address is
23624 actually on the stack, at the stack pointer. */
23625 static void
23626 thumb_exit (FILE *f, int reg_containing_return_addr)
23628 unsigned regs_available_for_popping;
23629 unsigned regs_to_pop;
23630 int pops_needed;
23631 unsigned available;
23632 unsigned required;
23633 machine_mode mode;
23634 int size;
23635 int restore_a4 = FALSE;
23637 /* Compute the registers we need to pop. */
23638 regs_to_pop = 0;
23639 pops_needed = 0;
23641 if (reg_containing_return_addr == -1)
23643 regs_to_pop |= 1 << LR_REGNUM;
23644 ++pops_needed;
23647 if (TARGET_BACKTRACE)
23649 /* Restore the (ARM) frame pointer and stack pointer. */
23650 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23651 pops_needed += 2;
23654 /* If there is nothing to pop then just emit the BX instruction and
23655 return. */
23656 if (pops_needed == 0)
23658 if (crtl->calls_eh_return)
23659 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23661 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23662 return;
23664 /* Otherwise if we are not supporting interworking and we have not created
23665 a backtrace structure and the function was not entered in ARM mode then
23666 just pop the return address straight into the PC. */
23667 else if (!TARGET_INTERWORK
23668 && !TARGET_BACKTRACE
23669 && !is_called_in_ARM_mode (current_function_decl)
23670 && !crtl->calls_eh_return)
23672 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23673 return;
23676 /* Find out how many of the (return) argument registers we can corrupt. */
23677 regs_available_for_popping = 0;
23679 /* If returning via __builtin_eh_return, the bottom three registers
23680 all contain information needed for the return. */
23681 if (crtl->calls_eh_return)
23682 size = 12;
23683 else
23685 /* We can deduce the registers used from the function's
23686 return value. This is more reliable than examining
23687 df_regs_ever_live_p () because that will be set if the register is
23688 ever used in the function, not just if the register is used
23689 to hold a return value. */
23691 if (crtl->return_rtx != 0)
23692 mode = GET_MODE (crtl->return_rtx);
23693 else
23694 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23696 size = GET_MODE_SIZE (mode);
23698 if (size == 0)
23700 /* In a void function we can use any argument register.
23701 In a function that returns a structure on the stack
23702 we can use the second and third argument registers. */
23703 if (mode == VOIDmode)
23704 regs_available_for_popping =
23705 (1 << ARG_REGISTER (1))
23706 | (1 << ARG_REGISTER (2))
23707 | (1 << ARG_REGISTER (3));
23708 else
23709 regs_available_for_popping =
23710 (1 << ARG_REGISTER (2))
23711 | (1 << ARG_REGISTER (3));
23713 else if (size <= 4)
23714 regs_available_for_popping =
23715 (1 << ARG_REGISTER (2))
23716 | (1 << ARG_REGISTER (3));
23717 else if (size <= 8)
23718 regs_available_for_popping =
23719 (1 << ARG_REGISTER (3));
23722 /* Match registers to be popped with registers into which we pop them. */
23723 for (available = regs_available_for_popping,
23724 required = regs_to_pop;
23725 required != 0 && available != 0;
23726 available &= ~(available & - available),
23727 required &= ~(required & - required))
23728 -- pops_needed;
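/* Note: each iteration of the loop above clears the lowest set bit of
   both AVAILABLE and REQUIRED, so POPS_NEEDED is reduced by the number of
   to-be-popped registers that could be paired with a free register.  */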
23730 /* If we have any popping registers left over, remove them. */
23731 if (available > 0)
23732 regs_available_for_popping &= ~available;
23734 /* Otherwise if we need another popping register we can use
23735 the fourth argument register. */
23736 else if (pops_needed)
23738 /* If we have not found any free argument registers and
23739 reg a4 contains the return address, we must move it. */
23740 if (regs_available_for_popping == 0
23741 && reg_containing_return_addr == LAST_ARG_REGNUM)
23743 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23744 reg_containing_return_addr = LR_REGNUM;
23746 else if (size > 12)
23748 /* Register a4 is being used to hold part of the return value,
23749 but we have dire need of a free, low register. */
23750 restore_a4 = TRUE;
23752 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23755 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23757 /* The fourth argument register is available. */
23758 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23760 --pops_needed;
23764 /* Pop as many registers as we can. */
23765 thumb_pop (f, regs_available_for_popping);
23767 /* Process the registers we popped. */
23768 if (reg_containing_return_addr == -1)
23770 /* The return address was popped into the lowest numbered register. */
23771 regs_to_pop &= ~(1 << LR_REGNUM);
23773 reg_containing_return_addr =
23774 number_of_first_bit_set (regs_available_for_popping);
23776 /* Remove this register from the mask of available registers, so that
23777 the return address will not be corrupted by further pops. */
23778 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23781 /* If we popped other registers then handle them here. */
23782 if (regs_available_for_popping)
23784 int frame_pointer;
23786 /* Work out which register currently contains the frame pointer. */
23787 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23789 /* Move it into the correct place. */
23790 asm_fprintf (f, "\tmov\t%r, %r\n",
23791 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23793 /* (Temporarily) remove it from the mask of popped registers. */
23794 regs_available_for_popping &= ~(1 << frame_pointer);
23795 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23797 if (regs_available_for_popping)
23799 int stack_pointer;
23801 /* We popped the stack pointer as well,
23802 find the register that contains it. */
23803 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23805 /* Move it into the stack register. */
23806 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23808 /* At this point we have popped all necessary registers, so
23809 do not worry about restoring regs_available_for_popping
23810 to its correct value:
23812 assert (pops_needed == 0)
23813 assert (regs_available_for_popping == (1 << frame_pointer))
23814 assert (regs_to_pop == (1 << STACK_POINTER)) */
23816 else
23818 /* Since we have just moved the popped value into the frame
23819 pointer, the popping register is available for reuse, and
23820 we know that we still have the stack pointer left to pop. */
23821 regs_available_for_popping |= (1 << frame_pointer);
23825 /* If we still have registers left on the stack, but we no longer have
23826 any registers into which we can pop them, then we must move the return
23827 address into the link register and make available the register that
23828 contained it. */
23829 if (regs_available_for_popping == 0 && pops_needed > 0)
23831 regs_available_for_popping |= 1 << reg_containing_return_addr;
23833 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23834 reg_containing_return_addr);
23836 reg_containing_return_addr = LR_REGNUM;
23839 /* If we have registers left on the stack then pop some more.
23840 We know that at most we will want to pop FP and SP. */
23841 if (pops_needed > 0)
23843 int popped_into;
23844 int move_to;
23846 thumb_pop (f, regs_available_for_popping);
23848 /* We have popped either FP or SP.
23849 Move whichever one it is into the correct register. */
23850 popped_into = number_of_first_bit_set (regs_available_for_popping);
23851 move_to = number_of_first_bit_set (regs_to_pop);
23853 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23855 regs_to_pop &= ~(1 << move_to);
23857 --pops_needed;
23860 /* If we still have not popped everything then we must have only
23861 had one register available to us and we are now popping the SP. */
23862 if (pops_needed > 0)
23864 int popped_into;
23866 thumb_pop (f, regs_available_for_popping);
23868 popped_into = number_of_first_bit_set (regs_available_for_popping);
23870 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23872 /* assert (regs_to_pop == (1 << STACK_POINTER))
23873 assert (pops_needed == 1) */
23877 /* If necessary restore the a4 register. */
23878 if (restore_a4)
23880 if (reg_containing_return_addr != LR_REGNUM)
23882 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23883 reg_containing_return_addr = LR_REGNUM;
23886 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23889 if (crtl->calls_eh_return)
23890 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23892 /* Return to caller. */
23893 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23896 /* Scan INSN just before assembler is output for it.
23897 For Thumb-1, we track the status of the condition codes; this
23898 information is used in the cbranchsi4_insn pattern. */
23899 void
23900 thumb1_final_prescan_insn (rtx_insn *insn)
23902 if (flag_print_asm_name)
23903 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23904 INSN_ADDRESSES (INSN_UID (insn)));
23905 /* Don't overwrite the previous setter when we get to a cbranch. */
23906 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23908 enum attr_conds conds;
23910 if (cfun->machine->thumb1_cc_insn)
23912 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23913 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23914 CC_STATUS_INIT;
23916 conds = get_attr_conds (insn);
23917 if (conds == CONDS_SET)
23919 rtx set = single_set (insn);
23920 cfun->machine->thumb1_cc_insn = insn;
23921 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23922 cfun->machine->thumb1_cc_op1 = const0_rtx;
23923 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23924 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23926 rtx src1 = XEXP (SET_SRC (set), 1);
23927 if (src1 == const0_rtx)
23928 cfun->machine->thumb1_cc_mode = CCmode;
23930 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23932 /* Record the src register operand instead of dest because
23933 cprop_hardreg pass propagates src. */
23934 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23937 else if (conds != CONDS_NOCOND)
23938 cfun->machine->thumb1_cc_insn = NULL_RTX;
23941 /* Check if unexpected far jump is used. */
23942 if (cfun->machine->lr_save_eliminated
23943 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23944 internal_error("Unexpected thumb1 far jump");
23948 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23950 unsigned HOST_WIDE_INT mask = 0xff;
23951 int i;
23953 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23954 if (val == 0) /* XXX */
23955 return 0;
23957 for (i = 0; i < 25; i++)
23958 if ((val & (mask << i)) == val)
23959 return 1;
23961 return 0;
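/* In other words, the constant is accepted when all of its set bits fit
   within some run of 8 contiguous bits: for example 0x00ff0000
   (0xff << 16) is accepted, while 0x101 is rejected because its set bits
   span nine bit positions.  */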
23964 /* Returns nonzero if the current function contains,
23965 or might contain a far jump. */
23966 static int
23967 thumb_far_jump_used_p (void)
23969 rtx_insn *insn;
23970 bool far_jump = false;
23971 unsigned int func_size = 0;
23973 /* This test is only important for leaf functions. */
23974 /* assert (!leaf_function_p ()); */
23976 /* If we have already decided that far jumps may be used,
23977 do not bother checking again, and always return true even if
23978 it turns out that they are not being used. Once we have made
23979 the decision that far jumps are present (and that hence the link
23980 register will be pushed onto the stack) we cannot go back on it. */
23981 if (cfun->machine->far_jump_used)
23982 return 1;
23984 /* If this function is not being called from the prologue/epilogue
23985 generation code then it must be being called from the
23986 INITIAL_ELIMINATION_OFFSET macro. */
23987 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23989 /* In this case we know that we are being asked about the elimination
23990 of the arg pointer register. If that register is not being used,
23991 then there are no arguments on the stack, and we do not have to
23992 worry that a far jump might force the prologue to push the link
23993 register, changing the stack offsets. In this case we can just
23994 return false, since the presence of far jumps in the function will
23995 not affect stack offsets.
23997 If the arg pointer is live (or if it was live, but has now been
23998 eliminated and so set to dead) then we do have to test to see if
23999 the function might contain a far jump. This test can lead to some
24000 false negatives, since before reload is completed, the length of
24001 branch instructions is not known, so gcc defaults to returning their
24002 longest length, which in turn sets the far jump attribute to true.
24004 A false negative will not result in bad code being generated, but it
24005 will result in a needless push and pop of the link register. We
24006 hope that this does not occur too often.
24008 If we need doubleword stack alignment this could affect the other
24009 elimination offsets so we can't risk getting it wrong. */
24010 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24011 cfun->machine->arg_pointer_live = 1;
24012 else if (!cfun->machine->arg_pointer_live)
24013 return 0;
24016 /* We should not change far_jump_used during or after reload, as there is
24017 no chance to change stack frame layout. */
24018 if (reload_in_progress || reload_completed)
24019 return 0;
24021 /* Check to see if the function contains a branch
24022 insn with the far jump attribute set. */
24023 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24025 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24027 far_jump = true;
24029 func_size += get_attr_length (insn);
24032 /* The far_jump attribute will always be true for thumb1 before the
24033 shorten_branch pass, so checking the far_jump attribute before
24034 shorten_branch isn't very useful.
24036 The following heuristic tries to estimate more accurately whether a far
24037 jump will actually be used. The heuristic is very conservative, as there
24038 is no chance to roll back a decision not to use far jumps.
24040 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24041 that each 2-byte insn is associated with a 4-byte constant pool entry.
24042 Using a function size of 2048/3 as the threshold is conservative enough. */
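/* Worked example (illustrative figures only): the threshold corresponds to
   roughly 682 bytes of insns. A function whose insns total 700 bytes gives
   700 * 3 = 2100 >= 2048 and is assumed to need far jumps, while a
   600-byte function gives 1800 < 2048 and is not.  */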
24043 if (far_jump)
24045 if ((func_size * 3) >= 2048)
24047 /* Record the fact that we have decided that
24048 the function does use far jumps. */
24049 cfun->machine->far_jump_used = 1;
24050 return 1;
24054 return 0;
24057 /* Return nonzero if FUNC must be entered in ARM mode. */
24059 is_called_in_ARM_mode (tree func)
24061 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24063 /* Ignore the problem about functions whose address is taken. */
24064 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24065 return TRUE;
24067 #ifdef ARM_PE
24068 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24069 #else
24070 return FALSE;
24071 #endif
24074 /* Given the stack offsets and register mask in OFFSETS, decide how
24075 many additional registers to push instead of subtracting a constant
24076 from SP. For epilogues the principle is the same except we use pop.
24077 FOR_PROLOGUE indicates which we're generating. */
24078 static int
24079 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24081 HOST_WIDE_INT amount;
24082 unsigned long live_regs_mask = offsets->saved_regs_mask;
24083 /* Extract a mask of the ones we can give to the Thumb's push/pop
24084 instruction. */
24085 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24086 /* Then count how many other high registers will need to be pushed. */
24087 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24088 int n_free, reg_base, size;
24090 if (!for_prologue && frame_pointer_needed)
24091 amount = offsets->locals_base - offsets->saved_regs;
24092 else
24093 amount = offsets->outgoing_args - offsets->saved_regs;
24095 /* If the stack frame size is 512 exactly, we can save one load
24096 instruction, which should make this a win even when optimizing
24097 for speed. */
24098 if (!optimize_size && amount != 512)
24099 return 0;
24101 /* Can't do this if there are high registers to push. */
24102 if (high_regs_pushed != 0)
24103 return 0;
24105 /* Shouldn't do it in the prologue if no registers would normally
24106 be pushed at all. In the epilogue, also allow it if we'll have
24107 a pop insn for the PC. */
24108 if (l_mask == 0
24109 && (for_prologue
24110 || TARGET_BACKTRACE
24111 || (live_regs_mask & 1 << LR_REGNUM) == 0
24112 || TARGET_INTERWORK
24113 || crtl->args.pretend_args_size != 0))
24114 return 0;
24116 /* Don't do this if thumb_expand_prologue wants to emit instructions
24117 between the push and the stack frame allocation. */
24118 if (for_prologue
24119 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24120 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24121 return 0;
24123 reg_base = 0;
24124 n_free = 0;
24125 if (!for_prologue)
24127 size = arm_size_return_regs ();
24128 reg_base = ARM_NUM_INTS (size);
24129 live_regs_mask >>= reg_base;
24132 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24133 && (for_prologue || call_used_regs[reg_base + n_free]))
24135 live_regs_mask >>= 1;
24136 n_free++;
24139 if (n_free == 0)
24140 return 0;
24141 gcc_assert (amount / 4 * 4 == amount);
24143 if (amount >= 512 && (amount - n_free * 4) < 512)
24144 return (amount - 508) / 4;
24145 if (amount <= n_free * 4)
24146 return amount / 4;
24147 return 0;
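/* Worked example (hypothetical numbers): if AMOUNT is 520 and three low
   registers are free, then 520 - 3 * 4 = 508 < 512, so (520 - 508) / 4 = 3
   extra registers are pushed/popped and only 508 bytes are left for the SP
   adjustment, which is below the 512 limit tested above.  */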
24150 /* The bits which aren't usefully expanded as rtl. */
24151 const char *
24152 thumb1_unexpanded_epilogue (void)
24154 arm_stack_offsets *offsets;
24155 int regno;
24156 unsigned long live_regs_mask = 0;
24157 int high_regs_pushed = 0;
24158 int extra_pop;
24159 int had_to_push_lr;
24160 int size;
24162 if (cfun->machine->return_used_this_function != 0)
24163 return "";
24165 if (IS_NAKED (arm_current_func_type ()))
24166 return "";
24168 offsets = arm_get_frame_offsets ();
24169 live_regs_mask = offsets->saved_regs_mask;
24170 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24172 /* We can deduce the registers used from the function's return value.
24173 This is more reliable than examining df_regs_ever_live_p () because that
24174 will be set if the register is ever used in the function, not just if
24175 the register is used to hold a return value. */
24176 size = arm_size_return_regs ();
24178 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24179 if (extra_pop > 0)
24181 unsigned long extra_mask = (1 << extra_pop) - 1;
24182 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24185 /* The prolog may have pushed some high registers to use as
24186 work registers. e.g. the testsuite file:
24187 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24188 compiles to produce:
24189 push {r4, r5, r6, r7, lr}
24190 mov r7, r9
24191 mov r6, r8
24192 push {r6, r7}
24193 as part of the prolog. We have to undo that pushing here. */
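/* For the example above, the code below would undo that with something
   like (the exact low registers depend on the return-value size):
       pop  {r2, r3}
       mov  r8, r2
       mov  r9, r3
   before the normal pop of the low registers and the return.  */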
24195 if (high_regs_pushed)
24197 unsigned long mask = live_regs_mask & 0xff;
24198 int next_hi_reg;
24200 /* The available low registers depend on the size of the value we are
24201 returning. */
24202 if (size <= 12)
24203 mask |= 1 << 3;
24204 if (size <= 8)
24205 mask |= 1 << 2;
24207 if (mask == 0)
24208 /* Oh dear! We have no low registers into which we can pop
24209 high registers! */
24210 internal_error
24211 ("no low registers available for popping high registers");
24213 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24214 if (live_regs_mask & (1 << next_hi_reg))
24215 break;
24217 while (high_regs_pushed)
24219 /* Find lo register(s) into which the high register(s) can
24220 be popped. */
24221 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24223 if (mask & (1 << regno))
24224 high_regs_pushed--;
24225 if (high_regs_pushed == 0)
24226 break;
24229 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24231 /* Pop the values into the low register(s). */
24232 thumb_pop (asm_out_file, mask);
24234 /* Move the value(s) into the high registers. */
24235 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24237 if (mask & (1 << regno))
24239 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24240 regno);
24242 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24243 if (live_regs_mask & (1 << next_hi_reg))
24244 break;
24248 live_regs_mask &= ~0x0f00;
24251 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24252 live_regs_mask &= 0xff;
24254 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24256 /* Pop the return address into the PC. */
24257 if (had_to_push_lr)
24258 live_regs_mask |= 1 << PC_REGNUM;
24260 /* Either no argument registers were pushed or a backtrace
24261 structure was created which includes an adjusted stack
24262 pointer, so just pop everything. */
24263 if (live_regs_mask)
24264 thumb_pop (asm_out_file, live_regs_mask);
24266 /* We have either just popped the return address into the
24267 PC or it was kept in LR for the entire function.
24268 Note that thumb_pop has already called thumb_exit if the
24269 PC was in the list. */
24270 if (!had_to_push_lr)
24271 thumb_exit (asm_out_file, LR_REGNUM);
24273 else
24275 /* Pop everything but the return address. */
24276 if (live_regs_mask)
24277 thumb_pop (asm_out_file, live_regs_mask);
24279 if (had_to_push_lr)
24281 if (size > 12)
24283 /* We have no free low regs, so save one. */
24284 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24285 LAST_ARG_REGNUM);
24288 /* Get the return address into a temporary register. */
24289 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24291 if (size > 12)
24293 /* Move the return address to lr. */
24294 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24295 LAST_ARG_REGNUM);
24296 /* Restore the low register. */
24297 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24298 IP_REGNUM);
24299 regno = LR_REGNUM;
24301 else
24302 regno = LAST_ARG_REGNUM;
24304 else
24305 regno = LR_REGNUM;
24307 /* Remove the argument registers that were pushed onto the stack. */
24308 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24309 SP_REGNUM, SP_REGNUM,
24310 crtl->args.pretend_args_size);
24312 thumb_exit (asm_out_file, regno);
24315 return "";
24318 /* Functions to save and restore machine-specific function data. */
24319 static struct machine_function *
24320 arm_init_machine_status (void)
24322 struct machine_function *machine;
24323 machine = ggc_cleared_alloc<machine_function> ();
24325 #if ARM_FT_UNKNOWN != 0
24326 machine->func_type = ARM_FT_UNKNOWN;
24327 #endif
24328 return machine;
24331 /* Return an RTX indicating where the return address to the
24332 calling function can be found. */
24334 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24336 if (count != 0)
24337 return NULL_RTX;
24339 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24342 /* Do anything needed before RTL is emitted for each function. */
24343 void
24344 arm_init_expanders (void)
24346 /* Arrange to initialize and mark the machine per-function status. */
24347 init_machine_status = arm_init_machine_status;
24349 /* This is to stop the combine pass optimizing away the alignment
24350 adjustment of va_arg. */
24351 /* ??? It is claimed that this should not be necessary. */
24352 if (cfun)
24353 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24357 /* Like arm_compute_initial_elimination_offset. Simpler because there
24358 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24359 to point at the base of the local variables after static stack
24360 space for a function has been allocated. */
24362 HOST_WIDE_INT
24363 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24365 arm_stack_offsets *offsets;
24367 offsets = arm_get_frame_offsets ();
24369 switch (from)
24371 case ARG_POINTER_REGNUM:
24372 switch (to)
24374 case STACK_POINTER_REGNUM:
24375 return offsets->outgoing_args - offsets->saved_args;
24377 case FRAME_POINTER_REGNUM:
24378 return offsets->soft_frame - offsets->saved_args;
24380 case ARM_HARD_FRAME_POINTER_REGNUM:
24381 return offsets->saved_regs - offsets->saved_args;
24383 case THUMB_HARD_FRAME_POINTER_REGNUM:
24384 return offsets->locals_base - offsets->saved_args;
24386 default:
24387 gcc_unreachable ();
24389 break;
24391 case FRAME_POINTER_REGNUM:
24392 switch (to)
24394 case STACK_POINTER_REGNUM:
24395 return offsets->outgoing_args - offsets->soft_frame;
24397 case ARM_HARD_FRAME_POINTER_REGNUM:
24398 return offsets->saved_regs - offsets->soft_frame;
24400 case THUMB_HARD_FRAME_POINTER_REGNUM:
24401 return offsets->locals_base - offsets->soft_frame;
24403 default:
24404 gcc_unreachable ();
24406 break;
24408 default:
24409 gcc_unreachable ();
24413 /* Generate the function's prologue. */
24415 void
24416 thumb1_expand_prologue (void)
24418 rtx_insn *insn;
24420 HOST_WIDE_INT amount;
24421 arm_stack_offsets *offsets;
24422 unsigned long func_type;
24423 int regno;
24424 unsigned long live_regs_mask;
24425 unsigned long l_mask;
24426 unsigned high_regs_pushed = 0;
24428 func_type = arm_current_func_type ();
24430 /* Naked functions don't have prologues. */
24431 if (IS_NAKED (func_type))
24432 return;
24434 if (IS_INTERRUPT (func_type))
24436 error ("interrupt Service Routines cannot be coded in Thumb mode");
24437 return;
24440 if (is_called_in_ARM_mode (current_function_decl))
24441 emit_insn (gen_prologue_thumb1_interwork ());
24443 offsets = arm_get_frame_offsets ();
24444 live_regs_mask = offsets->saved_regs_mask;
24446 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24447 l_mask = live_regs_mask & 0x40ff;
24448 /* Then count how many other high registers will need to be pushed. */
24449 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24451 if (crtl->args.pretend_args_size)
24453 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24455 if (cfun->machine->uses_anonymous_args)
24457 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24458 unsigned long mask;
24460 mask = 1ul << (LAST_ARG_REGNUM + 1);
24461 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24463 insn = thumb1_emit_multi_reg_push (mask, 0);
24465 else
24467 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24468 stack_pointer_rtx, x));
24470 RTX_FRAME_RELATED_P (insn) = 1;
24473 if (TARGET_BACKTRACE)
24475 HOST_WIDE_INT offset = 0;
24476 unsigned work_register;
24477 rtx work_reg, x, arm_hfp_rtx;
24479 /* We have been asked to create a stack backtrace structure.
24480 The code looks like this:
24482 0 .align 2
24483 0 func:
24484 0 sub SP, #16 Reserve space for 4 registers.
24485 2 push {R7} Push low registers.
24486 4 add R7, SP, #20 Get the stack pointer before the push.
24487 6 str R7, [SP, #8] Store the stack pointer
24488 (before reserving the space).
24489 8 mov R7, PC Get hold of the start of this code + 12.
24490 10 str R7, [SP, #16] Store it.
24491 12 mov R7, FP Get hold of the current frame pointer.
24492 14 str R7, [SP, #4] Store it.
24493 16 mov R7, LR Get hold of the current return address.
24494 18 str R7, [SP, #12] Store it.
24495 20 add R7, SP, #16 Point at the start of the
24496 backtrace structure.
24497 22 mov FP, R7 Put this value into the frame pointer. */
24499 work_register = thumb_find_work_register (live_regs_mask);
24500 work_reg = gen_rtx_REG (SImode, work_register);
24501 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24503 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24504 stack_pointer_rtx, GEN_INT (-16)));
24505 RTX_FRAME_RELATED_P (insn) = 1;
24507 if (l_mask)
24509 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24510 RTX_FRAME_RELATED_P (insn) = 1;
24512 offset = bit_count (l_mask) * UNITS_PER_WORD;
24515 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24516 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24518 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24519 x = gen_frame_mem (SImode, x);
24520 emit_move_insn (x, work_reg);
24522 /* Make sure that the instruction fetching the PC is in the right place
24523 to calculate "start of backtrace creation code + 12". */
24524 /* ??? The stores using the common WORK_REG ought to be enough to
24525 prevent the scheduler from doing anything weird. Failing that
24526 we could always move all of the following into an UNSPEC_VOLATILE. */
24527 if (l_mask)
24529 x = gen_rtx_REG (SImode, PC_REGNUM);
24530 emit_move_insn (work_reg, x);
24532 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24533 x = gen_frame_mem (SImode, x);
24534 emit_move_insn (x, work_reg);
24536 emit_move_insn (work_reg, arm_hfp_rtx);
24538 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24539 x = gen_frame_mem (SImode, x);
24540 emit_move_insn (x, work_reg);
24542 else
24544 emit_move_insn (work_reg, arm_hfp_rtx);
24546 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24547 x = gen_frame_mem (SImode, x);
24548 emit_move_insn (x, work_reg);
24550 x = gen_rtx_REG (SImode, PC_REGNUM);
24551 emit_move_insn (work_reg, x);
24553 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24554 x = gen_frame_mem (SImode, x);
24555 emit_move_insn (x, work_reg);
24558 x = gen_rtx_REG (SImode, LR_REGNUM);
24559 emit_move_insn (work_reg, x);
24561 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24562 x = gen_frame_mem (SImode, x);
24563 emit_move_insn (x, work_reg);
24565 x = GEN_INT (offset + 12);
24566 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24568 emit_move_insn (arm_hfp_rtx, work_reg);
24570 /* Optimization: If we are not pushing any low registers but we are going
24571 to push some high registers then delay our first push. This will just
24572 be a push of LR and we can combine it with the push of the first high
24573 register. */
24574 else if ((l_mask & 0xff) != 0
24575 || (high_regs_pushed == 0 && l_mask))
24577 unsigned long mask = l_mask;
24578 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24579 insn = thumb1_emit_multi_reg_push (mask, mask);
24580 RTX_FRAME_RELATED_P (insn) = 1;
24583 if (high_regs_pushed)
24585 unsigned pushable_regs;
24586 unsigned next_hi_reg;
24587 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24588 : crtl->args.info.nregs;
24589 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24591 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24592 if (live_regs_mask & (1 << next_hi_reg))
24593 break;
24595 /* Here we need to mask out registers used for passing arguments,
24596 even if they could be pushed, to avoid using them to stash the high
24597 registers; doing so could clobber arguments still needed by the function. */
24598 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24600 if (pushable_regs == 0)
24601 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24603 while (high_regs_pushed > 0)
24605 unsigned long real_regs_mask = 0;
24607 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24609 if (pushable_regs & (1 << regno))
24611 emit_move_insn (gen_rtx_REG (SImode, regno),
24612 gen_rtx_REG (SImode, next_hi_reg));
24614 high_regs_pushed --;
24615 real_regs_mask |= (1 << next_hi_reg);
24617 if (high_regs_pushed)
24619 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24620 next_hi_reg --)
24621 if (live_regs_mask & (1 << next_hi_reg))
24622 break;
24624 else
24626 pushable_regs &= ~((1 << regno) - 1);
24627 break;
24632 /* If we had to find a work register and we have not yet
24633 saved the LR then add it to the list of regs to push. */
24634 if (l_mask == (1 << LR_REGNUM))
24636 pushable_regs |= l_mask;
24637 real_regs_mask |= l_mask;
24638 l_mask = 0;
24641 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24642 RTX_FRAME_RELATED_P (insn) = 1;
24646 /* Load the pic register before setting the frame pointer,
24647 so we can use r7 as a temporary work register. */
24648 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24649 arm_load_pic_register (live_regs_mask);
24651 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24652 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24653 stack_pointer_rtx);
24655 if (flag_stack_usage_info)
24656 current_function_static_stack_size
24657 = offsets->outgoing_args - offsets->saved_args;
24659 amount = offsets->outgoing_args - offsets->saved_regs;
24660 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24661 if (amount)
24663 if (amount < 512)
24665 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24666 GEN_INT (- amount)));
24667 RTX_FRAME_RELATED_P (insn) = 1;
24669 else
24671 rtx reg, dwarf;
24673 /* The stack decrement is too big for an immediate value in a single
24674 insn. In theory we could issue multiple subtracts, but after
24675 three of them it becomes more space efficient to place the full
24676 value in the constant pool and load into a register. (Also the
24677 ARM debugger really likes to see only one stack decrement per
24678 function). So instead we look for a scratch register into which
24679 we can load the decrement, and then we subtract this from the
24680 stack pointer. Unfortunately on the thumb the only available
24681 scratch registers are the argument registers, and we cannot use
24682 these as they may hold arguments to the function. Instead we
24683 attempt to locate a call preserved register which is used by this
24684 function. If we can find one, then we know that it will have
24685 been pushed at the start of the prologue and so we can corrupt
24686 it now. */
24687 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24688 if (live_regs_mask & (1 << regno))
24689 break;
24691 gcc_assert(regno <= LAST_LO_REGNUM);
24693 reg = gen_rtx_REG (SImode, regno);
24695 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24697 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24698 stack_pointer_rtx, reg));
24700 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24701 plus_constant (Pmode, stack_pointer_rtx,
24702 -amount));
24703 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24704 RTX_FRAME_RELATED_P (insn) = 1;
24708 if (frame_pointer_needed)
24709 thumb_set_frame_pointer (offsets);
24711 /* If we are profiling, make sure no instructions are scheduled before
24712 the call to mcount. Similarly if the user has requested no
24713 scheduling in the prolog. Similarly if we want non-call exceptions
24714 using the EABI unwinder, to prevent faulting instructions from being
24715 swapped with a stack adjustment. */
24716 if (crtl->profile || !TARGET_SCHED_PROLOG
24717 || (arm_except_unwind_info (&global_options) == UI_TARGET
24718 && cfun->can_throw_non_call_exceptions))
24719 emit_insn (gen_blockage ());
24721 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24722 if (live_regs_mask & 0xff)
24723 cfun->machine->lr_save_eliminated = 0;
24726 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
24727 POP instruction can be generated. LR should be replaced by PC. All
24728 the checks required are already done by USE_RETURN_INSN (). Hence,
24729 all we really need to check here is whether a single register or
24730 multiple registers are to be popped. */
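/* For instance (a sketch of the expected output): popping a single
   register straight into the PC typically assembles to something like
   "ldr pc, [sp], #4", while the multi-register case becomes a
   "pop {..., pc}".  */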
24731 void
24732 thumb2_expand_return (bool simple_return)
24734 int i, num_regs;
24735 unsigned long saved_regs_mask;
24736 arm_stack_offsets *offsets;
24738 offsets = arm_get_frame_offsets ();
24739 saved_regs_mask = offsets->saved_regs_mask;
24741 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24742 if (saved_regs_mask & (1 << i))
24743 num_regs++;
24745 if (!simple_return && saved_regs_mask)
24747 if (num_regs == 1)
24749 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24750 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24751 rtx addr = gen_rtx_MEM (SImode,
24752 gen_rtx_POST_INC (SImode,
24753 stack_pointer_rtx));
24754 set_mem_alias_set (addr, get_frame_alias_set ());
24755 XVECEXP (par, 0, 0) = ret_rtx;
24756 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24757 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24758 emit_jump_insn (par);
24760 else
24762 saved_regs_mask &= ~ (1 << LR_REGNUM);
24763 saved_regs_mask |= (1 << PC_REGNUM);
24764 arm_emit_multi_reg_pop (saved_regs_mask);
24767 else
24769 emit_jump_insn (simple_return_rtx);
24773 void
24774 thumb1_expand_epilogue (void)
24776 HOST_WIDE_INT amount;
24777 arm_stack_offsets *offsets;
24778 int regno;
24780 /* Naked functions don't have epilogues. */
24781 if (IS_NAKED (arm_current_func_type ()))
24782 return;
24784 offsets = arm_get_frame_offsets ();
24785 amount = offsets->outgoing_args - offsets->saved_regs;
24787 if (frame_pointer_needed)
24789 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24790 amount = offsets->locals_base - offsets->saved_regs;
24792 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24794 gcc_assert (amount >= 0);
24795 if (amount)
24797 emit_insn (gen_blockage ());
24799 if (amount < 512)
24800 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24801 GEN_INT (amount)));
24802 else
24804 /* r3 is always free in the epilogue. */
24805 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24807 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24808 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24812 /* Emit a USE (stack_pointer_rtx), so that
24813 the stack adjustment will not be deleted. */
24814 emit_insn (gen_force_register_use (stack_pointer_rtx));
24816 if (crtl->profile || !TARGET_SCHED_PROLOG)
24817 emit_insn (gen_blockage ());
24819 /* Emit a clobber for each insn that will be restored in the epilogue,
24820 so that flow2 will get register lifetimes correct. */
24821 for (regno = 0; regno < 13; regno++)
24822 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24823 emit_clobber (gen_rtx_REG (SImode, regno));
24825 if (! df_regs_ever_live_p (LR_REGNUM))
24826 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24829 /* Epilogue code for APCS frame. */
24830 static void
24831 arm_expand_epilogue_apcs_frame (bool really_return)
24833 unsigned long func_type;
24834 unsigned long saved_regs_mask;
24835 int num_regs = 0;
24836 int i;
24837 int floats_from_frame = 0;
24838 arm_stack_offsets *offsets;
24840 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24841 func_type = arm_current_func_type ();
24843 /* Get frame offsets for ARM. */
24844 offsets = arm_get_frame_offsets ();
24845 saved_regs_mask = offsets->saved_regs_mask;
24847 /* Find the offset of the floating-point save area in the frame. */
24848 floats_from_frame
24849 = (offsets->saved_args
24850 + arm_compute_static_chain_stack_bytes ()
24851 - offsets->frame);
24853 /* Compute how many core registers are saved and how far away the floats are. */
24854 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24855 if (saved_regs_mask & (1 << i))
24857 num_regs++;
24858 floats_from_frame += 4;
24861 if (TARGET_HARD_FLOAT && TARGET_VFP)
24863 int start_reg;
24864 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24866 /* The offset is from IP_REGNUM. */
24867 int saved_size = arm_get_vfp_saved_size ();
24868 if (saved_size > 0)
24870 rtx_insn *insn;
24871 floats_from_frame += saved_size;
24872 insn = emit_insn (gen_addsi3 (ip_rtx,
24873 hard_frame_pointer_rtx,
24874 GEN_INT (-floats_from_frame)));
24875 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24876 ip_rtx, hard_frame_pointer_rtx);
24879 /* Generate VFP register multi-pop. */
24880 start_reg = FIRST_VFP_REGNUM;
24882 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24883 /* Look for a case where a reg does not need restoring. */
24884 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24885 && (!df_regs_ever_live_p (i + 1)
24886 || call_used_regs[i + 1]))
24888 if (start_reg != i)
24889 arm_emit_vfp_multi_reg_pop (start_reg,
24890 (i - start_reg) / 2,
24891 gen_rtx_REG (SImode,
24892 IP_REGNUM));
24893 start_reg = i + 2;
24896 /* Restore the remaining regs that we have discovered (or possibly
24897 even all of them, if the conditional in the for loop never
24898 fired). */
24899 if (start_reg != i)
24900 arm_emit_vfp_multi_reg_pop (start_reg,
24901 (i - start_reg) / 2,
24902 gen_rtx_REG (SImode, IP_REGNUM));
24905 if (TARGET_IWMMXT)
24907 /* The frame pointer is guaranteed to be non-double-word aligned, as
24908 it is set to double-word-aligned old_stack_pointer - 4. */
24909 rtx_insn *insn;
24910 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24912 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24913 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24915 rtx addr = gen_frame_mem (V2SImode,
24916 plus_constant (Pmode, hard_frame_pointer_rtx,
24917 - lrm_count * 4));
24918 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24919 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24920 gen_rtx_REG (V2SImode, i),
24921 NULL_RTX);
24922 lrm_count += 2;
24926 /* saved_regs_mask should contain IP which contains old stack pointer
24927 at the time of activation creation. Since SP and IP are adjacent registers,
24928 we can restore the value directly into SP. */
24929 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24930 saved_regs_mask &= ~(1 << IP_REGNUM);
24931 saved_regs_mask |= (1 << SP_REGNUM);
24933 /* There are two registers left in saved_regs_mask - LR and PC. We
24934 only need to restore LR (the return address), but to
24935 save time we can load it directly into PC, unless we need a
24936 special function exit sequence, or we are not really returning. */
24937 if (really_return
24938 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24939 && !crtl->calls_eh_return)
24940 /* Delete LR from the register mask, so that LR on
24941 the stack is loaded into the PC in the register mask. */
24942 saved_regs_mask &= ~(1 << LR_REGNUM);
24943 else
24944 saved_regs_mask &= ~(1 << PC_REGNUM);
24946 num_regs = bit_count (saved_regs_mask);
24947 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24949 rtx_insn *insn;
24950 emit_insn (gen_blockage ());
24951 /* Unwind the stack to just below the saved registers. */
24952 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24953 hard_frame_pointer_rtx,
24954 GEN_INT (- 4 * num_regs)));
24956 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24957 stack_pointer_rtx, hard_frame_pointer_rtx);
24960 arm_emit_multi_reg_pop (saved_regs_mask);
24962 if (IS_INTERRUPT (func_type))
24964 /* Interrupt handlers will have pushed the
24965 IP onto the stack, so restore it now. */
24966 rtx_insn *insn;
24967 rtx addr = gen_rtx_MEM (SImode,
24968 gen_rtx_POST_INC (SImode,
24969 stack_pointer_rtx));
24970 set_mem_alias_set (addr, get_frame_alias_set ());
24971 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24972 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24973 gen_rtx_REG (SImode, IP_REGNUM),
24974 NULL_RTX);
24977 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24978 return;
24980 if (crtl->calls_eh_return)
24981 emit_insn (gen_addsi3 (stack_pointer_rtx,
24982 stack_pointer_rtx,
24983 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24985 if (IS_STACKALIGN (func_type))
24986 /* Restore the original stack pointer. Before prologue, the stack was
24987 realigned and the original stack pointer saved in r0. For details,
24988 see comment in arm_expand_prologue. */
24989 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24991 emit_jump_insn (simple_return_rtx);
24994 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24995 function is not a sibcall. */
24996 void
24997 arm_expand_epilogue (bool really_return)
24999 unsigned long func_type;
25000 unsigned long saved_regs_mask;
25001 int num_regs = 0;
25002 int i;
25003 int amount;
25004 arm_stack_offsets *offsets;
25006 func_type = arm_current_func_type ();
25008 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25009 and let output_return_instruction take care of any instruction emission. */
25010 if (IS_NAKED (func_type)
25011 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25013 if (really_return)
25014 emit_jump_insn (simple_return_rtx);
25015 return;
25018 /* If we are throwing an exception, then we really must be doing a
25019 return, so we can't tail-call. */
25020 gcc_assert (!crtl->calls_eh_return || really_return);
25022 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25024 arm_expand_epilogue_apcs_frame (really_return);
25025 return;
25028 /* Get frame offsets for ARM. */
25029 offsets = arm_get_frame_offsets ();
25030 saved_regs_mask = offsets->saved_regs_mask;
25031 num_regs = bit_count (saved_regs_mask);
25033 if (frame_pointer_needed)
25035 rtx_insn *insn;
25036 /* Restore stack pointer if necessary. */
25037 if (TARGET_ARM)
25039 /* In ARM mode, frame pointer points to first saved register.
25040 Restore stack pointer to last saved register. */
25041 amount = offsets->frame - offsets->saved_regs;
25043 /* Force out any pending memory operations that reference stacked data
25044 before stack de-allocation occurs. */
25045 emit_insn (gen_blockage ());
25046 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25047 hard_frame_pointer_rtx,
25048 GEN_INT (amount)));
25049 arm_add_cfa_adjust_cfa_note (insn, amount,
25050 stack_pointer_rtx,
25051 hard_frame_pointer_rtx);
25053 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25054 deleted. */
25055 emit_insn (gen_force_register_use (stack_pointer_rtx));
25057 else
25059 /* In Thumb-2 mode, the frame pointer points to the last saved
25060 register. */
25061 amount = offsets->locals_base - offsets->saved_regs;
25062 if (amount)
25064 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25065 hard_frame_pointer_rtx,
25066 GEN_INT (amount)));
25067 arm_add_cfa_adjust_cfa_note (insn, amount,
25068 hard_frame_pointer_rtx,
25069 hard_frame_pointer_rtx);
25072 /* Force out any pending memory operations that reference stacked data
25073 before stack de-allocation occurs. */
25074 emit_insn (gen_blockage ());
25075 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25076 hard_frame_pointer_rtx));
25077 arm_add_cfa_adjust_cfa_note (insn, 0,
25078 stack_pointer_rtx,
25079 hard_frame_pointer_rtx);
25080 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25081 deleted. */
25082 emit_insn (gen_force_register_use (stack_pointer_rtx));
25085 else
25087 /* Pop off outgoing args and local frame to adjust stack pointer to
25088 last saved register. */
25089 amount = offsets->outgoing_args - offsets->saved_regs;
25090 if (amount)
25092 rtx_insn *tmp;
25093 /* Force out any pending memory operations that reference stacked data
25094 before stack de-allocation occurs. */
25095 emit_insn (gen_blockage ());
25096 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25097 stack_pointer_rtx,
25098 GEN_INT (amount)));
25099 arm_add_cfa_adjust_cfa_note (tmp, amount,
25100 stack_pointer_rtx, stack_pointer_rtx);
25101 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25102 not deleted. */
25103 emit_insn (gen_force_register_use (stack_pointer_rtx));
25107 if (TARGET_HARD_FLOAT && TARGET_VFP)
25109 /* Generate VFP register multi-pop. */
25110 int end_reg = LAST_VFP_REGNUM + 1;
25112 /* Scan the registers in reverse order. We need to match
25113 any groupings made in the prologue and generate matching
25114 vldm operations. The need to match groups is because,
25115 unlike pop, vldm can only do consecutive regs. */
25116 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25117 /* Look for a case where a reg does not need restoring. */
25118 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25119 && (!df_regs_ever_live_p (i + 1)
25120 || call_used_regs[i + 1]))
25122 /* Restore the regs discovered so far (from reg+2 to
25123 end_reg). */
25124 if (end_reg > i + 2)
25125 arm_emit_vfp_multi_reg_pop (i + 2,
25126 (end_reg - (i + 2)) / 2,
25127 stack_pointer_rtx);
25128 end_reg = i;
25131 /* Restore the remaining regs that we have discovered (or possibly
25132 even all of them, if the conditional in the for loop never
25133 fired). */
25134 if (end_reg > i + 2)
25135 arm_emit_vfp_multi_reg_pop (i + 2,
25136 (end_reg - (i + 2)) / 2,
25137 stack_pointer_rtx);
25140 if (TARGET_IWMMXT)
25141 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25142 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25144 rtx_insn *insn;
25145 rtx addr = gen_rtx_MEM (V2SImode,
25146 gen_rtx_POST_INC (SImode,
25147 stack_pointer_rtx));
25148 set_mem_alias_set (addr, get_frame_alias_set ());
25149 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25150 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25151 gen_rtx_REG (V2SImode, i),
25152 NULL_RTX);
25153 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25154 stack_pointer_rtx, stack_pointer_rtx);
25157 if (saved_regs_mask)
25159 rtx insn;
25160 bool return_in_pc = false;
25162 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25163 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25164 && !IS_STACKALIGN (func_type)
25165 && really_return
25166 && crtl->args.pretend_args_size == 0
25167 && saved_regs_mask & (1 << LR_REGNUM)
25168 && !crtl->calls_eh_return)
25170 saved_regs_mask &= ~(1 << LR_REGNUM);
25171 saved_regs_mask |= (1 << PC_REGNUM);
25172 return_in_pc = true;
25175 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25177 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25178 if (saved_regs_mask & (1 << i))
25180 rtx addr = gen_rtx_MEM (SImode,
25181 gen_rtx_POST_INC (SImode,
25182 stack_pointer_rtx));
25183 set_mem_alias_set (addr, get_frame_alias_set ());
25185 if (i == PC_REGNUM)
25187 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25188 XVECEXP (insn, 0, 0) = ret_rtx;
25189 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25190 gen_rtx_REG (SImode, i),
25191 addr);
25192 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25193 insn = emit_jump_insn (insn);
25195 else
25197 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25198 addr));
25199 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25200 gen_rtx_REG (SImode, i),
25201 NULL_RTX);
25202 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25203 stack_pointer_rtx,
25204 stack_pointer_rtx);
25208 else
25210 if (TARGET_LDRD
25211 && current_tune->prefer_ldrd_strd
25212 && !optimize_function_for_size_p (cfun))
25214 if (TARGET_THUMB2)
25215 thumb2_emit_ldrd_pop (saved_regs_mask);
25216 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25217 arm_emit_ldrd_pop (saved_regs_mask);
25218 else
25219 arm_emit_multi_reg_pop (saved_regs_mask);
25221 else
25222 arm_emit_multi_reg_pop (saved_regs_mask);
25225 if (return_in_pc)
25226 return;
25229 if (crtl->args.pretend_args_size)
25231 int i, j;
25232 rtx dwarf = NULL_RTX;
25233 rtx_insn *tmp =
25234 emit_insn (gen_addsi3 (stack_pointer_rtx,
25235 stack_pointer_rtx,
25236 GEN_INT (crtl->args.pretend_args_size)));
25238 RTX_FRAME_RELATED_P (tmp) = 1;
25240 if (cfun->machine->uses_anonymous_args)
25242 /* Restore pretend args. See arm_expand_prologue for how the
25243 pretend args were saved on the stack. */
25244 int num_regs = crtl->args.pretend_args_size / 4;
25245 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25246 for (j = 0, i = 0; j < num_regs; i++)
25247 if (saved_regs_mask & (1 << i))
25249 rtx reg = gen_rtx_REG (SImode, i);
25250 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25251 j++;
25253 REG_NOTES (tmp) = dwarf;
25255 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25256 stack_pointer_rtx, stack_pointer_rtx);
25259 if (!really_return)
25260 return;
25262 if (crtl->calls_eh_return)
25263 emit_insn (gen_addsi3 (stack_pointer_rtx,
25264 stack_pointer_rtx,
25265 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25267 if (IS_STACKALIGN (func_type))
25268 /* Restore the original stack pointer. Before prologue, the stack was
25269 realigned and the original stack pointer saved in r0. For details,
25270 see comment in arm_expand_prologue. */
25271 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25273 emit_jump_insn (simple_return_rtx);
25276 /* Implementation of insn prologue_thumb1_interwork. This is the first
25277 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25279 const char *
25280 thumb1_output_interwork (void)
25282 const char * name;
25283 FILE *f = asm_out_file;
25285 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25286 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25287 == SYMBOL_REF);
25288 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25290 /* Generate code sequence to switch us into Thumb mode. */
25291 /* The .code 32 directive has already been emitted by
25292 ASM_DECLARE_FUNCTION_NAME. */
25293 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25294 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25296 /* Generate a label, so that the debugger will notice the
25297 change in instruction sets. This label is also used by
25298 the assembler to bypass the ARM code when this function
25299 is called from a Thumb encoded function elsewhere in the
25300 same file. Hence the definition of STUB_NAME here must
25301 agree with the definition in gas/config/tc-arm.c. */
25303 #define STUB_NAME ".real_start_of"
25305 fprintf (f, "\t.code\t16\n");
25306 #ifdef ARM_PE
25307 if (arm_dllexport_name_p (name))
25308 name = arm_strip_name_encoding (name);
25309 #endif
25310 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25311 fprintf (f, "\t.thumb_func\n");
25312 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25314 return "";
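/* Putting the pieces together, the stub emitted above looks roughly
   like this for a function "foo" (illustrative; the label is formed
   from STUB_NAME, the user label prefix and the function name):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	<STUB_NAME><foo>
	.thumb_func
   <STUB_NAME><foo>:  */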
25317 /* Handle the case of a double word load into a low register from
25318 a computed memory address. The computed address may involve a
25319 register which is overwritten by the load. */
25320 const char *
25321 thumb_load_double_from_address (rtx *operands)
25323 rtx addr;
25324 rtx base;
25325 rtx offset;
25326 rtx arg1;
25327 rtx arg2;
25329 gcc_assert (REG_P (operands[0]));
25330 gcc_assert (MEM_P (operands[1]));
25332 /* Get the memory address. */
25333 addr = XEXP (operands[1], 0);
25335 /* Work out how the memory address is computed. */
25336 switch (GET_CODE (addr))
25338 case REG:
25339 operands[2] = adjust_address (operands[1], SImode, 4);
25341 if (REGNO (operands[0]) == REGNO (addr))
25343 output_asm_insn ("ldr\t%H0, %2", operands);
25344 output_asm_insn ("ldr\t%0, %1", operands);
25346 else
25348 output_asm_insn ("ldr\t%0, %1", operands);
25349 output_asm_insn ("ldr\t%H0, %2", operands);
25351 break;
25353 case CONST:
25354 /* Compute <address> + 4 for the high order load. */
25355 operands[2] = adjust_address (operands[1], SImode, 4);
25357 output_asm_insn ("ldr\t%0, %1", operands);
25358 output_asm_insn ("ldr\t%H0, %2", operands);
25359 break;
25361 case PLUS:
25362 arg1 = XEXP (addr, 0);
25363 arg2 = XEXP (addr, 1);
25365 if (CONSTANT_P (arg1))
25366 base = arg2, offset = arg1;
25367 else
25368 base = arg1, offset = arg2;
25370 gcc_assert (REG_P (base));
25372 /* Catch the case of <address> = <reg> + <reg> */
25373 if (REG_P (offset))
25375 int reg_offset = REGNO (offset);
25376 int reg_base = REGNO (base);
25377 int reg_dest = REGNO (operands[0]);
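	  /* Illustrative example of the sequence emitted below, with the
	     destination in r2, the base in r4 and the offset in r5:

		add	r3, r4, r5
		ldr	r2, [r3, #0]
		ldr	r3, [r3, #4]

	     The low word is loaded first; the high destination register
	     keeps the computed address until the final load clobbers it.  */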
25379 /* Add the base and offset registers together into the
25380 higher destination register. */
25381 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25382 reg_dest + 1, reg_base, reg_offset);
25384 /* Load the lower destination register from the address in
25385 the higher destination register. */
25386 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25387 reg_dest, reg_dest + 1);
25389 /* Load the higher destination register from its own address
25390 plus 4. */
25391 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25392 reg_dest + 1, reg_dest + 1);
25394 else
25396 /* Compute <address> + 4 for the high order load. */
25397 operands[2] = adjust_address (operands[1], SImode, 4);
25399 /* If the computed address is held in the low order register
25400 then load the high order register first, otherwise always
25401 load the low order register first. */
25402 if (REGNO (operands[0]) == REGNO (base))
25404 output_asm_insn ("ldr\t%H0, %2", operands);
25405 output_asm_insn ("ldr\t%0, %1", operands);
25407 else
25409 output_asm_insn ("ldr\t%0, %1", operands);
25410 output_asm_insn ("ldr\t%H0, %2", operands);
25413 break;
25415 case LABEL_REF:
25416 /* With no registers to worry about we can just load the value
25417 directly. */
25418 operands[2] = adjust_address (operands[1], SImode, 4);
25420 output_asm_insn ("ldr\t%H0, %2", operands);
25421 output_asm_insn ("ldr\t%0, %1", operands);
25422 break;
25424 default:
25425 gcc_unreachable ();
25428 return "";
25431 const char *
25432 thumb_output_move_mem_multiple (int n, rtx *operands)
25434 rtx tmp;
25436 switch (n)
25438 case 2:
25439 if (REGNO (operands[4]) > REGNO (operands[5]))
25441 tmp = operands[4];
25442 operands[4] = operands[5];
25443 operands[5] = tmp;
25445 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25446 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25447 break;
25449 case 3:
25450 if (REGNO (operands[4]) > REGNO (operands[5]))
25451 std::swap (operands[4], operands[5]);
25452 if (REGNO (operands[5]) > REGNO (operands[6]))
25453 std::swap (operands[5], operands[6]);
25454 if (REGNO (operands[4]) > REGNO (operands[5]))
25455 std::swap (operands[4], operands[5]);
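	  /* The three conditional swaps above form a small sorting network
	     that puts operands[4..6] into ascending register order, as
	     required by the ldmia/stmia register lists emitted below.  */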
25457 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25458 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25459 break;
25461 default:
25462 gcc_unreachable ();
25465 return "";
25468 /* Output a call-via instruction for thumb state. */
25469 const char *
25470 thumb_call_via_reg (rtx reg)
25472 int regno = REGNO (reg);
25473 rtx *labelp;
25475 gcc_assert (regno < LR_REGNUM);
25477 /* If we are in the normal text section we can use a single instance
25478 per compilation unit. If we are doing function sections, then we need
25479 an entry per section, since we can't rely on reachability. */
25480 if (in_section == text_section)
25482 thumb_call_reg_needed = 1;
25484 if (thumb_call_via_label[regno] == NULL)
25485 thumb_call_via_label[regno] = gen_label_rtx ();
25486 labelp = thumb_call_via_label + regno;
25488 else
25490 if (cfun->machine->call_via[regno] == NULL)
25491 cfun->machine->call_via[regno] = gen_label_rtx ();
25492 labelp = cfun->machine->call_via + regno;
25495 output_asm_insn ("bl\t%a0", labelp);
25496 return "";
25499 /* Routines for generating rtl. */
25500 void
25501 thumb_expand_movmemqi (rtx *operands)
25503 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25504 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25505 HOST_WIDE_INT len = INTVAL (operands[2]);
25506 HOST_WIDE_INT offset = 0;
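  /* The copy is expanded greedily from larger to smaller pieces; for
     example (illustrative) a 23-byte copy becomes one 12-byte
     ldmia/stmia block, one 8-byte block, then a halfword and a byte
     tail copy.  */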
25508 while (len >= 12)
25510 emit_insn (gen_movmem12b (out, in, out, in));
25511 len -= 12;
25514 if (len >= 8)
25516 emit_insn (gen_movmem8b (out, in, out, in));
25517 len -= 8;
25520 if (len >= 4)
25522 rtx reg = gen_reg_rtx (SImode);
25523 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25524 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25525 len -= 4;
25526 offset += 4;
25529 if (len >= 2)
25531 rtx reg = gen_reg_rtx (HImode);
25532 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25533 plus_constant (Pmode, in,
25534 offset))));
25535 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25536 offset)),
25537 reg));
25538 len -= 2;
25539 offset += 2;
25542 if (len)
25544 rtx reg = gen_reg_rtx (QImode);
25545 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25546 plus_constant (Pmode, in,
25547 offset))));
25548 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25549 offset)),
25550 reg));
25554 void
25555 thumb_reload_out_hi (rtx *operands)
25557 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25560 /* Handle reading a half-word from memory during reload. */
25561 void
25562 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25564 gcc_unreachable ();
25567 /* Return the length of a function name prefix
25568 that starts with the character C. */
25569 static int
25570 arm_get_strip_length (int c)
25572 switch (c)
25574 ARM_NAME_ENCODING_LENGTHS
25575 default: return 0;
25579 /* Return a pointer to a function's name with any
25580 and all prefix encodings stripped from it. */
25581 const char *
25582 arm_strip_name_encoding (const char *name)
25584 int skip;
25586 while ((skip = arm_get_strip_length (* name)))
25587 name += skip;
25589 return name;
25592 /* If there is a '*' anywhere in the name's prefix, then
25593 emit the stripped name verbatim, otherwise prepend an
25594 underscore if leading underscores are being used. */
25595 void
25596 arm_asm_output_labelref (FILE *stream, const char *name)
25598 int skip;
25599 int verbatim = 0;
25601 while ((skip = arm_get_strip_length (* name)))
25603 verbatim |= (*name == '*');
25604 name += skip;
25607 if (verbatim)
25608 fputs (name, stream);
25609 else
25610 asm_fprintf (stream, "%U%s", name);
25613 /* This function is used to emit an EABI tag and its associated value.
25614 We emit the numerical value of the tag in case the assembler does not
25615 support textual tags (e.g. gas prior to 2.20). If requested we include
25616 the tag name in a comment so that anyone reading the assembler output
25617 will know which tag is being set.
25619 This function is not static because arm-c.c needs it too. */
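/* For illustration, a call such as
     arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2);
   produces
     .eabi_attribute 30, 2	@ Tag_ABI_optimization_goals
   where the trailing comment only appears with -fverbose-asm or -dA
   (assuming "@" is the target's comment-start string).  */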
25621 void
25622 arm_emit_eabi_attribute (const char *name, int num, int val)
25624 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25625 if (flag_verbose_asm || flag_debug_asm)
25626 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25627 asm_fprintf (asm_out_file, "\n");
25630 /* This function is used to print CPU tuning information as a comment
25631 in the assembler file. Pointers are not printed for now. */
25633 void
25634 arm_print_tune_info (void)
25636 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25637 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25638 current_tune->constant_limit);
25639 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25640 current_tune->max_insns_skipped);
25641 asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
25642 current_tune->num_prefetch_slots);
25643 asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
25644 current_tune->l1_cache_size);
25645 asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
25646 current_tune->l1_cache_line_size);
25647 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25648 (int) current_tune->prefer_constant_pool);
25649 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25650 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25651 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25652 current_tune->branch_cost (false, false));
25653 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25654 current_tune->branch_cost (false, true));
25655 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25656 current_tune->branch_cost (true, false));
25657 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25658 current_tune->branch_cost (true, true));
25659 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25660 (int) current_tune->prefer_ldrd_strd);
25661 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25662 (int) current_tune->logical_op_non_short_circuit[0],
25663 (int) current_tune->logical_op_non_short_circuit[1]);
25664 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25665 (int) current_tune->prefer_neon_for_64bits);
25666 asm_fprintf (asm_out_file,
25667 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25668 (int) current_tune->disparage_flag_setting_t16_encodings);
25669 asm_fprintf (asm_out_file,
25670 "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
25671 (int) current_tune
25672 ->disparage_partial_flag_setting_t16_encodings);
25673 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25674 (int) current_tune->string_ops_prefer_neon);
25675 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25676 current_tune->max_insns_inline_memset);
25677 asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25678 current_tune->fuseable_ops);
25679 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25680 (int) current_tune->sched_autopref);
25683 static void
25684 arm_file_start (void)
25686 int val;
25688 if (TARGET_UNIFIED_ASM)
25689 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25691 if (TARGET_BPABI)
25693 const char *fpu_name;
25694 if (arm_selected_arch)
25696 /* armv7ve doesn't support any extensions. */
25697 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25699 /* Keep backward compatibility for assemblers
25700 which don't support armv7ve. */
25701 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25702 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25703 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25704 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25705 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25707 else
25709 const char* pos = strchr (arm_selected_arch->name, '+');
25710 if (pos)
25712 char buf[15];
25713 gcc_assert (strlen (arm_selected_arch->name)
25714 <= sizeof (buf) / sizeof (*pos));
25715 strncpy (buf, arm_selected_arch->name,
25716 (pos - arm_selected_arch->name) * sizeof (*pos));
25717 buf[pos - arm_selected_arch->name] = '\0';
25718 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25719 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25721 else
25722 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25725 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25726 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25727 else
25729 const char* truncated_name
25730 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25731 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25734 if (print_tune_info)
25735 arm_print_tune_info ();
25737 if (TARGET_SOFT_FLOAT)
25739 fpu_name = "softvfp";
25741 else
25743 fpu_name = arm_fpu_desc->name;
25744 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25746 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25747 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25749 if (TARGET_HARD_FLOAT_ABI)
25750 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25753 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25755 /* Some of these attributes only apply when the corresponding features
25756 are used. However we don't have any easy way of figuring this out.
25757 Conservatively record the setting that would have been used. */
25759 if (flag_rounding_math)
25760 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25762 if (!flag_unsafe_math_optimizations)
25764 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25765 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25767 if (flag_signaling_nans)
25768 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25770 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25771 flag_finite_math_only ? 1 : 3);
25773 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25774 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25775 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25776 flag_short_enums ? 1 : 2);
25778 /* Tag_ABI_optimization_goals. */
25779 if (optimize_size)
25780 val = 4;
25781 else if (optimize >= 2)
25782 val = 2;
25783 else if (optimize)
25784 val = 1;
25785 else
25786 val = 6;
25787 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25789 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25790 unaligned_access);
25792 if (arm_fp16_format)
25793 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25794 (int) arm_fp16_format);
25796 if (arm_lang_output_object_attributes_hook)
25797 arm_lang_output_object_attributes_hook();
25800 default_file_start ();
25803 static void
25804 arm_file_end (void)
25806 int regno;
25808 if (NEED_INDICATE_EXEC_STACK)
25809 /* Add .note.GNU-stack. */
25810 file_end_indicate_exec_stack ();
25812 if (! thumb_call_reg_needed)
25813 return;
25815 switch_to_section (text_section);
25816 asm_fprintf (asm_out_file, "\t.code 16\n");
25817 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25819 for (regno = 0; regno < LR_REGNUM; regno++)
25821 rtx label = thumb_call_via_label[regno];
25823 if (label != 0)
25825 targetm.asm_out.internal_label (asm_out_file, "L",
25826 CODE_LABEL_NUMBER (label));
25827 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25832 #ifndef ARM_PE
25833 /* Symbols in the text segment can be accessed without indirecting via the
25834 constant pool; it may take an extra binary operation, but this is still
25835 faster than indirecting via memory. Don't do this when not optimizing,
25836 since we won't be calculating all of the offsets necessary to do this
25837 simplification. */
25839 static void
25840 arm_encode_section_info (tree decl, rtx rtl, int first)
25842 if (optimize > 0 && TREE_CONSTANT (decl))
25843 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25845 default_encode_section_info (decl, rtl, first);
25847 #endif /* !ARM_PE */
25849 static void
25850 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25852 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25853 && !strcmp (prefix, "L"))
25855 arm_ccfsm_state = 0;
25856 arm_target_insn = NULL;
25858 default_internal_label (stream, prefix, labelno);
25861 /* Output code to add DELTA to the first argument, and then jump
25862 to FUNCTION. Used for C++ multiple inheritance. */
25863 static void
25864 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25865 HOST_WIDE_INT delta,
25866 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25867 tree function)
25869 static int thunk_label = 0;
25870 char label[256];
25871 char labelpc[256];
25872 int mi_delta = delta;
25873 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25874 int shift = 0;
25875 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25876 ? 1 : 0);
25877 if (mi_delta < 0)
25878 mi_delta = - mi_delta;
25880 final_start_function (emit_barrier (), file, 1);
25882 if (TARGET_THUMB1)
25884 int labelno = thunk_label++;
25885 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25886 /* Thunks are entered in ARM mode when available. */
25887 if (TARGET_THUMB1_ONLY)
25889 /* push r3 so we can use it as a temporary. */
25890 /* TODO: Omit this save if r3 is not used. */
25891 fputs ("\tpush {r3}\n", file);
25892 fputs ("\tldr\tr3, ", file);
25894 else
25896 fputs ("\tldr\tr12, ", file);
25898 assemble_name (file, label);
25899 fputc ('\n', file);
25900 if (flag_pic)
25902 /* If we are generating PIC, the ldr instruction below loads
25903 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25904 the address of the add + 8, so we have:
25906 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25907 = target + 1.
25909 Note that we have "+ 1" because some versions of GNU ld
25910 don't set the low bit of the result for R_ARM_REL32
25911 relocations against thumb function symbols.
25912 On ARMv6M this is +4, not +8. */
25913 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25914 assemble_name (file, labelpc);
25915 fputs (":\n", file);
25916 if (TARGET_THUMB1_ONLY)
25918 /* This is 2 insns after the start of the thunk, so we know it
25919 is 4-byte aligned. */
25920 fputs ("\tadd\tr3, pc, r3\n", file);
25921 fputs ("\tmov r12, r3\n", file);
25923 else
25924 fputs ("\tadd\tr12, pc, r12\n", file);
25926 else if (TARGET_THUMB1_ONLY)
25927 fputs ("\tmov r12, r3\n", file);
25929 if (TARGET_THUMB1_ONLY)
25931 if (mi_delta > 255)
25933 fputs ("\tldr\tr3, ", file);
25934 assemble_name (file, label);
25935 fputs ("+4\n", file);
25936 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25937 mi_op, this_regno, this_regno);
25939 else if (mi_delta != 0)
25941 /* Thumb1 unified syntax requires s suffix in instruction name when
25942 one of the operands is immediate. */
25943 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25944 mi_op, this_regno, this_regno,
25945 mi_delta);
25948 else
25950 /* TODO: Use movw/movt for large constants when available. */
25951 while (mi_delta != 0)
25953 if ((mi_delta & (3 << shift)) == 0)
25954 shift += 2;
25955 else
25957 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25958 mi_op, this_regno, this_regno,
25959 mi_delta & (0xff << shift));
25960 mi_delta &= ~(0xff << shift);
25961 shift += 8;
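	  /* Worked example (illustrative), assuming the this pointer is in
	     r0: a delta of 74565 (0x12345) is split into three immediates
	     that each fit the ARM rotated-immediate encoding:
		add	r0, r0, #69	@ 0x45
		add	r0, r0, #8960	@ 0x2300
		add	r0, r0, #65536	@ 0x10000  */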
25965 if (TARGET_THUMB1)
25967 if (TARGET_THUMB1_ONLY)
25968 fputs ("\tpop\t{r3}\n", file);
25970 fprintf (file, "\tbx\tr12\n");
25971 ASM_OUTPUT_ALIGN (file, 2);
25972 assemble_name (file, label);
25973 fputs (":\n", file);
25974 if (flag_pic)
25976 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25977 rtx tem = XEXP (DECL_RTL (function), 0);
25978 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25979 pipeline offset is four rather than eight. Adjust the offset
25980 accordingly. */
25981 tem = plus_constant (GET_MODE (tem), tem,
25982 TARGET_THUMB1_ONLY ? -3 : -7);
25983 tem = gen_rtx_MINUS (GET_MODE (tem),
25984 tem,
25985 gen_rtx_SYMBOL_REF (Pmode,
25986 ggc_strdup (labelpc)));
25987 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25989 else
25990 /* Output ".word .LTHUNKn". */
25991 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25993 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25994 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25996 else
25998 fputs ("\tb\t", file);
25999 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26000 if (NEED_PLT_RELOC)
26001 fputs ("(PLT)", file);
26002 fputc ('\n', file);
26005 final_end_function ();
26008 int
26009 arm_emit_vector_const (FILE *file, rtx x)
26011 int i;
26012 const char * pattern;
26014 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26016 switch (GET_MODE (x))
26018 case V2SImode: pattern = "%08x"; break;
26019 case V4HImode: pattern = "%04x"; break;
26020 case V8QImode: pattern = "%02x"; break;
26021 default: gcc_unreachable ();
26024 fprintf (file, "0x");
26025 for (i = CONST_VECTOR_NUNITS (x); i--;)
26027 rtx element;
26029 element = CONST_VECTOR_ELT (x, i);
26030 fprintf (file, pattern, INTVAL (element));
26033 return 1;
26036 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26037 HFmode constant pool entries are actually loaded with ldr. */
26038 void
26039 arm_emit_fp16_const (rtx c)
26041 REAL_VALUE_TYPE r;
26042 long bits;
26044 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
26045 bits = real_to_target (NULL, &r, HFmode);
26046 if (WORDS_BIG_ENDIAN)
26047 assemble_zeros (2);
26048 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26049 if (!WORDS_BIG_ENDIAN)
26050 assemble_zeros (2);
26053 const char *
26054 arm_output_load_gr (rtx *operands)
26056 rtx reg;
26057 rtx offset;
26058 rtx wcgr;
26059 rtx sum;
26061 if (!MEM_P (operands [1])
26062 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26063 || !REG_P (reg = XEXP (sum, 0))
26064 || !CONST_INT_P (offset = XEXP (sum, 1))
26065 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26066 return "wldrw%?\t%0, %1";
26068 /* Fix up an out-of-range load of a GR register. */
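  /* Illustrative expansion, assuming the destination is wcgr0 and the
     address is r2 + 2048:

	str	r2, [sp, #-4]!	@ Start of GR load expansion
	ldr	r2, [r2, #2048]
	tmcr	wcgr0, r2
	ldr	r2, [sp], #4	@ End of GR load expansion  */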
26069 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26070 wcgr = operands[0];
26071 operands[0] = reg;
26072 output_asm_insn ("ldr%?\t%0, %1", operands);
26074 operands[0] = wcgr;
26075 operands[1] = reg;
26076 output_asm_insn ("tmcr%?\t%0, %1", operands);
26077 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26079 return "";
26082 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26084 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26085 named arg and all anonymous args onto the stack.
26086 XXX I know the prologue shouldn't be pushing registers, but it is faster
26087 that way. */
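/* Illustrative example: for "int f (int a, ...)" under AAPCS the single
   named argument occupies r0, so the code below sets *pretend_size to 12
   and the prologue pushes r1-r3.  */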
26089 static void
26090 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26091 machine_mode mode,
26092 tree type,
26093 int *pretend_size,
26094 int second_time ATTRIBUTE_UNUSED)
26096 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26097 int nregs;
26099 cfun->machine->uses_anonymous_args = 1;
26100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26102 nregs = pcum->aapcs_ncrn;
26103 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26104 nregs++;
26106 else
26107 nregs = pcum->nregs;
26109 if (nregs < NUM_ARG_REGS)
26110 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26113 /* We can't rely on the caller doing the proper promotion when
26114 using APCS or ATPCS. */
26116 static bool
26117 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26119 return !TARGET_AAPCS_BASED;
26122 static machine_mode
26123 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26124 machine_mode mode,
26125 int *punsignedp ATTRIBUTE_UNUSED,
26126 const_tree fntype ATTRIBUTE_UNUSED,
26127 int for_return ATTRIBUTE_UNUSED)
26129 if (GET_MODE_CLASS (mode) == MODE_INT
26130 && GET_MODE_SIZE (mode) < 4)
26131 return SImode;
26133 return mode;
26136 /* AAPCS based ABIs use short enums by default. */
26138 static bool
26139 arm_default_short_enums (void)
26141 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26145 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26147 static bool
26148 arm_align_anon_bitfield (void)
26150 return TARGET_AAPCS_BASED;
26154 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26156 static tree
26157 arm_cxx_guard_type (void)
26159 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26163 /* The EABI says test the least significant bit of a guard variable. */
26165 static bool
26166 arm_cxx_guard_mask_bit (void)
26168 return TARGET_AAPCS_BASED;
26172 /* The EABI specifies that all array cookies are 8 bytes long. */
26174 static tree
26175 arm_get_cookie_size (tree type)
26177 tree size;
26179 if (!TARGET_AAPCS_BASED)
26180 return default_cxx_get_cookie_size (type);
26182 size = build_int_cst (sizetype, 8);
26183 return size;
26187 /* The EABI says that array cookies should also contain the element size. */
26189 static bool
26190 arm_cookie_has_size (void)
26192 return TARGET_AAPCS_BASED;
26196 /* The EABI says constructors and destructors should return a pointer to
26197 the object constructed/destroyed. */
26199 static bool
26200 arm_cxx_cdtor_returns_this (void)
26202 return TARGET_AAPCS_BASED;
26205 /* The EABI says that an inline function may never be the key
26206 method. */
26208 static bool
26209 arm_cxx_key_method_may_be_inline (void)
26211 return !TARGET_AAPCS_BASED;
26214 static void
26215 arm_cxx_determine_class_data_visibility (tree decl)
26217 if (!TARGET_AAPCS_BASED
26218 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26219 return;
26221 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26222 is exported. However, on systems without dynamic vague linkage,
26223 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26224 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26225 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26226 else
26227 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26228 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26231 static bool
26232 arm_cxx_class_data_always_comdat (void)
26234 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26235 vague linkage if the class has no key function. */
26236 return !TARGET_AAPCS_BASED;
26240 /* The EABI says __aeabi_atexit should be used to register static
26241 destructors. */
26243 static bool
26244 arm_cxx_use_aeabi_atexit (void)
26246 return TARGET_AAPCS_BASED;
26250 void
26251 arm_set_return_address (rtx source, rtx scratch)
26253 arm_stack_offsets *offsets;
26254 HOST_WIDE_INT delta;
26255 rtx addr;
26256 unsigned long saved_regs;
26258 offsets = arm_get_frame_offsets ();
26259 saved_regs = offsets->saved_regs_mask;
26261 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26262 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26263 else
26265 if (frame_pointer_needed)
26266 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26267 else
26269 /* LR will be the first saved register. */
26270 delta = offsets->outgoing_args - (offsets->frame + 4);
26273 if (delta >= 4096)
26275 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26276 GEN_INT (delta & ~4095)));
26277 addr = scratch;
26278 delta &= 4095;
26280 else
26281 addr = stack_pointer_rtx;
26283 addr = plus_constant (Pmode, addr, delta);
26285 /* The store needs to be marked as frame related in order to prevent
26286 DSE from deleting it as dead if it is based on fp. */
26287 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26288 RTX_FRAME_RELATED_P (insn) = 1;
26289 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26294 void
26295 thumb_set_return_address (rtx source, rtx scratch)
26297 arm_stack_offsets *offsets;
26298 HOST_WIDE_INT delta;
26299 HOST_WIDE_INT limit;
26300 int reg;
26301 rtx addr;
26302 unsigned long mask;
26304 emit_use (source);
26306 offsets = arm_get_frame_offsets ();
26307 mask = offsets->saved_regs_mask;
26308 if (mask & (1 << LR_REGNUM))
26310 limit = 1024;
26311 /* Find the saved regs. */
26312 if (frame_pointer_needed)
26314 delta = offsets->soft_frame - offsets->saved_args;
26315 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26316 if (TARGET_THUMB1)
26317 limit = 128;
26319 else
26321 delta = offsets->outgoing_args - offsets->saved_args;
26322 reg = SP_REGNUM;
26324 /* Allow for the stack frame. */
26325 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26326 delta -= 16;
26327 /* The link register is always the first saved register. */
26328 delta -= 4;
26330 /* Construct the address. */
26331 addr = gen_rtx_REG (SImode, reg);
26332 if (delta > limit)
26334 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26335 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26336 addr = scratch;
26338 else
26339 addr = plus_constant (Pmode, addr, delta);
26341 /* The store needs to be marked as frame related in order to prevent
26342 DSE from deleting it as dead if it is based on fp. */
26343 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26344 RTX_FRAME_RELATED_P (insn) = 1;
26345 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26347 else
26348 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26351 /* Implements target hook vector_mode_supported_p. */
26352 bool
26353 arm_vector_mode_supported_p (machine_mode mode)
26355 /* Neon also supports V2SImode, etc. listed in the clause below. */
26356 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26357 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26358 return true;
26360 if ((TARGET_NEON || TARGET_IWMMXT)
26361 && ((mode == V2SImode)
26362 || (mode == V4HImode)
26363 || (mode == V8QImode)))
26364 return true;
26366 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26367 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26368 || mode == V2HAmode))
26369 return true;
26371 return false;
26374 /* Implements target hook array_mode_supported_p. */
26376 static bool
26377 arm_array_mode_supported_p (machine_mode mode,
26378 unsigned HOST_WIDE_INT nelems)
26380 if (TARGET_NEON
26381 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26382 && (nelems >= 2 && nelems <= 4))
26383 return true;
26385 return false;
26388 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26389 registers when autovectorizing for Neon, at least until multiple vector
26390 widths are supported properly by the middle-end. */
26392 static machine_mode
26393 arm_preferred_simd_mode (machine_mode mode)
26395 if (TARGET_NEON)
26396 switch (mode)
26398 case SFmode:
26399 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26400 case SImode:
26401 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26402 case HImode:
26403 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26404 case QImode:
26405 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26406 case DImode:
26407 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26408 return V2DImode;
26409 break;
26411 default:;
26414 if (TARGET_REALLY_IWMMXT)
26415 switch (mode)
26417 case SImode:
26418 return V2SImode;
26419 case HImode:
26420 return V4HImode;
26421 case QImode:
26422 return V8QImode;
26424 default:;
26427 return word_mode;
26430 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26432 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26433 using r0-r4 for function arguments, r7 for the stack frame and don't have
26434 enough left over to do doubleword arithmetic. For Thumb-2 all the
26435 potentially problematic instructions accept high registers so this is not
26436 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26437 that require many low registers. */
26438 static bool
26439 arm_class_likely_spilled_p (reg_class_t rclass)
26441 if ((TARGET_THUMB1 && rclass == LO_REGS)
26442 || rclass == CC_REG)
26443 return true;
26445 return false;
26448 /* Implements target hook small_register_classes_for_mode_p. */
26449 bool
26450 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26452 return TARGET_THUMB1;
26455 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26456 ARM insns and therefore guarantee that the shift count is modulo 256.
26457 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26458 guarantee no particular behavior for out-of-range counts. */
26460 static unsigned HOST_WIDE_INT
26461 arm_shift_truncation_mask (machine_mode mode)
26463 return mode == SImode ? 255 : 0;
26467 /* Map internal gcc register numbers to DWARF2 register numbers. */
26469 unsigned int
26470 arm_dbx_register_number (unsigned int regno)
26472 if (regno < 16)
26473 return regno;
26475 if (IS_VFP_REGNUM (regno))
26477 /* See comment in arm_dwarf_register_span. */
26478 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26479 return 64 + regno - FIRST_VFP_REGNUM;
26480 else
26481 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26484 if (IS_IWMMXT_GR_REGNUM (regno))
26485 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26487 if (IS_IWMMXT_REGNUM (regno))
26488 return 112 + regno - FIRST_IWMMXT_REGNUM;
26490 gcc_unreachable ();
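/* Illustrative mappings from the function above: core register r5 -> 5,
   s0 -> 64, s31 -> 95, while d16-d31 (which have no single-precision
   aliases) land in the 256+ range, e.g. d16 -> 272.  */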
26493 /* DWARF models VFPv3 registers as 32 64-bit registers.
26494 GCC models them as 64 32-bit registers, so we need to describe this to
26495 the DWARF generation code. Other registers can use the default. */
26496 static rtx
26497 arm_dwarf_register_span (rtx rtl)
26499 machine_mode mode;
26500 unsigned regno;
26501 rtx parts[16];
26502 int nregs;
26503 int i;
26505 regno = REGNO (rtl);
26506 if (!IS_VFP_REGNUM (regno))
26507 return NULL_RTX;
26509 /* XXX FIXME: The EABI defines two VFP register ranges:
26510 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26511 256-287: D0-D31
26512 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26513 corresponding D register. Until GDB supports this, we shall use the
26514 legacy encodings. We also use these encodings for D0-D15 for
26515 compatibility with older debuggers. */
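  /* For example (illustrative), a DFmode value in d5 is described below
     as the pair of SImode registers s10 and s11, i.e. DWARF registers
     74 and 75 under the legacy numbering.  */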
26516 mode = GET_MODE (rtl);
26517 if (GET_MODE_SIZE (mode) < 8)
26518 return NULL_RTX;
26520 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26522 nregs = GET_MODE_SIZE (mode) / 4;
26523 for (i = 0; i < nregs; i += 2)
26524 if (TARGET_BIG_END)
26526 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26527 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26529 else
26531 parts[i] = gen_rtx_REG (SImode, regno + i);
26532 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26535 else
26537 nregs = GET_MODE_SIZE (mode) / 8;
26538 for (i = 0; i < nregs; i++)
26539 parts[i] = gen_rtx_REG (DImode, regno + i);
26542 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26545 #if ARM_UNWIND_INFO
26546 /* Emit unwind directives for a store-multiple instruction or stack pointer
26547 push during alignment.
26548 These should only ever be generated by the function prologue code, so
26549 expect them to have a particular form.
26550 The store-multiple instruction sometimes pushes pc as the last register,
26551 although it should not be tracked in the unwind information; for -Os it
26552 sometimes pushes some dummy registers before the first register that needs
26553 to be tracked in unwind information; such dummy registers are there just
26554 to avoid separate stack adjustment, and will not be restored in the
26555 epilogue. */
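/* For illustration: a prologue push of {r4, r5, lr} (a PARALLEL whose
   first SET drops sp by 12 and whose remaining SETs store r4, r5 and lr
   at consecutive offsets) is annotated with the single directive
	.save {r4, r5, lr}
   assuming no -Os padding registers are involved.  */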
26557 static void
26558 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26560 int i;
26561 HOST_WIDE_INT offset;
26562 HOST_WIDE_INT nregs;
26563 int reg_size;
26564 unsigned reg;
26565 unsigned lastreg;
26566 unsigned padfirst = 0, padlast = 0;
26567 rtx e;
26569 e = XVECEXP (p, 0, 0);
26570 gcc_assert (GET_CODE (e) == SET);
26572 /* First insn will adjust the stack pointer. */
26573 gcc_assert (GET_CODE (e) == SET
26574 && REG_P (SET_DEST (e))
26575 && REGNO (SET_DEST (e)) == SP_REGNUM
26576 && GET_CODE (SET_SRC (e)) == PLUS);
26578 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26579 nregs = XVECLEN (p, 0) - 1;
26580 gcc_assert (nregs);
26582 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26583 if (reg < 16)
26585 /* For -Os dummy registers can be pushed at the beginning to
26586 avoid separate stack pointer adjustment. */
26587 e = XVECEXP (p, 0, 1);
26588 e = XEXP (SET_DEST (e), 0);
26589 if (GET_CODE (e) == PLUS)
26590 padfirst = INTVAL (XEXP (e, 1));
26591 gcc_assert (padfirst == 0 || optimize_size);
26592 /* The function prologue may also push pc, but not annotate it as it is
26593 never restored. We turn this into a stack pointer adjustment. */
26594 e = XVECEXP (p, 0, nregs);
26595 e = XEXP (SET_DEST (e), 0);
26596 if (GET_CODE (e) == PLUS)
26597 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26598 else
26599 padlast = offset - 4;
26600 gcc_assert (padlast == 0 || padlast == 4);
26601 if (padlast == 4)
26602 fprintf (asm_out_file, "\t.pad #4\n");
26603 reg_size = 4;
26604 fprintf (asm_out_file, "\t.save {");
26606 else if (IS_VFP_REGNUM (reg))
26608 reg_size = 8;
26609 fprintf (asm_out_file, "\t.vsave {");
26611 else
26612 /* Unknown register type. */
26613 gcc_unreachable ();
26615 /* If the stack increment doesn't match the size of the saved registers,
26616 something has gone horribly wrong. */
26617 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26619 offset = padfirst;
26620 lastreg = 0;
26621 /* The remaining insns will describe the stores. */
26622 for (i = 1; i <= nregs; i++)
26624 /* Expect (set (mem <addr>) (reg)).
26625 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26626 e = XVECEXP (p, 0, i);
26627 gcc_assert (GET_CODE (e) == SET
26628 && MEM_P (SET_DEST (e))
26629 && REG_P (SET_SRC (e)));
26631 reg = REGNO (SET_SRC (e));
26632 gcc_assert (reg >= lastreg);
26634 if (i != 1)
26635 fprintf (asm_out_file, ", ");
26636 /* We can't use %r for vfp because we need to use the
26637 double precision register names. */
26638 if (IS_VFP_REGNUM (reg))
26639 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26640 else
26641 asm_fprintf (asm_out_file, "%r", reg);
26643 #ifdef ENABLE_CHECKING
26644 /* Check that the addresses are consecutive. */
26645 e = XEXP (SET_DEST (e), 0);
26646 if (GET_CODE (e) == PLUS)
26647 gcc_assert (REG_P (XEXP (e, 0))
26648 && REGNO (XEXP (e, 0)) == SP_REGNUM
26649 && CONST_INT_P (XEXP (e, 1))
26650 && offset == INTVAL (XEXP (e, 1)));
26651 else
26652 gcc_assert (i == 1
26653 && REG_P (e)
26654 && REGNO (e) == SP_REGNUM);
26655 offset += reg_size;
26656 #endif
26658 fprintf (asm_out_file, "}\n");
26659 if (padfirst)
26660 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26663 /* Emit unwind directives for a SET. */
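/* Typical translations performed below (illustrative):
     (set (mem (pre_dec sp)) r4)		->  .save {r4}
     (set sp (plus sp (const_int -8)))		->  .pad #8
     (set fp (plus sp (const_int 4)))		->  .setfp fp, sp, #4
     (set r4 sp)				->  .movsp r4
   The exact spelling of the frame pointer depends on ARM vs. Thumb.  */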
26665 static void
26666 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26668 rtx e0;
26669 rtx e1;
26670 unsigned reg;
26672 e0 = XEXP (p, 0);
26673 e1 = XEXP (p, 1);
26674 switch (GET_CODE (e0))
26676 case MEM:
26677 /* Pushing a single register. */
26678 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26679 || !REG_P (XEXP (XEXP (e0, 0), 0))
26680 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26681 abort ();
26683 asm_fprintf (asm_out_file, "\t.save ");
26684 if (IS_VFP_REGNUM (REGNO (e1)))
26685 asm_fprintf(asm_out_file, "{d%d}\n",
26686 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26687 else
26688 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26689 break;
26691 case REG:
26692 if (REGNO (e0) == SP_REGNUM)
26694 /* A stack increment. */
26695 if (GET_CODE (e1) != PLUS
26696 || !REG_P (XEXP (e1, 0))
26697 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26698 || !CONST_INT_P (XEXP (e1, 1)))
26699 abort ();
26701 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26702 -INTVAL (XEXP (e1, 1)));
26704 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26706 HOST_WIDE_INT offset;
26708 if (GET_CODE (e1) == PLUS)
26710 if (!REG_P (XEXP (e1, 0))
26711 || !CONST_INT_P (XEXP (e1, 1)))
26712 abort ();
26713 reg = REGNO (XEXP (e1, 0));
26714 offset = INTVAL (XEXP (e1, 1));
26715 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26716 HARD_FRAME_POINTER_REGNUM, reg,
26717 offset);
26719 else if (REG_P (e1))
26721 reg = REGNO (e1);
26722 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26723 HARD_FRAME_POINTER_REGNUM, reg);
26725 else
26726 abort ();
26728 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26730 /* Move from sp to reg. */
26731 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26733 else if (GET_CODE (e1) == PLUS
26734 && REG_P (XEXP (e1, 0))
26735 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26736 && CONST_INT_P (XEXP (e1, 1)))
26738 /* Set reg to offset from sp. */
26739 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26740 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26742 else
26743 abort ();
26744 break;
26746 default:
26747 abort ();
26752 /* Emit unwind directives for the given insn. */
26754 static void
26755 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26757 rtx note, pat;
26758 bool handled_one = false;
26760 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26761 return;
26763 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26764 && (TREE_NOTHROW (current_function_decl)
26765 || crtl->all_throwers_are_sibcalls))
26766 return;
26768 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26769 return;
26771 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26773 switch (REG_NOTE_KIND (note))
26775 case REG_FRAME_RELATED_EXPR:
26776 pat = XEXP (note, 0);
26777 goto found;
26779 case REG_CFA_REGISTER:
26780 pat = XEXP (note, 0);
26781 if (pat == NULL)
26783 pat = PATTERN (insn);
26784 if (GET_CODE (pat) == PARALLEL)
26785 pat = XVECEXP (pat, 0, 0);
26788 /* Only emitted for IS_STACKALIGN re-alignment. */
26790 rtx dest, src;
26791 unsigned reg;
26793 src = SET_SRC (pat);
26794 dest = SET_DEST (pat);
26796 gcc_assert (src == stack_pointer_rtx);
26797 reg = REGNO (dest);
26798 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26799 reg + 0x90, reg);
26801 handled_one = true;
26802 break;
26804 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26805 only to get correct DWARF information for shrink-wrapping. We should not
26806 emit unwind information for it because such notes are used either for
26807 pretend arguments or to adjust the sp and restore registers from the
26808 stack. */
26809 case REG_CFA_DEF_CFA:
26810 case REG_CFA_ADJUST_CFA:
26811 case REG_CFA_RESTORE:
26812 return;
26814 case REG_CFA_EXPRESSION:
26815 case REG_CFA_OFFSET:
26816 /* ??? Only handling here what we actually emit. */
26817 gcc_unreachable ();
26819 default:
26820 break;
26823 if (handled_one)
26824 return;
26825 pat = PATTERN (insn);
26826 found:
26828 switch (GET_CODE (pat))
26830 case SET:
26831 arm_unwind_emit_set (asm_out_file, pat);
26832 break;
26834 case SEQUENCE:
26835 /* Store multiple. */
26836 arm_unwind_emit_sequence (asm_out_file, pat);
26837 break;
26839 default:
26840 abort();
26845 /* Output a reference from a function exception table to the type_info
26846 object X. The EABI specifies that the symbol should be relocated by
26847 an R_ARM_TARGET2 relocation. */
26849 static bool
26850 arm_output_ttype (rtx x)
26852 fputs ("\t.word\t", asm_out_file);
26853 output_addr_const (asm_out_file, x);
26854 /* Use special relocations for symbol references. */
26855 if (!CONST_INT_P (x))
26856 fputs ("(TARGET2)", asm_out_file);
26857 fputc ('\n', asm_out_file);
26859 return TRUE;
26862 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26864 static void
26865 arm_asm_emit_except_personality (rtx personality)
26867 fputs ("\t.personality\t", asm_out_file);
26868 output_addr_const (asm_out_file, personality);
26869 fputc ('\n', asm_out_file);
26872 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26874 static void
26875 arm_asm_init_sections (void)
26877 exception_section = get_unnamed_section (0, output_section_asm_op,
26878 "\t.handlerdata");
26880 #endif /* ARM_UNWIND_INFO */
26882 /* Output unwind directives for the start/end of a function. */
26884 void
26885 arm_output_fn_unwind (FILE * f, bool prologue)
26887 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26888 return;
26890 if (prologue)
26891 fputs ("\t.fnstart\n", f);
26892 else
26894 /* If this function will never be unwound, then mark it as such.
26895 The same condition is used in arm_unwind_emit to suppress
26896 the frame annotations. */
26897 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26898 && (TREE_NOTHROW (current_function_decl)
26899 || crtl->all_throwers_are_sibcalls))
26900 fputs("\t.cantunwind\n", f);
26902 fputs ("\t.fnend\n", f);
26906 static bool
26907 arm_emit_tls_decoration (FILE *fp, rtx x)
26909 enum tls_reloc reloc;
26910 rtx val;
26912 val = XVECEXP (x, 0, 0);
26913 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26915 output_addr_const (fp, val);
26917 switch (reloc)
26919 case TLS_GD32:
26920 fputs ("(tlsgd)", fp);
26921 break;
26922 case TLS_LDM32:
26923 fputs ("(tlsldm)", fp);
26924 break;
26925 case TLS_LDO32:
26926 fputs ("(tlsldo)", fp);
26927 break;
26928 case TLS_IE32:
26929 fputs ("(gottpoff)", fp);
26930 break;
26931 case TLS_LE32:
26932 fputs ("(tpoff)", fp);
26933 break;
26934 case TLS_DESCSEQ:
26935 fputs ("(tlsdesc)", fp);
26936 break;
26937 default:
26938 gcc_unreachable ();
26941 switch (reloc)
26943 case TLS_GD32:
26944 case TLS_LDM32:
26945 case TLS_IE32:
26946 case TLS_DESCSEQ:
26947 fputs (" + (. - ", fp);
26948 output_addr_const (fp, XVECEXP (x, 0, 2));
26949 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26950 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26951 output_addr_const (fp, XVECEXP (x, 0, 3));
26952 fputc (')', fp);
26953 break;
26954 default:
26955 break;
26958 return TRUE;
26961 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26963 static void
26964 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26966 gcc_assert (size == 4);
26967 fputs ("\t.word\t", file);
26968 output_addr_const (file, x);
26969 fputs ("(tlsldo)", file);
26972 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26974 static bool
26975 arm_output_addr_const_extra (FILE *fp, rtx x)
26977 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26978 return arm_emit_tls_decoration (fp, x);
26979 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26981 char label[256];
26982 int labelno = INTVAL (XVECEXP (x, 0, 0));
26984 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26985 assemble_name_raw (fp, label);
26987 return TRUE;
26989 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26991 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26992 if (GOT_PCREL)
26993 fputs ("+.", fp);
26994 fputs ("-(", fp);
26995 output_addr_const (fp, XVECEXP (x, 0, 0));
26996 fputc (')', fp);
26997 return TRUE;
26999 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27001 output_addr_const (fp, XVECEXP (x, 0, 0));
27002 if (GOT_PCREL)
27003 fputs ("+.", fp);
27004 fputs ("-(", fp);
27005 output_addr_const (fp, XVECEXP (x, 0, 1));
27006 fputc (')', fp);
27007 return TRUE;
27009 else if (GET_CODE (x) == CONST_VECTOR)
27010 return arm_emit_vector_const (fp, x);
27012 return FALSE;
27015 /* Output assembly for a shift instruction.
27016 SET_FLAGS determines how the instruction modifies the condition codes.
27017 0 - Do not set condition codes.
27018 1 - Set condition codes.
27019 2 - Use smallest instruction. */
27020 const char *
27021 arm_output_shift(rtx * operands, int set_flags)
27023 char pattern[100];
27024 static const char flag_chars[3] = {'?', '.', '!'};
27025 const char *shift;
27026 HOST_WIDE_INT val;
27027 char c;
27029 c = flag_chars[set_flags];
27030 if (TARGET_UNIFIED_ASM)
27032 shift = shift_op(operands[3], &val);
27033 if (shift)
27035 if (val != -1)
27036 operands[2] = GEN_INT(val);
27037 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27039 else
27040 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27042 else
27043 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
27044 output_asm_insn (pattern, operands);
27045 return "";
27048 /* Output assembly for a WMMX immediate shift instruction. */
27049 const char *
27050 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27052 int shift = INTVAL (operands[2]);
27053 char templ[50];
27054 machine_mode opmode = GET_MODE (operands[0]);
27056 gcc_assert (shift >= 0);
27058 /* Handle shift values that are out of range: > 63 for the D qualifier,
27059 > 31 for the W qualifier or > 15 for the H qualifier. */
27060 if (((opmode == V4HImode) && (shift > 15))
27061 || ((opmode == V2SImode) && (shift > 31))
27062 || ((opmode == DImode) && (shift > 63)))
27064 if (wror_or_wsra)
27066 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27067 output_asm_insn (templ, operands);
27068 if (opmode == DImode)
27070 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27071 output_asm_insn (templ, operands);
27074 else
27076 /* The destination register will contain all zeros. */
27077 sprintf (templ, "wzero\t%%0");
27078 output_asm_insn (templ, operands);
27080 return "";
27083 if ((opmode == DImode) && (shift > 32))
27085 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27086 output_asm_insn (templ, operands);
27087 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27088 output_asm_insn (templ, operands);
27090 else
27092 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27093 output_asm_insn (templ, operands);
27095 return "";
27098 /* Output assembly for a WMMX tinsr instruction. */
27099 const char *
27100 arm_output_iwmmxt_tinsr (rtx *operands)
27102 int mask = INTVAL (operands[3]);
27103 int i;
27104 char templ[50];
27105 int units = mode_nunits[GET_MODE (operands[0])];
27106 gcc_assert ((mask & (mask - 1)) == 0);
27107 for (i = 0; i < units; ++i)
27109 if ((mask & 0x01) == 1)
27111 break;
27113 mask >>= 1;
27115 gcc_assert (i < units);
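  /* I is now the index of the single element selected by the one-hot
     writemask; e.g. (illustrative) a mask of 0x4 on a V4HImode
     destination makes the tinsrh emitted below use element index #2.  */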
27117 switch (GET_MODE (operands[0]))
27119 case V8QImode:
27120 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27121 break;
27122 case V4HImode:
27123 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27124 break;
27125 case V2SImode:
27126 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27127 break;
27128 default:
27129 gcc_unreachable ();
27130 break;
27132 output_asm_insn (templ, operands);
27134 return "";
27137 /* Output a Thumb-1 casesi dispatch sequence. */
27138 const char *
27139 thumb1_output_casesi (rtx *operands)
27141 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27143 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27145 switch (GET_MODE(diff_vec))
27147 case QImode:
27148 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27149 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27150 case HImode:
27151 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27152 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27153 case SImode:
27154 return "bl\t%___gnu_thumb1_case_si";
27155 default:
27156 gcc_unreachable ();
27160 /* Output a Thumb-2 casesi instruction. */
27161 const char *
27162 thumb2_output_casesi (rtx *operands)
27164 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27166 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27168 output_asm_insn ("cmp\t%0, %1", operands);
27169 output_asm_insn ("bhi\t%l3", operands);
27170 switch (GET_MODE(diff_vec))
27172 case QImode:
27173 return "tbb\t[%|pc, %0]";
27174 case HImode:
27175 return "tbh\t[%|pc, %0, lsl #1]";
27176 case SImode:
27177 if (flag_pic)
27179 output_asm_insn ("adr\t%4, %l2", operands);
27180 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27181 output_asm_insn ("add\t%4, %4, %5", operands);
27182 return "bx\t%4";
27184 else
27186 output_asm_insn ("adr\t%4, %l2", operands);
27187 return "ldr\t%|pc, [%4, %0, lsl #2]";
27189 default:
27190 gcc_unreachable ();
27194 /* Most ARM cores are single issue, but some newer ones can dual issue.
27195 The scheduler descriptions rely on this being correct. */
27196 static int
27197 arm_issue_rate (void)
27199 switch (arm_tune)
27201 case xgene1:
27202 return 4;
27204 case cortexa15:
27205 case cortexa57:
27206 case exynosm1:
27207 return 3;
27209 case cortexm7:
27210 case cortexr4:
27211 case cortexr4f:
27212 case cortexr5:
27213 case genericv7a:
27214 case cortexa5:
27215 case cortexa7:
27216 case cortexa8:
27217 case cortexa9:
27218 case cortexa12:
27219 case cortexa17:
27220 case cortexa53:
27221 case fa726te:
27222 case marvell_pj4:
27223 return 2;
27225 default:
27226 return 1;
27230 /* Return how many instructions the scheduler should look ahead to choose
27231 the best one. */
27232 static int
27233 arm_first_cycle_multipass_dfa_lookahead (void)
27235 int issue_rate = arm_issue_rate ();
27237 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27240 /* Enable modeling of L2 auto-prefetcher. */
27241 static int
27242 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27244 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27247 const char *
27248 arm_mangle_type (const_tree type)
27250 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27251 has to be mangled as if it is in the "std" namespace. */
27252 if (TARGET_AAPCS_BASED
27253 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27254 return "St9__va_list";
27256 /* Half-precision float. */
27257 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27258 return "Dh";
27260 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27261 builtin type. */
27262 if (TYPE_NAME (type) != NULL)
27263 return arm_mangle_builtin_type (type);
27265 /* Use the default mangling. */
27266 return NULL;
27269 /* Order of allocation of core registers for Thumb: this allocation is
27270 written over the corresponding initial entries of the array
27271 initialized with REG_ALLOC_ORDER. We allocate all low registers
27272 first. Saving and restoring a low register is usually cheaper than
27273 using a call-clobbered high register. */
27275 static const int thumb_core_reg_alloc_order[] =
27277 3, 2, 1, 0, 4, 5, 6, 7,
27278 14, 12, 8, 9, 10, 11
27281 /* Adjust register allocation order when compiling for Thumb. */
27283 void
27284 arm_order_regs_for_local_alloc (void)
27286 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27287 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27288 if (TARGET_THUMB)
27289 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27290 sizeof (thumb_core_reg_alloc_order));
27293 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27295 bool
27296 arm_frame_pointer_required (void)
27298 return (cfun->has_nonlocal_label
27299 || SUBTARGET_FRAME_POINTER_REQUIRED
27300 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27303 /* Only Thumb-1 lacks conditional execution, so return true if
27304 the target is not Thumb-1. */
27305 static bool
27306 arm_have_conditional_execution (void)
27308 return !TARGET_THUMB1;
27311 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27312 static HOST_WIDE_INT
27313 arm_vector_alignment (const_tree type)
27315 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27317 if (TARGET_AAPCS_BASED)
27318 align = MIN (align, 64);
27320 return align;
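/* For example, a 128-bit NEON vector type that would naturally be 16-byte
   aligned has its alignment capped at 64 bits (8 bytes) when compiling for
   an AAPCS-based target.  */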
27323 static unsigned int
27324 arm_autovectorize_vector_sizes (void)
27326 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27329 static bool
27330 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27332 /* Vectors which aren't in packed structures will not be less aligned than
27333 the natural alignment of their element type, so this is safe. */
27334 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27335 return !is_packed;
27337 return default_builtin_vector_alignment_reachable (type, is_packed);
27340 static bool
27341 arm_builtin_support_vector_misalignment (machine_mode mode,
27342 const_tree type, int misalignment,
27343 bool is_packed)
27345 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27347 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27349 if (is_packed)
27350 return align == 1;
27352 /* If the misalignment is unknown, we should be able to handle the access
27353 so long as it is not to a member of a packed data structure. */
27354 if (misalignment == -1)
27355 return true;
27357 /* Return true if the misalignment is a multiple of the natural alignment
27358 of the vector's element type. This is probably always going to be
27359 true in practice, since we've already established that this isn't a
27360 packed access. */
27361 return ((misalignment % align) == 0);
27364 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27365 is_packed);
27368 static void
27369 arm_conditional_register_usage (void)
27371 int regno;
27373 if (TARGET_THUMB1 && optimize_size)
27375 /* When optimizing for size on Thumb-1, it's better not
27376 to use the HI regs, because of the overhead of
27377 stacking them. */
27378 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27379 fixed_regs[regno] = call_used_regs[regno] = 1;
27382 /* The link register can be clobbered by any branch insn,
27383 but we have no way to track that at present, so mark
27384 it as unavailable. */
27385 if (TARGET_THUMB1)
27386 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27388 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27390 /* VFPv3 registers are disabled when earlier VFP
27391 versions are selected due to the definition of
27392 LAST_VFP_REGNUM. */
27393 for (regno = FIRST_VFP_REGNUM;
27394 regno <= LAST_VFP_REGNUM; ++ regno)
27396 fixed_regs[regno] = 0;
27397 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27398 || regno >= FIRST_VFP_REGNUM + 32;
27402 if (TARGET_REALLY_IWMMXT)
27404 regno = FIRST_IWMMXT_GR_REGNUM;
27405 /* The 2002/10/09 revision of the XScale ABI has wCG0
27406 and wCG1 as call-preserved registers. The 2002/11/21
27407 revision changed this so that all wCG registers are
27408 scratch registers. */
27409 for (regno = FIRST_IWMMXT_GR_REGNUM;
27410 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27411 fixed_regs[regno] = 0;
27412 /* The XScale ABI has wR0 - wR9 as scratch registers,
27413 the rest as call-preserved registers. */
27414 for (regno = FIRST_IWMMXT_REGNUM;
27415 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27417 fixed_regs[regno] = 0;
27418 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27422 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27424 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27425 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27427 else if (TARGET_APCS_STACK)
27429 fixed_regs[10] = 1;
27430 call_used_regs[10] = 1;
27432 /* -mcaller-super-interworking reserves r11 for calls to
27433 _interwork_r11_call_via_rN(). Making the register global
27434 is an easy way of ensuring that it remains valid for all
27435 calls. */
27436 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27437 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27439 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27440 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27441 if (TARGET_CALLER_INTERWORKING)
27442 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27444 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27447 static reg_class_t
27448 arm_preferred_rename_class (reg_class_t rclass)
27450 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27451 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
27452 which can reduce code size. */
27453 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27454 return LO_REGS;
27455 else
27456 return NO_REGS;
27459 /* Compute the attribute "length" of insn "*push_multi".
27460 So this function MUST be kept in sync with that insn pattern. */
27462 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27464 int i, regno, hi_reg;
27465 int num_saves = XVECLEN (parallel_op, 0);
27467 /* ARM mode. */
27468 if (TARGET_ARM)
27469 return 4;
27470 /* Thumb1 mode. */
27471 if (TARGET_THUMB1)
27472 return 2;
27474 /* Thumb2 mode. */
27475 regno = REGNO (first_op);
27476 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27477 for (i = 1; i < num_saves && !hi_reg; i++)
27479 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27480 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27483 if (!hi_reg)
27484 return 2;
27485 return 4;
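/* Roughly: ARM mode pushes are always 4 bytes and Thumb-1 pushes 2 bytes;
   a Thumb-2 PUSH whose register list contains only low registers and/or LR
   can use the 16-bit encoding (length 2), while any other high register in
   the list forces the 32-bit encoding (length 4).  */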
27488 /* Compute the number of instructions emitted by output_move_double. */
27490 arm_count_output_move_double_insns (rtx *operands)
27492 int count;
27493 rtx ops[2];
27494 /* output_move_double may modify the operands array, so call it
27495 here on a copy of the array. */
27496 ops[0] = operands[0];
27497 ops[1] = operands[1];
27498 output_move_double (ops, false, &count);
27499 return count;
27503 vfp3_const_double_for_fract_bits (rtx operand)
27505 REAL_VALUE_TYPE r0;
27507 if (!CONST_DOUBLE_P (operand))
27508 return 0;
27510 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27511 if (exact_real_inverse (DFmode, &r0))
27513 if (exact_real_truncate (DFmode, &r0))
27515 HOST_WIDE_INT value = real_to_integer (&r0);
27516 value = value & 0xffffffff;
27517 if ((value != 0) && ( (value & (value - 1)) == 0))
27518 return int_log2 (value);
27521 return 0;
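/* Worked example: for the constant 0.125 the exact inverse is 8.0, which
   is a power of two, so the function returns int_log2 (8) == 3, i.e. the
   number of fractional bits used by the fixed-point VCVT patterns.  */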
27525 vfp3_const_double_for_bits (rtx operand)
27527 REAL_VALUE_TYPE r0;
27529 if (!CONST_DOUBLE_P (operand))
27530 return 0;
27532 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27533 if (exact_real_truncate (DFmode, &r0))
27535 HOST_WIDE_INT value = real_to_integer (&r0);
27536 value = value & 0xffffffff;
27537 if ((value != 0) && ( (value & (value - 1)) == 0))
27538 return int_log2 (value);
27541 return 0;
27544 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27546 static void
27547 arm_pre_atomic_barrier (enum memmodel model)
27549 if (need_atomic_barrier_p (model, true))
27550 emit_insn (gen_memory_barrier ());
27553 static void
27554 arm_post_atomic_barrier (enum memmodel model)
27556 if (need_atomic_barrier_p (model, false))
27557 emit_insn (gen_memory_barrier ());
27560 /* Emit the load-exclusive and store-exclusive instructions.
27561 Use acquire and release versions if necessary. */
27563 static void
27564 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27566 rtx (*gen) (rtx, rtx);
27568 if (acq)
27570 switch (mode)
27572 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27573 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27574 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27575 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27576 default:
27577 gcc_unreachable ();
27580 else
27582 switch (mode)
27584 case QImode: gen = gen_arm_load_exclusiveqi; break;
27585 case HImode: gen = gen_arm_load_exclusivehi; break;
27586 case SImode: gen = gen_arm_load_exclusivesi; break;
27587 case DImode: gen = gen_arm_load_exclusivedi; break;
27588 default:
27589 gcc_unreachable ();
27593 emit_insn (gen (rval, mem));
27596 static void
27597 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27598 rtx mem, bool rel)
27600 rtx (*gen) (rtx, rtx, rtx);
27602 if (rel)
27604 switch (mode)
27606 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27607 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27608 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27609 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27610 default:
27611 gcc_unreachable ();
27614 else
27616 switch (mode)
27618 case QImode: gen = gen_arm_store_exclusiveqi; break;
27619 case HImode: gen = gen_arm_store_exclusivehi; break;
27620 case SImode: gen = gen_arm_store_exclusivesi; break;
27621 case DImode: gen = gen_arm_store_exclusivedi; break;
27622 default:
27623 gcc_unreachable ();
27627 emit_insn (gen (bval, rval, mem));
27630 /* Mark the previous jump instruction as unlikely. */
27632 static void
27633 emit_unlikely_jump (rtx insn)
27635 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27637 insn = emit_jump_insn (insn);
27638 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27641 /* Expand a compare and swap pattern. */
27643 void
27644 arm_expand_compare_and_swap (rtx operands[])
27646 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27647 machine_mode mode;
27648 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27650 bval = operands[0];
27651 rval = operands[1];
27652 mem = operands[2];
27653 oldval = operands[3];
27654 newval = operands[4];
27655 is_weak = operands[5];
27656 mod_s = operands[6];
27657 mod_f = operands[7];
27658 mode = GET_MODE (mem);
27660 /* Normally the succ memory model must be stronger than fail, but in the
27661 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27662 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27664 if (TARGET_HAVE_LDACQ
27665 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27666 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27667 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27669 switch (mode)
27671 case QImode:
27672 case HImode:
27673 /* For narrow modes, we're going to perform the comparison in SImode,
27674 so do the zero-extension now. */
27675 rval = gen_reg_rtx (SImode);
27676 oldval = convert_modes (SImode, mode, oldval, true);
27677 /* FALLTHRU */
27679 case SImode:
27680 /* Force the value into a register if needed. We waited until after
27681 the zero-extension above to do this properly. */
27682 if (!arm_add_operand (oldval, SImode))
27683 oldval = force_reg (SImode, oldval);
27684 break;
27686 case DImode:
27687 if (!cmpdi_operand (oldval, mode))
27688 oldval = force_reg (mode, oldval);
27689 break;
27691 default:
27692 gcc_unreachable ();
27695 switch (mode)
27697 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27698 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27699 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27700 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27701 default:
27702 gcc_unreachable ();
27705 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27707 if (mode == QImode || mode == HImode)
27708 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27710 /* In all cases, we arrange for success to be signaled by Z set.
27711 This arrangement allows for the boolean result to be used directly
27712 in a subsequent branch, post optimization. */
27713 x = gen_rtx_REG (CCmode, CC_REGNUM);
27714 x = gen_rtx_EQ (SImode, x, const0_rtx);
27715 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27718 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27719 another memory store between the load-exclusive and store-exclusive can
27720 reset the monitor from Exclusive to Open state. This means we must wait
27721 until after reload to split the pattern, lest we get a register spill in
27722 the middle of the atomic sequence. */
27724 void
27725 arm_split_compare_and_swap (rtx operands[])
27727 rtx rval, mem, oldval, newval, scratch;
27728 machine_mode mode;
27729 enum memmodel mod_s, mod_f;
27730 bool is_weak;
27731 rtx_code_label *label1, *label2;
27732 rtx x, cond;
27734 rval = operands[0];
27735 mem = operands[1];
27736 oldval = operands[2];
27737 newval = operands[3];
27738 is_weak = (operands[4] != const0_rtx);
27739 mod_s = (enum memmodel) INTVAL (operands[5]);
27740 mod_f = (enum memmodel) INTVAL (operands[6]);
27741 scratch = operands[7];
27742 mode = GET_MODE (mem);
27744 bool use_acquire = TARGET_HAVE_LDACQ
27745 && !(mod_s == MEMMODEL_RELAXED
27746 || mod_s == MEMMODEL_CONSUME
27747 || mod_s == MEMMODEL_RELEASE);
27749 bool use_release = TARGET_HAVE_LDACQ
27750 && !(mod_s == MEMMODEL_RELAXED
27751 || mod_s == MEMMODEL_CONSUME
27752 || mod_s == MEMMODEL_ACQUIRE);
27754 /* Checks whether a barrier is needed and emits one accordingly. */
27755 if (!(use_acquire || use_release))
27756 arm_pre_atomic_barrier (mod_s);
27758 label1 = NULL;
27759 if (!is_weak)
27761 label1 = gen_label_rtx ();
27762 emit_label (label1);
27764 label2 = gen_label_rtx ();
27766 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27768 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27769 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27770 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27771 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27772 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27774 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27776 /* Weak or strong, we want EQ to be true for success, so that we
27777 match the flags that we got from the compare above. */
27778 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27779 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27780 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27782 if (!is_weak)
27784 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27785 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27786 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27787 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27790 if (mod_f != MEMMODEL_RELAXED)
27791 emit_label (label2);
27793 /* Checks whether a barrier is needed and emits one accordingly. */
27794 if (!(use_acquire || use_release))
27795 arm_post_atomic_barrier (mod_s);
27797 if (mod_f == MEMMODEL_RELAXED)
27798 emit_label (label2);
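/* The emitted sequence is roughly (strong variant, relaxed memory model):

     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Lfail
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     .Lretry
     .Lfail:

   The retry branch is omitted for the weak variant, and DMB barriers or
   LDAEX/STLEX forms are used as required by the memory model.  */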
27801 void
27802 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27803 rtx value, rtx model_rtx, rtx cond)
27805 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27806 machine_mode mode = GET_MODE (mem);
27807 machine_mode wmode = (mode == DImode ? DImode : SImode);
27808 rtx_code_label *label;
27809 rtx x;
27811 bool use_acquire = TARGET_HAVE_LDACQ
27812 && !(model == MEMMODEL_RELAXED
27813 || model == MEMMODEL_CONSUME
27814 || model == MEMMODEL_RELEASE);
27816 bool use_release = TARGET_HAVE_LDACQ
27817 && !(model == MEMMODEL_RELAXED
27818 || model == MEMMODEL_CONSUME
27819 || model == MEMMODEL_ACQUIRE);
27821 /* Checks whether a barrier is needed and emits one accordingly. */
27822 if (!(use_acquire || use_release))
27823 arm_pre_atomic_barrier (model);
27825 label = gen_label_rtx ();
27826 emit_label (label);
27828 if (new_out)
27829 new_out = gen_lowpart (wmode, new_out);
27830 if (old_out)
27831 old_out = gen_lowpart (wmode, old_out);
27832 else
27833 old_out = new_out;
27834 value = simplify_gen_subreg (wmode, value, mode, 0);
27836 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27838 switch (code)
27840 case SET:
27841 new_out = value;
27842 break;
27844 case NOT:
27845 x = gen_rtx_AND (wmode, old_out, value);
27846 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27847 x = gen_rtx_NOT (wmode, new_out);
27848 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27849 break;
27851 case MINUS:
27852 if (CONST_INT_P (value))
27854 value = GEN_INT (-INTVAL (value));
27855 code = PLUS;
27857 /* FALLTHRU */
27859 case PLUS:
27860 if (mode == DImode)
27862 /* DImode plus/minus need to clobber flags. */
27863 /* The adddi3 and subdi3 patterns are incorrectly written so that
27864 they require matching operands, even when we could easily support
27865 three operands. Thankfully, this can be fixed up post-splitting,
27866 as the individual add+adc patterns do accept three operands and
27867 post-reload cprop can make these moves go away. */
27868 emit_move_insn (new_out, old_out);
27869 if (code == PLUS)
27870 x = gen_adddi3 (new_out, new_out, value);
27871 else
27872 x = gen_subdi3 (new_out, new_out, value);
27873 emit_insn (x);
27874 break;
27876 /* FALLTHRU */
27878 default:
27879 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27880 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27881 break;
27884 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27885 use_release);
27887 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27888 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27890 /* Checks whether a barrier is needed and emits one accordingly. */
27891 if (!(use_acquire || use_release))
27892 arm_post_atomic_barrier (model);
27895 #define MAX_VECT_LEN 16
27897 struct expand_vec_perm_d
27899 rtx target, op0, op1;
27900 unsigned char perm[MAX_VECT_LEN];
27901 machine_mode vmode;
27902 unsigned char nelt;
27903 bool one_vector_p;
27904 bool testing_p;
27907 /* Generate a variable permutation. */
27909 static void
27910 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27912 machine_mode vmode = GET_MODE (target);
27913 bool one_vector_p = rtx_equal_p (op0, op1);
27915 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27916 gcc_checking_assert (GET_MODE (op0) == vmode);
27917 gcc_checking_assert (GET_MODE (op1) == vmode);
27918 gcc_checking_assert (GET_MODE (sel) == vmode);
27919 gcc_checking_assert (TARGET_NEON);
27921 if (one_vector_p)
27923 if (vmode == V8QImode)
27924 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27925 else
27926 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27928 else
27930 rtx pair;
27932 if (vmode == V8QImode)
27934 pair = gen_reg_rtx (V16QImode);
27935 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27936 pair = gen_lowpart (TImode, pair);
27937 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27939 else
27941 pair = gen_reg_rtx (OImode);
27942 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27943 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27948 void
27949 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27951 machine_mode vmode = GET_MODE (target);
27952 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27953 bool one_vector_p = rtx_equal_p (op0, op1);
27954 rtx rmask[MAX_VECT_LEN], mask;
27956 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27957 numbering of elements for big-endian, we must reverse the order. */
27958 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27960 /* The VTBL instruction does not use a modulo index, so we must take care
27961 of that ourselves. */
27962 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27963 for (i = 0; i < nelt; ++i)
27964 rmask[i] = mask;
27965 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27966 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27968 arm_expand_vec_perm_1 (target, op0, op1, sel);
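/* The AND above supplies the modulo-nelt semantics that VEC_PERM_EXPR
   requires: VTBL itself returns 0 for out-of-range indexes rather than
   wrapping, so the selector is masked to nelt-1 (or 2*nelt-1 for a
   two-vector permute) before the table lookup.  */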
27971 /* Generate or test for an insn that supports a constant permutation. */
27973 /* Recognize patterns for the VUZP insns. */
27975 static bool
27976 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27978 unsigned int i, odd, mask, nelt = d->nelt;
27979 rtx out0, out1, in0, in1, x;
27980 rtx (*gen)(rtx, rtx, rtx, rtx);
27982 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27983 return false;
27985 /* Note that these are little-endian tests. Adjust for big-endian later. */
27986 if (d->perm[0] == 0)
27987 odd = 0;
27988 else if (d->perm[0] == 1)
27989 odd = 1;
27990 else
27991 return false;
27992 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27994 for (i = 0; i < nelt; i++)
27996 unsigned elt = (i * 2 + odd) & mask;
27997 if (d->perm[i] != elt)
27998 return false;
28001 /* Success! */
28002 if (d->testing_p)
28003 return true;
28005 switch (d->vmode)
28007 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28008 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28009 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28010 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28011 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28012 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28013 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28014 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28015 default:
28016 gcc_unreachable ();
28019 in0 = d->op0;
28020 in1 = d->op1;
28021 if (BYTES_BIG_ENDIAN)
28023 x = in0, in0 = in1, in1 = x;
28024 odd = !odd;
28027 out0 = d->target;
28028 out1 = gen_reg_rtx (d->vmode);
28029 if (odd)
28030 x = out0, out0 = out1, out1 = x;
28032 emit_insn (gen (out0, in0, in1, out1));
28033 return true;
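/* Example: for V4SImode with selector {0, 2, 4, 6} (odd == 0) this emits a
   VUZP of op0/op1, with the even-indexed elements landing in d->target and
   the odd-indexed ones in the scratch output register.  */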
28036 /* Recognize patterns for the VZIP insns. */
28038 static bool
28039 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28041 unsigned int i, high, mask, nelt = d->nelt;
28042 rtx out0, out1, in0, in1, x;
28043 rtx (*gen)(rtx, rtx, rtx, rtx);
28045 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28046 return false;
28048 /* Note that these are little-endian tests. Adjust for big-endian later. */
28049 high = nelt / 2;
28050 if (d->perm[0] == high)
28052 else if (d->perm[0] == 0)
28053 high = 0;
28054 else
28055 return false;
28056 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28058 for (i = 0; i < nelt / 2; i++)
28060 unsigned elt = (i + high) & mask;
28061 if (d->perm[i * 2] != elt)
28062 return false;
28063 elt = (elt + nelt) & mask;
28064 if (d->perm[i * 2 + 1] != elt)
28065 return false;
28068 /* Success! */
28069 if (d->testing_p)
28070 return true;
28072 switch (d->vmode)
28074 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28075 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28076 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28077 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28078 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28079 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28080 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28081 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28082 default:
28083 gcc_unreachable ();
28086 in0 = d->op0;
28087 in1 = d->op1;
28088 if (BYTES_BIG_ENDIAN)
28090 x = in0, in0 = in1, in1 = x;
28091 high = !high;
28094 out0 = d->target;
28095 out1 = gen_reg_rtx (d->vmode);
28096 if (high)
28097 x = out0, out0 = out1, out1 = x;
28099 emit_insn (gen (out0, in0, in1, out1));
28100 return true;
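/* Example: for V4SImode with selector {0, 4, 1, 5} (high == 0) this emits a
   VZIP interleaving the low halves of op0 and op1 into d->target.  */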
28103 /* Recognize patterns for the VREV insns. */
28105 static bool
28106 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28108 unsigned int i, j, diff, nelt = d->nelt;
28109 rtx (*gen)(rtx, rtx);
28111 if (!d->one_vector_p)
28112 return false;
28114 diff = d->perm[0];
28115 switch (diff)
28117 case 7:
28118 switch (d->vmode)
28120 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28121 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28122 default:
28123 return false;
28125 break;
28126 case 3:
28127 switch (d->vmode)
28129 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28130 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28131 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28132 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28133 default:
28134 return false;
28136 break;
28137 case 1:
28138 switch (d->vmode)
28140 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28141 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28142 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28143 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28144 case V4SImode: gen = gen_neon_vrev64v4si; break;
28145 case V2SImode: gen = gen_neon_vrev64v2si; break;
28146 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28147 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28148 default:
28149 return false;
28151 break;
28152 default:
28153 return false;
28156 for (i = 0; i < nelt ; i += diff + 1)
28157 for (j = 0; j <= diff; j += 1)
28159 /* This is guaranteed to be true as the value of diff
28160 is 7, 3, 1 and we should have enough elements in the
28161 queue to generate this. Getting a vector mask with a
28162 value of diff other than these values implies that
28163 something is wrong by the time we get here. */
28164 gcc_assert (i + j < nelt);
28165 if (d->perm[i + j] != i + diff - j)
28166 return false;
28169 /* Success! */
28170 if (d->testing_p)
28171 return true;
28173 emit_insn (gen (d->target, d->op0));
28174 return true;
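/* Example: a V8HImode selector of {1, 0, 3, 2, 5, 4, 7, 6} has diff == 1
   and is matched as VREV32.16, reversing the halfwords within each 32-bit
   group.  */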
28177 /* Recognize patterns for the VTRN insns. */
28179 static bool
28180 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28182 unsigned int i, odd, mask, nelt = d->nelt;
28183 rtx out0, out1, in0, in1, x;
28184 rtx (*gen)(rtx, rtx, rtx, rtx);
28186 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28187 return false;
28189 /* Note that these are little-endian tests. Adjust for big-endian later. */
28190 if (d->perm[0] == 0)
28191 odd = 0;
28192 else if (d->perm[0] == 1)
28193 odd = 1;
28194 else
28195 return false;
28196 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28198 for (i = 0; i < nelt; i += 2)
28200 if (d->perm[i] != i + odd)
28201 return false;
28202 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28203 return false;
28206 /* Success! */
28207 if (d->testing_p)
28208 return true;
28210 switch (d->vmode)
28212 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28213 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28214 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28215 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28216 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28217 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28218 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28219 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28220 default:
28221 gcc_unreachable ();
28224 in0 = d->op0;
28225 in1 = d->op1;
28226 if (BYTES_BIG_ENDIAN)
28228 x = in0, in0 = in1, in1 = x;
28229 odd = !odd;
28232 out0 = d->target;
28233 out1 = gen_reg_rtx (d->vmode);
28234 if (odd)
28235 x = out0, out0 = out1, out1 = x;
28237 emit_insn (gen (out0, in0, in1, out1));
28238 return true;
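/* Example: for V4SImode a selector of {0, 4, 2, 6} (odd == 0) matches VTRN,
   which transposes pairs of elements between the two input vectors.  */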
28241 /* Recognize patterns for the VEXT insns. */
28243 static bool
28244 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28246 unsigned int i, nelt = d->nelt;
28247 rtx (*gen) (rtx, rtx, rtx, rtx);
28248 rtx offset;
28250 unsigned int location;
28252 unsigned int next = d->perm[0] + 1;
28254 /* TODO: Handle GCC's numbering of elements for big-endian. */
28255 if (BYTES_BIG_ENDIAN)
28256 return false;
28258 /* Check if the extracted indexes are increasing by one. */
28259 for (i = 1; i < nelt; next++, i++)
28261 /* If we hit the most significant element of the 2nd vector in
28262 the previous iteration, no need to test further. */
28263 if (next == 2 * nelt)
28264 return false;
28266 /* If we are operating on only one vector: it could be a
28267 rotation. If there are only two elements of size < 64, let
28268 arm_evpc_neon_vrev catch it. */
28269 if (d->one_vector_p && (next == nelt))
28271 if ((nelt == 2) && (d->vmode != V2DImode))
28272 return false;
28273 else
28274 next = 0;
28277 if (d->perm[i] != next)
28278 return false;
28281 location = d->perm[0];
28283 switch (d->vmode)
28285 case V16QImode: gen = gen_neon_vextv16qi; break;
28286 case V8QImode: gen = gen_neon_vextv8qi; break;
28287 case V4HImode: gen = gen_neon_vextv4hi; break;
28288 case V8HImode: gen = gen_neon_vextv8hi; break;
28289 case V2SImode: gen = gen_neon_vextv2si; break;
28290 case V4SImode: gen = gen_neon_vextv4si; break;
28291 case V2SFmode: gen = gen_neon_vextv2sf; break;
28292 case V4SFmode: gen = gen_neon_vextv4sf; break;
28293 case V2DImode: gen = gen_neon_vextv2di; break;
28294 default:
28295 return false;
28298 /* Success! */
28299 if (d->testing_p)
28300 return true;
28302 offset = GEN_INT (location);
28303 emit_insn (gen (d->target, d->op0, d->op1, offset));
28304 return true;
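/* Example: for V4SImode a selector of {1, 2, 3, 4} is a consecutive run
   starting at index 1, so this emits VEXT with an offset of 1, extracting a
   window from the concatenation of op0 and op1.  */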
28307 /* The NEON VTBL instruction is a fully variable permutation that's even
28308 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28309 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28310 can do slightly better by expanding this as a constant where we don't
28311 have to apply a mask. */
28313 static bool
28314 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28316 rtx rperm[MAX_VECT_LEN], sel;
28317 machine_mode vmode = d->vmode;
28318 unsigned int i, nelt = d->nelt;
28320 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28321 numbering of elements for big-endian, we must reverse the order. */
28322 if (BYTES_BIG_ENDIAN)
28323 return false;
28325 if (d->testing_p)
28326 return true;
28328 /* Generic code will try constant permutation twice. Once with the
28329 original mode and again with the elements lowered to QImode.
28330 So wait and don't do the selector expansion ourselves. */
28331 if (vmode != V8QImode && vmode != V16QImode)
28332 return false;
28334 for (i = 0; i < nelt; ++i)
28335 rperm[i] = GEN_INT (d->perm[i]);
28336 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28337 sel = force_reg (vmode, sel);
28339 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28340 return true;
28343 static bool
28344 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28346 /* Check if the input mask matches vext before reordering the
28347 operands. */
28348 if (TARGET_NEON)
28349 if (arm_evpc_neon_vext (d))
28350 return true;
28352 /* The pattern matching functions above are written to look for a small
28353 number to begin the sequence (0, 1, N/2). If we begin with an index
28354 from the second operand, we can swap the operands. */
28355 if (d->perm[0] >= d->nelt)
28357 unsigned i, nelt = d->nelt;
28358 rtx x;
28360 for (i = 0; i < nelt; ++i)
28361 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28363 x = d->op0;
28364 d->op0 = d->op1;
28365 d->op1 = x;
28368 if (TARGET_NEON)
28370 if (arm_evpc_neon_vuzp (d))
28371 return true;
28372 if (arm_evpc_neon_vzip (d))
28373 return true;
28374 if (arm_evpc_neon_vrev (d))
28375 return true;
28376 if (arm_evpc_neon_vtrn (d))
28377 return true;
28378 return arm_evpc_neon_vtbl (d);
28380 return false;
28383 /* Expand a vec_perm_const pattern. */
28385 bool
28386 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28388 struct expand_vec_perm_d d;
28389 int i, nelt, which;
28391 d.target = target;
28392 d.op0 = op0;
28393 d.op1 = op1;
28395 d.vmode = GET_MODE (target);
28396 gcc_assert (VECTOR_MODE_P (d.vmode));
28397 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28398 d.testing_p = false;
28400 for (i = which = 0; i < nelt; ++i)
28402 rtx e = XVECEXP (sel, 0, i);
28403 int ei = INTVAL (e) & (2 * nelt - 1);
28404 which |= (ei < nelt ? 1 : 2);
28405 d.perm[i] = ei;
28408 switch (which)
28410 default:
28411 gcc_unreachable();
28413 case 3:
28414 d.one_vector_p = false;
28415 if (!rtx_equal_p (op0, op1))
28416 break;
28418 /* The elements of PERM do not suggest that only the first operand
28419 is used, but both operands are identical. Allow easier matching
28420 of the permutation by folding the permutation into the single
28421 input vector. */
28422 /* FALLTHRU */
28423 case 2:
28424 for (i = 0; i < nelt; ++i)
28425 d.perm[i] &= nelt - 1;
28426 d.op0 = op1;
28427 d.one_vector_p = true;
28428 break;
28430 case 1:
28431 d.op1 = op0;
28432 d.one_vector_p = true;
28433 break;
28436 return arm_expand_vec_perm_const_1 (&d);
28439 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28441 static bool
28442 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28443 const unsigned char *sel)
28445 struct expand_vec_perm_d d;
28446 unsigned int i, nelt, which;
28447 bool ret;
28449 d.vmode = vmode;
28450 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28451 d.testing_p = true;
28452 memcpy (d.perm, sel, nelt);
28454 /* Categorize the set of elements in the selector. */
28455 for (i = which = 0; i < nelt; ++i)
28457 unsigned char e = d.perm[i];
28458 gcc_assert (e < 2 * nelt);
28459 which |= (e < nelt ? 1 : 2);
28462 /* For all elements from second vector, fold the elements to first. */
28463 if (which == 2)
28464 for (i = 0; i < nelt; ++i)
28465 d.perm[i] -= nelt;
28467 /* Check whether the mask can be applied to the vector type. */
28468 d.one_vector_p = (which != 3);
28470 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28471 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28472 if (!d.one_vector_p)
28473 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28475 start_sequence ();
28476 ret = arm_expand_vec_perm_const_1 (&d);
28477 end_sequence ();
28479 return ret;
28482 bool
28483 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28485 /* If we are soft float, and we either have ldrd or the mode is no
28486 wider than a word, then all auto-increment forms are ok. */
28487 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28488 return true;
28490 switch (code)
28492 /* Post increment and Pre Decrement are supported for all
28493 instruction forms except for vector forms. */
28494 case ARM_POST_INC:
28495 case ARM_PRE_DEC:
28496 if (VECTOR_MODE_P (mode))
28498 if (code != ARM_PRE_DEC)
28499 return true;
28500 else
28501 return false;
28504 return true;
28506 case ARM_POST_DEC:
28507 case ARM_PRE_INC:
28508 /* Without LDRD and mode size greater than
28509 word size, there is no point in auto-incrementing
28510 because ldm and stm will not have these forms. */
28511 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28512 return false;
28514 /* Vector and floating point modes do not support
28515 these auto increment forms. */
28516 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28517 return false;
28519 return true;
28521 default:
28522 return false;
28526 return false;
28529 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28530 on ARM, since we know that shifts by negative amounts are no-ops.
28531 Additionally, the default expansion code is not available or suitable
28532 for post-reload insn splits (this can occur when the register allocator
28533 chooses not to do a shift in NEON).
28535 This function is used in both initial expand and post-reload splits, and
28536 handles all kinds of 64-bit shifts.
28538 Input requirements:
28539 - It is safe for the input and output to be the same register, but
28540 early-clobber rules apply for the shift amount and scratch registers.
28541 - Shift by register requires both scratch registers. In all other cases
28542 the scratch registers may be NULL.
28543 - Ashiftrt by a register also clobbers the CC register. */
28544 void
28545 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28546 rtx amount, rtx scratch1, rtx scratch2)
28548 rtx out_high = gen_highpart (SImode, out);
28549 rtx out_low = gen_lowpart (SImode, out);
28550 rtx in_high = gen_highpart (SImode, in);
28551 rtx in_low = gen_lowpart (SImode, in);
28553 /* Terminology:
28554 in = the register pair containing the input value.
28555 out = the destination register pair.
28556 up = the high- or low-part of each pair.
28557 down = the opposite part to "up".
28558 In a shift, we can consider bits to shift from "up"-stream to
28559 "down"-stream, so in a left-shift "up" is the low-part and "down"
28560 is the high-part of each register pair. */
28562 rtx out_up = code == ASHIFT ? out_low : out_high;
28563 rtx out_down = code == ASHIFT ? out_high : out_low;
28564 rtx in_up = code == ASHIFT ? in_low : in_high;
28565 rtx in_down = code == ASHIFT ? in_high : in_low;
28567 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28568 gcc_assert (out
28569 && (REG_P (out) || GET_CODE (out) == SUBREG)
28570 && GET_MODE (out) == DImode);
28571 gcc_assert (in
28572 && (REG_P (in) || GET_CODE (in) == SUBREG)
28573 && GET_MODE (in) == DImode);
28574 gcc_assert (amount
28575 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28576 && GET_MODE (amount) == SImode)
28577 || CONST_INT_P (amount)));
28578 gcc_assert (scratch1 == NULL
28579 || (GET_CODE (scratch1) == SCRATCH)
28580 || (GET_MODE (scratch1) == SImode
28581 && REG_P (scratch1)));
28582 gcc_assert (scratch2 == NULL
28583 || (GET_CODE (scratch2) == SCRATCH)
28584 || (GET_MODE (scratch2) == SImode
28585 && REG_P (scratch2)));
28586 gcc_assert (!REG_P (out) || !REG_P (amount)
28587 || !HARD_REGISTER_P (out)
28588 || (REGNO (out) != REGNO (amount)
28589 && REGNO (out) + 1 != REGNO (amount)));
28591 /* Macros to make following code more readable. */
28592 #define SUB_32(DEST,SRC) \
28593 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28594 #define RSB_32(DEST,SRC) \
28595 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28596 #define SUB_S_32(DEST,SRC) \
28597 gen_addsi3_compare0 ((DEST), (SRC), \
28598 GEN_INT (-32))
28599 #define SET(DEST,SRC) \
28600 gen_rtx_SET (SImode, (DEST), (SRC))
28601 #define SHIFT(CODE,SRC,AMOUNT) \
28602 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28603 #define LSHIFT(CODE,SRC,AMOUNT) \
28604 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28605 SImode, (SRC), (AMOUNT))
28606 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28607 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28608 SImode, (SRC), (AMOUNT))
28609 #define ORR(A,B) \
28610 gen_rtx_IOR (SImode, (A), (B))
28611 #define BRANCH(COND,LABEL) \
28612 gen_arm_cond_branch ((LABEL), \
28613 gen_rtx_ ## COND (CCmode, cc_reg, \
28614 const0_rtx), \
28615 cc_reg)
28617 /* Shifts by register and shifts by constant are handled separately. */
28618 if (CONST_INT_P (amount))
28620 /* We have a shift-by-constant. */
28622 /* First, handle out-of-range shift amounts.
28623 In both cases we try to match the result an ARM instruction in a
28624 shift-by-register would give. This helps reduce execution
28625 differences between optimization levels, but it won't stop other
28626 parts of the compiler doing different things. This is "undefined
28627 behaviour", in any case. */
28628 if (INTVAL (amount) <= 0)
28629 emit_insn (gen_movdi (out, in));
28630 else if (INTVAL (amount) >= 64)
28632 if (code == ASHIFTRT)
28634 rtx const31_rtx = GEN_INT (31);
28635 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28636 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28638 else
28639 emit_insn (gen_movdi (out, const0_rtx));
28642 /* Now handle valid shifts. */
28643 else if (INTVAL (amount) < 32)
28645 /* Shifts by a constant less than 32. */
28646 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28648 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28649 emit_insn (SET (out_down,
28650 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28651 out_down)));
28652 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28654 else
28656 /* Shifts by a constant greater than 31. */
28657 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28659 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28660 if (code == ASHIFTRT)
28661 emit_insn (gen_ashrsi3 (out_up, in_up,
28662 GEN_INT (31)));
28663 else
28664 emit_insn (SET (out_up, const0_rtx));
28667 else
28669 /* We have a shift-by-register. */
28670 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28672 /* This alternative requires the scratch registers. */
28673 gcc_assert (scratch1 && REG_P (scratch1));
28674 gcc_assert (scratch2 && REG_P (scratch2));
28676 /* We will need the values "amount-32" and "32-amount" later.
28677 Swapping them around now allows the later code to be more general. */
28678 switch (code)
28680 case ASHIFT:
28681 emit_insn (SUB_32 (scratch1, amount));
28682 emit_insn (RSB_32 (scratch2, amount));
28683 break;
28684 case ASHIFTRT:
28685 emit_insn (RSB_32 (scratch1, amount));
28686 /* Also set CC = amount > 32. */
28687 emit_insn (SUB_S_32 (scratch2, amount));
28688 break;
28689 case LSHIFTRT:
28690 emit_insn (RSB_32 (scratch1, amount));
28691 emit_insn (SUB_32 (scratch2, amount));
28692 break;
28693 default:
28694 gcc_unreachable ();
28697 /* Emit code like this:
28699 arithmetic-left:
28700 out_down = in_down << amount;
28701 out_down = (in_up << (amount - 32)) | out_down;
28702 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28703 out_up = in_up << amount;
28705 arithmetic-right:
28706 out_down = in_down >> amount;
28707 out_down = (in_up << (32 - amount)) | out_down;
28708 if (amount < 32)
28709 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28710 out_up = in_up << amount;
28712 logical-right:
28713 out_down = in_down >> amount;
28714 out_down = (in_up << (32 - amount)) | out_down;
28715 if (amount < 32)
28716 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28717 out_up = in_up << amount;
28719 The ARM and Thumb2 variants are the same but implemented slightly
28720 differently. If this were only called during expand we could just
28721 use the Thumb2 case and let combine do the right thing, but this
28722 can also be called from post-reload splitters. */
28724 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28726 if (!TARGET_THUMB2)
28728 /* Emit code for ARM mode. */
28729 emit_insn (SET (out_down,
28730 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28731 if (code == ASHIFTRT)
28733 rtx_code_label *done_label = gen_label_rtx ();
28734 emit_jump_insn (BRANCH (LT, done_label));
28735 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28736 out_down)));
28737 emit_label (done_label);
28739 else
28740 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28741 out_down)));
28743 else
28745 /* Emit code for Thumb2 mode.
28746 Thumb2 can't do shift and or in one insn. */
28747 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28748 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28750 if (code == ASHIFTRT)
28752 rtx_code_label *done_label = gen_label_rtx ();
28753 emit_jump_insn (BRANCH (LT, done_label));
28754 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28755 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28756 emit_label (done_label);
28758 else
28760 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28761 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28765 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28768 #undef SUB_32
28769 #undef RSB_32
28770 #undef SUB_S_32
28771 #undef SET
28772 #undef SHIFT
28773 #undef LSHIFT
28774 #undef REV_LSHIFT
28775 #undef ORR
28776 #undef BRANCH
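/* Worked example of the constant-shift path above: a logical right shift
   of a 64-bit value by 8 expands to

     out_low  = in_low >> 8;
     out_low |= in_high << 24;    (the 32 - amount reverse shift)
     out_high = in_high >> 8;

   while amounts in the range [32, 63] shift in_high straight into out_low
   and clear (or sign-fill, for ASHIFTRT) out_high.  */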
28780 /* Returns true if this is a valid comparison operation, and puts
28781 the operands into a form that is valid. */
28782 bool
28783 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28785 enum rtx_code code = GET_CODE (*comparison);
28786 int code_int;
28787 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28788 ? GET_MODE (*op2) : GET_MODE (*op1);
28790 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28792 if (code == UNEQ || code == LTGT)
28793 return false;
28795 code_int = (int)code;
28796 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28797 PUT_CODE (*comparison, (enum rtx_code)code_int);
28799 switch (mode)
28801 case SImode:
28802 if (!arm_add_operand (*op1, mode))
28803 *op1 = force_reg (mode, *op1);
28804 if (!arm_add_operand (*op2, mode))
28805 *op2 = force_reg (mode, *op2);
28806 return true;
28808 case DImode:
28809 if (!cmpdi_operand (*op1, mode))
28810 *op1 = force_reg (mode, *op1);
28811 if (!cmpdi_operand (*op2, mode))
28812 *op2 = force_reg (mode, *op2);
28813 return true;
28815 case SFmode:
28816 case DFmode:
28817 if (!arm_float_compare_operand (*op1, mode))
28818 *op1 = force_reg (mode, *op1);
28819 if (!arm_float_compare_operand (*op2, mode))
28820 *op2 = force_reg (mode, *op2);
28821 return true;
28822 default:
28823 break;
28826 return false;
28830 /* Maximum number of instructions to set block of memory. */
28831 static int
28832 arm_block_set_max_insns (void)
28834 if (optimize_function_for_size_p (cfun))
28835 return 4;
28836 else
28837 return current_tune->max_insns_inline_memset;
28840 /* Return TRUE if it's profitable to set block of memory for
28841 non-vectorized case. VAL is the value to set the memory
28842 with. LENGTH is the number of bytes to set. ALIGN is the
28843 alignment of the destination memory in bytes. UNALIGNED_P
28844 is TRUE if we can only set the memory with instructions
28845 meeting alignment requirements. USE_STRD_P is TRUE if we
28846 can use strd to set the memory. */
28847 static bool
28848 arm_block_set_non_vect_profit_p (rtx val,
28849 unsigned HOST_WIDE_INT length,
28850 unsigned HOST_WIDE_INT align,
28851 bool unaligned_p, bool use_strd_p)
28853 int num = 0;
28854 /* For a leftover of 0-7 bytes, this table gives the minimum number of
28855 strb/strh/str instructions needed to store it. */
28856 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28858 if (unaligned_p)
28860 num = arm_const_inline_cost (SET, val);
28861 num += length / align + length % align;
28863 else if (use_strd_p)
28865 num = arm_const_double_inline_cost (val);
28866 num += (length >> 3) + leftover[length & 7];
28868 else
28870 num = arm_const_inline_cost (SET, val);
28871 num += (length >> 2) + leftover[length & 3];
28874 /* We may be able to combine last pair STRH/STRB into a single STR
28875 by shifting one byte back. */
28876 if (unaligned_access && length > 3 && (length & 3) == 3)
28877 num--;
28879 return (num <= arm_block_set_max_insns ());
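/* Rough cost example: setting 15 bytes of 4-byte-aligned memory costs the
   constant-load instructions plus 3 word stores plus leftover[3] == 2
   trailing stores, minus one when unaligned access lets the final
   STRH/STRB pair be merged into a single STR; the expansion is used only
   if that total stays within arm_block_set_max_insns ().  */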
28882 /* Return TRUE if it's profitable to set block of memory for
28883 vectorized case. LENGTH is the number of bytes to set.
28884 ALIGN is the alignment of destination memory in bytes.
28885 MODE is the vector mode used to set the memory. */
28886 static bool
28887 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28888 unsigned HOST_WIDE_INT align,
28889 machine_mode mode)
28891 int num;
28892 bool unaligned_p = ((align & 3) != 0);
28893 unsigned int nelt = GET_MODE_NUNITS (mode);
28895 /* Instruction loading constant value. */
28896 num = 1;
28897 /* Instructions storing the memory. */
28898 num += (length + nelt - 1) / nelt;
28899 /* Instructions adjusting the address expression. We only need to
28900 adjust the address if the block is 4-byte aligned and the leftover
28901 bytes can only be stored with a misaligned store instruction. */
28902 if (!unaligned_p && (length & 3) != 0)
28903 num++;
28905 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28906 if (!unaligned_p && mode == V16QImode)
28907 num--;
28909 return (num <= arm_block_set_max_insns ());
28912 /* Set a block of memory using vectorization instructions for the
28913 unaligned case. We fill the first LENGTH bytes of the memory
28914 area starting from DSTBASE with byte constant VALUE. ALIGN is
28915 the alignment requirement of memory. Return TRUE if succeeded. */
28916 static bool
28917 arm_block_set_unaligned_vect (rtx dstbase,
28918 unsigned HOST_WIDE_INT length,
28919 unsigned HOST_WIDE_INT value,
28920 unsigned HOST_WIDE_INT align)
28922 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28923 rtx dst, mem;
28924 rtx val_elt, val_vec, reg;
28925 rtx rval[MAX_VECT_LEN];
28926 rtx (*gen_func) (rtx, rtx);
28927 machine_mode mode;
28928 unsigned HOST_WIDE_INT v = value;
28930 gcc_assert ((align & 0x3) != 0);
28931 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28932 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28933 if (length >= nelt_v16)
28935 mode = V16QImode;
28936 gen_func = gen_movmisalignv16qi;
28938 else
28940 mode = V8QImode;
28941 gen_func = gen_movmisalignv8qi;
28943 nelt_mode = GET_MODE_NUNITS (mode);
28944 gcc_assert (length >= nelt_mode);
28945 /* Skip if it isn't profitable. */
28946 if (!arm_block_set_vect_profit_p (length, align, mode))
28947 return false;
28949 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28950 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28952 v = sext_hwi (v, BITS_PER_WORD);
28953 val_elt = GEN_INT (v);
28954 for (j = 0; j < nelt_mode; j++)
28955 rval[j] = val_elt;
28957 reg = gen_reg_rtx (mode);
28958 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28959 /* Emit instruction loading the constant value. */
28960 emit_move_insn (reg, val_vec);
28962 /* Handle nelt_mode bytes in a vector. */
28963 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28965 emit_insn ((*gen_func) (mem, reg));
28966 if (i + 2 * nelt_mode <= length)
28967 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28970 /* If at least nelt_v8 bytes are left over, we must be in
28971 V16QImode. */
28972 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28974 /* Handle (8, 16) bytes leftover. */
28975 if (i + nelt_v8 < length)
28977 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28978 /* We are shifting bytes back, set the alignment accordingly. */
28979 if ((length & 1) != 0 && align >= 2)
28980 set_mem_align (mem, BITS_PER_UNIT);
28982 emit_insn (gen_movmisalignv16qi (mem, reg));
28984 /* Handle (0, 8] bytes leftover. */
28985 else if (i < length && i + nelt_v8 >= length)
28987 if (mode == V16QImode)
28989 reg = gen_lowpart (V8QImode, reg);
28990 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28992 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28993 + (nelt_mode - nelt_v8))));
28994 /* We are shifting bytes back, set the alignment accordingly. */
28995 if ((length & 1) != 0 && align >= 2)
28996 set_mem_align (mem, BITS_PER_UNIT);
28998 emit_insn (gen_movmisalignv8qi (mem, reg));
29001 return true;
29004 /* Set a block of memory using vectorization instructions for the
29005 aligned case. We fill the first LENGTH bytes of the memory area
29006 starting from DSTBASE with byte constant VALUE. ALIGN is the
29007 alignment requirement of memory. Return TRUE if succeeded. */
29008 static bool
29009 arm_block_set_aligned_vect (rtx dstbase,
29010 unsigned HOST_WIDE_INT length,
29011 unsigned HOST_WIDE_INT value,
29012 unsigned HOST_WIDE_INT align)
29014 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29015 rtx dst, addr, mem;
29016 rtx val_elt, val_vec, reg;
29017 rtx rval[MAX_VECT_LEN];
29018 machine_mode mode;
29019 unsigned HOST_WIDE_INT v = value;
29021 gcc_assert ((align & 0x3) == 0);
29022 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29023 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29024 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29025 mode = V16QImode;
29026 else
29027 mode = V8QImode;
29029 nelt_mode = GET_MODE_NUNITS (mode);
29030 gcc_assert (length >= nelt_mode);
29031 /* Skip if it isn't profitable. */
29032 if (!arm_block_set_vect_profit_p (length, align, mode))
29033 return false;
29035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29037 v = sext_hwi (v, BITS_PER_WORD);
29038 val_elt = GEN_INT (v);
29039 for (j = 0; j < nelt_mode; j++)
29040 rval[j] = val_elt;
29042 reg = gen_reg_rtx (mode);
29043 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29044 /* Emit instruction loading the constant value. */
29045 emit_move_insn (reg, val_vec);
29047 i = 0;
29048 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29049 if (mode == V16QImode)
29051 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29052 emit_insn (gen_movmisalignv16qi (mem, reg));
29053 i += nelt_mode;
29054 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29055 if (i + nelt_v8 < length && i + nelt_v16 > length)
29057 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29058 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29059 /* We are shifting bytes back, set the alignment accordingly. */
29060 if ((length & 0x3) == 0)
29061 set_mem_align (mem, BITS_PER_UNIT * 4);
29062 else if ((length & 0x1) == 0)
29063 set_mem_align (mem, BITS_PER_UNIT * 2);
29064 else
29065 set_mem_align (mem, BITS_PER_UNIT);
29067 emit_insn (gen_movmisalignv16qi (mem, reg));
29068 return true;
29070 /* Fall through for bytes leftover. */
29071 mode = V8QImode;
29072 nelt_mode = GET_MODE_NUNITS (mode);
29073 reg = gen_lowpart (V8QImode, reg);
29076 /* Handle 8 bytes in a vector. */
29077 for (; (i + nelt_mode <= length); i += nelt_mode)
29079 addr = plus_constant (Pmode, dst, i);
29080 mem = adjust_automodify_address (dstbase, mode, addr, i);
29081 emit_move_insn (mem, reg);
29084 /* Handle single word leftover by shifting 4 bytes back. We can
29085 use aligned access for this case. */
29086 if (i + UNITS_PER_WORD == length)
29088 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29089 mem = adjust_automodify_address (dstbase, mode,
29090 addr, i - UNITS_PER_WORD);
29091 /* We are shifting 4 bytes back, set the alignment accordingly. */
29092 if (align > UNITS_PER_WORD)
29093 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29095 emit_move_insn (mem, reg);
29097 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29098 We have to use unaligned access for this case. */
29099 else if (i < length)
29101 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29102 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29103 /* We are shifting bytes back, set the alignment accordingly. */
29104 if ((length & 1) == 0)
29105 set_mem_align (mem, BITS_PER_UNIT * 2);
29106 else
29107 set_mem_align (mem, BITS_PER_UNIT);
29109 emit_insn (gen_movmisalignv8qi (mem, reg));
29112 return true;
29115 /* Set a block of memory using plain strh/strb instructions, using
29116 only the instructions permitted by the alignment ALIGN. We fill the
29117 first LENGTH bytes of the memory area starting from DSTBASE
29118 with byte constant VALUE. ALIGN is the alignment requirement
29119 of memory. */
29120 static bool
29121 arm_block_set_unaligned_non_vect (rtx dstbase,
29122 unsigned HOST_WIDE_INT length,
29123 unsigned HOST_WIDE_INT value,
29124 unsigned HOST_WIDE_INT align)
29126 unsigned int i;
29127 rtx dst, addr, mem;
29128 rtx val_exp, val_reg, reg;
29129 machine_mode mode;
29130 HOST_WIDE_INT v = value;
29132 gcc_assert (align == 1 || align == 2);
29134 if (align == 2)
29135 v |= (value << BITS_PER_UNIT);
29137 v = sext_hwi (v, BITS_PER_WORD);
29138 val_exp = GEN_INT (v);
29139 /* Skip if it isn't profitable. */
29140 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29141 align, true, false))
29142 return false;
29144 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29145 mode = (align == 2 ? HImode : QImode);
29146 val_reg = force_reg (SImode, val_exp);
29147 reg = gen_lowpart (mode, val_reg);
29149 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29151 addr = plus_constant (Pmode, dst, i);
29152 mem = adjust_automodify_address (dstbase, mode, addr, i);
29153 emit_move_insn (mem, reg);
29156 /* Handle single byte leftover. */
29157 if (i + 1 == length)
29159 reg = gen_lowpart (QImode, val_reg);
29160 addr = plus_constant (Pmode, dst, i);
29161 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29162 emit_move_insn (mem, reg);
29163 i++;
29166 gcc_assert (i == length);
29167 return true;
/* Set a block of memory using plain strd/str/strh/strb instructions,
   permitting unaligned stores on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with the byte constant VALUE.
   ALIGN is the alignment requirement of the memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge the last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}

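/* A worked example of the routine above, with hypothetical operands and
   assuming the profitability check accepts it: LENGTH = 15, ALIGN = 4 and
   VALUE = 0xAB on a tuning that does not prefer strd.  V = 0xABABABAB,
   the word loop covers offsets 0, 4 and 8, leaving i = 12 with three
   bytes to go; because unaligned_access is set and i + 3 == LENGTH, the
   halfword/byte tail is merged into one overlapping unaligned str at
   offset 11:

     str  rV, [rD]
     str  rV, [rD, #4]
     str  rV, [rD, #8]
     str  rV, [rD, #11]   @ unaligned, overlaps the previous word  */
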
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with the byte constant VALUE.
   ALIGN is the alignment requirement of the memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instructions.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instructions are available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}

/* Expand a string store operation.  We first try to do it using
   vectorization instructions, then with ARM unaligned access and
   double-word stores if that is profitable.  OPERANDS[0] is the
   destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
   value to initialize the memory with, OPERANDS[3] is the known
   alignment of the destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}

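/* An illustrative call into arm_gen_setmem, with hypothetical operand
   values as they would arrive from the setmem expander: for
   memset (p, 0xAB, 13) with P known to be 4-byte aligned,
   OPERANDS[1] = 13, OPERANDS[2] = 0xAB and OPERANDS[3] = 4.  VALUE is
   masked down to 0xAB and, on a tuning that does not set
   string_ops_prefer_neon, the call falls through to
   arm_block_set_aligned_non_vect.  */
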
/* Implement the TARGET_SCHED_MACRO_FUSION_P hook.  */
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
}

/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
	   movw imm / movt imm
	 instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
	return false;

      /* We are trying to match:
	   prev (movw) == (set (reg r0) (const_int imm16))
	   curr (movt) == (set (zero_extract (reg r0)
					     (const_int 16)
					     (const_int 16))
			       (const_int imm16_1))
	 or
	   prev (movw) == (set (reg r1)
			       (high (symbol_ref ("SYM"))))
	   curr (movt) == (set (reg r1)
			       (lo_sum (reg r1)
				       (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
	{
	  if (CONST_INT_P (SET_SRC (curr_set))
	      && CONST_INT_P (SET_SRC (prev_set))
	      && REG_P (XEXP (set_dest, 0))
	      && REG_P (SET_DEST (prev_set))
	      && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	    return true;
	}
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	       && REG_P (SET_DEST (curr_set))
	       && REG_P (SET_DEST (prev_set))
	       && GET_CODE (SET_SRC (prev_set)) == HIGH
	       && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
	return true;
    }

  return false;
}

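/* For illustration, a typical pair accepted above when the active tuning
   enables ARM_FUSE_MOVW_MOVT (registers and symbol are hypothetical):

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   Both SETs target the same SImode register (the HIGH/LO_SUM form of the
   patterns shown in the comment), so the hook returns true and the
   scheduler keeps the two instructions adjacent.  */
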
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}

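/* With the standard AddressSanitizer mapping, shadow = (addr >> 3) +
   offset.  Using the offset above, a byte at the (hypothetical) address
   0x00010000 is therefore shadowed at
   (0x00010000 >> 3) + 0x20000000 = 0x20002000.  */
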
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where both symbols are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((TREE_CODE (decl_op1) == VAR_DECL
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (TREE_CODE (decl_op0) == VAR_DECL
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}

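/* Illustrative behaviour of the predicate above (hypothetical symbols):
   for (minus (symbol_ref "a") (symbol_ref "b")) where both variables are
   placed in the same section, the difference is a link-time constant and
   the function returns false, i.e. the expression is acceptable for
   debug info; if the two symbols end up in different sections, or the
   second operand is a SYMBOL_REF with no attached decl, it returns true
   and the expression is rejected.  */
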
/* Return TRUE if X is a reference to a value in a constant pool.  */
extern bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}

/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

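/* For illustration (hypothetical RTL):
   (mem (plus (reg r4) (const_int 8))) yields *BASE = (reg r4) and
   *OFFSET = (const_int 8); a plain (mem (reg r4)) yields
   *BASE = (reg r4) and *OFFSET = (const_int 0); any other address form,
   e.g. a pre/post-modify address, clears both and returns false.  */
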
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
   set to TRUE if it is a load.  Return TRUE if INSN is such an
   instruction, otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.
   For other instructions, FUSION_PRI and PRI are simply set to
   MAX_PRI.  In the future, other kinds of instruction fusion can
   be supported by returning different priorities.

   It's important that irrelevant instructions get the largest
   FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

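/* A worked example of the priority calculation above, with a
   hypothetical MAX_PRI of 100 and two loads ldr r0, [r4, #4] and
   ldr r1, [r4, #8]: both are loads, so both receive *FUSION_PRI = 98 and
   the scheduler groups them together; their *PRI values share the same
   base-register term and differ only in the offset term, so the access
   at offset 4 gets the larger priority and is scheduled before the one
   at offset 8, keeping the pair adjacent for possible ldrd formation.  */
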
#include "gt-arm.h"