2014-12-02 Tom de Vries <tom@codesourcery.com>
[official-gcc.git] / gcc / config / arm / arm.c
blobf3be6cfc7f4092098734f228169c51704ea94103
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
95 int i[4];
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_print_operand (FILE *, rtx, int);
121 static void arm_print_operand_address (FILE *, rtx);
122 static bool arm_print_operand_punct_valid_p (unsigned char code);
123 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
124 static arm_cc get_arm_condition_code (rtx);
125 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
126 static const char *output_multi_immediate (rtx *, const char *, const char *,
127 int, HOST_WIDE_INT);
128 static const char *shift_op (rtx, HOST_WIDE_INT *);
129 static struct machine_function *arm_init_machine_status (void);
130 static void thumb_exit (FILE *, int);
131 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
132 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
133 static Mnode *add_minipool_forward_ref (Mfix *);
134 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_backward_ref (Mfix *);
136 static void assign_minipool_offsets (Mfix *);
137 static void arm_print_value (FILE *, rtx);
138 static void dump_minipool (rtx_insn *);
139 static int arm_barrier_cost (rtx);
140 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
141 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
142 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
143 machine_mode, rtx);
144 static void arm_reorg (void);
145 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
146 static unsigned long arm_compute_save_reg0_reg12_mask (void);
147 static unsigned long arm_compute_save_reg_mask (void);
148 static unsigned long arm_isr_value (tree);
149 static unsigned long arm_compute_func_type (void);
150 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
154 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
155 #endif
156 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
157 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
158 static int arm_comp_type_attributes (const_tree, const_tree);
159 static void arm_set_default_type_attributes (tree);
160 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
161 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
162 static int optimal_immediate_sequence (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence);
165 static int optimal_immediate_sequence_1 (enum rtx_code code,
166 unsigned HOST_WIDE_INT val,
167 struct four_ints *return_sequence,
168 int i);
169 static int arm_get_strip_length (int);
170 static bool arm_function_ok_for_sibcall (tree, tree);
171 static machine_mode arm_promote_function_mode (const_tree,
172 machine_mode, int *,
173 const_tree, int);
174 static bool arm_return_in_memory (const_tree, const_tree);
175 static rtx arm_function_value (const_tree, const_tree, bool);
176 static rtx arm_libcall_value_1 (machine_mode);
177 static rtx arm_libcall_value (machine_mode, const_rtx);
178 static bool arm_function_value_regno_p (const unsigned int);
179 static void arm_internal_label (FILE *, const char *, unsigned long);
180 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
181 tree);
182 static bool arm_have_conditional_execution (void);
183 static bool arm_cannot_force_const_mem (machine_mode, rtx);
184 static bool arm_legitimate_constant_p (machine_mode, rtx);
185 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
186 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
187 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
192 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
193 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
194 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
195 static void emit_constant_insn (rtx cond, rtx pattern);
196 static rtx_insn *emit_set_insn (rtx, rtx);
197 static rtx emit_multi_reg_push (unsigned long, unsigned long);
198 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
199 tree, bool);
200 static rtx arm_function_arg (cumulative_args_t, machine_mode,
201 const_tree, bool);
202 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
203 const_tree, bool);
204 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
205 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
206 const_tree);
207 static rtx aapcs_libcall_value (machine_mode);
208 static int aapcs_select_return_coproc (const_tree, const_tree);
210 #ifdef OBJECT_FORMAT_ELF
211 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
212 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
213 #endif
214 #ifndef ARM_PE
215 static void arm_encode_section_info (tree, rtx, int);
216 #endif
218 static void arm_file_end (void);
219 static void arm_file_start (void);
221 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
222 tree, int *, int);
223 static bool arm_pass_by_reference (cumulative_args_t,
224 machine_mode, const_tree, bool);
225 static bool arm_promote_prototypes (const_tree);
226 static bool arm_default_short_enums (void);
227 static bool arm_align_anon_bitfield (void);
228 static bool arm_return_in_msb (const_tree);
229 static bool arm_must_pass_in_stack (machine_mode, const_tree);
230 static bool arm_return_in_memory (const_tree, const_tree);
231 #if ARM_UNWIND_INFO
232 static void arm_unwind_emit (FILE *, rtx_insn *);
233 static bool arm_output_ttype (rtx);
234 static void arm_asm_emit_except_personality (rtx);
235 static void arm_asm_init_sections (void);
236 #endif
237 static rtx arm_dwarf_register_span (rtx);
239 static tree arm_cxx_guard_type (void);
240 static bool arm_cxx_guard_mask_bit (void);
241 static tree arm_get_cookie_size (tree);
242 static bool arm_cookie_has_size (void);
243 static bool arm_cxx_cdtor_returns_this (void);
244 static bool arm_cxx_key_method_may_be_inline (void);
245 static void arm_cxx_determine_class_data_visibility (tree);
246 static bool arm_cxx_class_data_always_comdat (void);
247 static bool arm_cxx_use_aeabi_atexit (void);
248 static void arm_init_libfuncs (void);
249 static tree arm_build_builtin_va_list (void);
250 static void arm_expand_builtin_va_start (tree, rtx);
251 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
252 static void arm_option_override (void);
253 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
254 static bool arm_cannot_copy_insn_p (rtx_insn *);
255 static int arm_issue_rate (void);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static const char *arm_invalid_parameter_type (const_tree t);
261 static const char *arm_invalid_return_type (const_tree t);
262 static tree arm_promoted_type (const_tree t);
263 static tree arm_convert_to_type (tree type, tree expr);
264 static bool arm_scalar_mode_supported_p (machine_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (machine_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
291 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
292 const unsigned char *sel);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 /* Table of machine attributes. */
310 static const struct attribute_spec arm_attribute_table[] =
312 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
313 affects_type_identity } */
314 /* Function calls made to this symbol must be done indirectly, because
315 it may lie outside of the 26 bit addressing range of a normal function
316 call. */
317 { "long_call", 0, 0, false, true, true, NULL, false },
318 /* Whereas these functions are always known to reside within the 26 bit
319 addressing range. */
320 { "short_call", 0, 0, false, true, true, NULL, false },
321 /* Specify the procedure call conventions for a function. */
322 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
323 false },
324 /* Interrupt Service Routines have special prologue and epilogue requirements. */
325 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
326 false },
327 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
329 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #ifdef ARM_PE
332 /* ARM/PE has three new attributes:
333 interfacearm - ?
334 dllexport - for exporting a function/variable that will live in a dll
335 dllimport - for importing a function/variable from a dll
337 Microsoft allows multiple declspecs in one __declspec, separating
338 them with spaces. We do NOT support this. Instead, use __declspec
339 multiple times.
341 { "dllimport", 0, 0, true, false, false, NULL, false },
342 { "dllexport", 0, 0, true, false, false, NULL, false },
343 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
344 false },
345 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
346 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
347 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
349 false },
350 #endif
351 { NULL, 0, 0, false, false, false, NULL, false }
354 /* Initialize the GCC target structure. */
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 #undef TARGET_MERGE_DECL_ATTRIBUTES
357 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
358 #endif
360 #undef TARGET_LEGITIMIZE_ADDRESS
361 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
363 #undef TARGET_LRA_P
364 #define TARGET_LRA_P arm_lra_p
366 #undef TARGET_ATTRIBUTE_TABLE
367 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
369 #undef TARGET_ASM_FILE_START
370 #define TARGET_ASM_FILE_START arm_file_start
371 #undef TARGET_ASM_FILE_END
372 #define TARGET_ASM_FILE_END arm_file_end
374 #undef TARGET_ASM_ALIGNED_SI_OP
375 #define TARGET_ASM_ALIGNED_SI_OP NULL
376 #undef TARGET_ASM_INTEGER
377 #define TARGET_ASM_INTEGER arm_assemble_integer
379 #undef TARGET_PRINT_OPERAND
380 #define TARGET_PRINT_OPERAND arm_print_operand
381 #undef TARGET_PRINT_OPERAND_ADDRESS
382 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
383 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
384 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
386 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
387 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
389 #undef TARGET_ASM_FUNCTION_PROLOGUE
390 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
392 #undef TARGET_ASM_FUNCTION_EPILOGUE
393 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE arm_option_override
398 #undef TARGET_COMP_TYPE_ATTRIBUTES
399 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
401 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
402 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
404 #undef TARGET_SCHED_ADJUST_COST
405 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
407 #undef TARGET_SCHED_REORDER
408 #define TARGET_SCHED_REORDER arm_sched_reorder
410 #undef TARGET_REGISTER_MOVE_COST
411 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
413 #undef TARGET_MEMORY_MOVE_COST
414 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
416 #undef TARGET_ENCODE_SECTION_INFO
417 #ifdef ARM_PE
418 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
419 #else
420 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
421 #endif
423 #undef TARGET_STRIP_NAME_ENCODING
424 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
426 #undef TARGET_ASM_INTERNAL_LABEL
427 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
429 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
430 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
432 #undef TARGET_FUNCTION_VALUE
433 #define TARGET_FUNCTION_VALUE arm_function_value
435 #undef TARGET_LIBCALL_VALUE
436 #define TARGET_LIBCALL_VALUE arm_libcall_value
438 #undef TARGET_FUNCTION_VALUE_REGNO_P
439 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
441 #undef TARGET_ASM_OUTPUT_MI_THUNK
442 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
443 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
444 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
446 #undef TARGET_RTX_COSTS
447 #define TARGET_RTX_COSTS arm_rtx_costs
448 #undef TARGET_ADDRESS_COST
449 #define TARGET_ADDRESS_COST arm_address_cost
451 #undef TARGET_SHIFT_TRUNCATION_MASK
452 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
455 #undef TARGET_ARRAY_MODE_SUPPORTED_P
456 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
457 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
458 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
459 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
460 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
461 arm_autovectorize_vector_sizes
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
466 #undef TARGET_INIT_BUILTINS
467 #define TARGET_INIT_BUILTINS arm_init_builtins
468 #undef TARGET_EXPAND_BUILTIN
469 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
470 #undef TARGET_BUILTIN_DECL
471 #define TARGET_BUILTIN_DECL arm_builtin_decl
473 #undef TARGET_INIT_LIBFUNCS
474 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
476 #undef TARGET_PROMOTE_FUNCTION_MODE
477 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
478 #undef TARGET_PROMOTE_PROTOTYPES
479 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
480 #undef TARGET_PASS_BY_REFERENCE
481 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
482 #undef TARGET_ARG_PARTIAL_BYTES
483 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
484 #undef TARGET_FUNCTION_ARG
485 #define TARGET_FUNCTION_ARG arm_function_arg
486 #undef TARGET_FUNCTION_ARG_ADVANCE
487 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
488 #undef TARGET_FUNCTION_ARG_BOUNDARY
489 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
491 #undef TARGET_SETUP_INCOMING_VARARGS
492 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
494 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
495 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
497 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
498 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
499 #undef TARGET_TRAMPOLINE_INIT
500 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
501 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
502 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
504 #undef TARGET_WARN_FUNC_RETURN
505 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
507 #undef TARGET_DEFAULT_SHORT_ENUMS
508 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
510 #undef TARGET_ALIGN_ANON_BITFIELD
511 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
513 #undef TARGET_NARROW_VOLATILE_BITFIELD
514 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
516 #undef TARGET_CXX_GUARD_TYPE
517 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
519 #undef TARGET_CXX_GUARD_MASK_BIT
520 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
522 #undef TARGET_CXX_GET_COOKIE_SIZE
523 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
525 #undef TARGET_CXX_COOKIE_HAS_SIZE
526 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
528 #undef TARGET_CXX_CDTOR_RETURNS_THIS
529 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
531 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
532 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
534 #undef TARGET_CXX_USE_AEABI_ATEXIT
535 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
537 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
538 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
539 arm_cxx_determine_class_data_visibility
541 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
542 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
544 #undef TARGET_RETURN_IN_MSB
545 #define TARGET_RETURN_IN_MSB arm_return_in_msb
547 #undef TARGET_RETURN_IN_MEMORY
548 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
550 #undef TARGET_MUST_PASS_IN_STACK
551 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
553 #if ARM_UNWIND_INFO
554 #undef TARGET_ASM_UNWIND_EMIT
555 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
557 /* EABI unwinding tables use a different format for the typeinfo tables. */
558 #undef TARGET_ASM_TTYPE
559 #define TARGET_ASM_TTYPE arm_output_ttype
561 #undef TARGET_ARM_EABI_UNWINDER
562 #define TARGET_ARM_EABI_UNWINDER true
564 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
565 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
567 #undef TARGET_ASM_INIT_SECTIONS
568 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
569 #endif /* ARM_UNWIND_INFO */
571 #undef TARGET_DWARF_REGISTER_SPAN
572 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
574 #undef TARGET_CANNOT_COPY_INSN_P
575 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
577 #ifdef HAVE_AS_TLS
578 #undef TARGET_HAVE_TLS
579 #define TARGET_HAVE_TLS true
580 #endif
582 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
583 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
585 #undef TARGET_LEGITIMATE_CONSTANT_P
586 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
588 #undef TARGET_CANNOT_FORCE_CONST_MEM
589 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
591 #undef TARGET_MAX_ANCHOR_OFFSET
592 #define TARGET_MAX_ANCHOR_OFFSET 4095
594 /* The minimum is set such that the total size of the block
595 for a particular anchor is -4088 + 1 + 4095 bytes, which is
596 divisible by eight, ensuring natural spacing of anchors. */
597 #undef TARGET_MIN_ANCHOR_OFFSET
598 #define TARGET_MIN_ANCHOR_OFFSET -4088
600 #undef TARGET_SCHED_ISSUE_RATE
601 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE arm_mangle_type
606 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
607 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
609 #undef TARGET_BUILD_BUILTIN_VA_LIST
610 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
611 #undef TARGET_EXPAND_BUILTIN_VA_START
612 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
613 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
614 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
616 #ifdef HAVE_AS_TLS
617 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
618 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
619 #endif
621 #undef TARGET_LEGITIMATE_ADDRESS_P
622 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
624 #undef TARGET_PREFERRED_RELOAD_CLASS
625 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
627 #undef TARGET_INVALID_PARAMETER_TYPE
628 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
630 #undef TARGET_INVALID_RETURN_TYPE
631 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
633 #undef TARGET_PROMOTED_TYPE
634 #define TARGET_PROMOTED_TYPE arm_promoted_type
636 #undef TARGET_CONVERT_TO_TYPE
637 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
639 #undef TARGET_SCALAR_MODE_SUPPORTED_P
640 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
642 #undef TARGET_FRAME_POINTER_REQUIRED
643 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
645 #undef TARGET_CAN_ELIMINATE
646 #define TARGET_CAN_ELIMINATE arm_can_eliminate
648 #undef TARGET_CONDITIONAL_REGISTER_USAGE
649 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
651 #undef TARGET_CLASS_LIKELY_SPILLED_P
652 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
654 #undef TARGET_VECTORIZE_BUILTINS
655 #define TARGET_VECTORIZE_BUILTINS
657 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
658 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
659 arm_builtin_vectorized_function
661 #undef TARGET_VECTOR_ALIGNMENT
662 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
664 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
665 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
666 arm_vector_alignment_reachable
668 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
669 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
670 arm_builtin_support_vector_misalignment
672 #undef TARGET_PREFERRED_RENAME_CLASS
673 #define TARGET_PREFERRED_RENAME_CLASS \
674 arm_preferred_rename_class
676 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
677 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
678 arm_vectorize_vec_perm_const_ok
680 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
681 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
682 arm_builtin_vectorization_cost
683 #undef TARGET_VECTORIZE_ADD_STMT_COST
684 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
686 #undef TARGET_CANONICALIZE_COMPARISON
687 #define TARGET_CANONICALIZE_COMPARISON \
688 arm_canonicalize_comparison
690 #undef TARGET_ASAN_SHADOW_OFFSET
691 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
693 #undef MAX_INSN_PER_IT_BLOCK
694 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
696 #undef TARGET_CAN_USE_DOLOOP_P
697 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
699 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
700 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
702 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
703 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
705 #undef TARGET_SCHED_FUSION_PRIORITY
706 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
708 struct gcc_target targetm = TARGET_INITIALIZER;
710 /* Obstack for minipool constant handling. */
711 static struct obstack minipool_obstack;
712 static char * minipool_startobj;
714 /* The maximum number of insns skipped which
715 will be conditionalised if possible. */
716 static int max_insns_skipped = 5;
718 extern FILE * asm_out_file;
720 /* True if we are currently building a constant table. */
721 int making_const_table;
723 /* The processor for which instructions should be scheduled. */
724 enum processor_type arm_tune = arm_none;
726 /* The current tuning set. */
727 const struct tune_params *current_tune;
729 /* Which floating point hardware to schedule for. */
730 int arm_fpu_attr;
732 /* Which floating popint hardware to use. */
733 const struct arm_fpu_desc *arm_fpu_desc;
735 /* Used for Thumb call_via trampolines. */
736 rtx thumb_call_via_label[14];
737 static int thumb_call_reg_needed;
739 /* The bits in this mask specify which
740 instructions we are allowed to generate. */
741 unsigned long insn_flags = 0;
743 /* The bits in this mask specify which instruction scheduling options should
744 be used. */
745 unsigned long tune_flags = 0;
747 /* The highest ARM architecture version supported by the
748 target. */
749 enum base_architecture arm_base_arch = BASE_ARCH_0;
751 /* The following are used in the arm.md file as equivalents to bits
752 in the above two flag variables. */
754 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
755 int arm_arch3m = 0;
757 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
758 int arm_arch4 = 0;
760 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
761 int arm_arch4t = 0;
763 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
764 int arm_arch5 = 0;
766 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
767 int arm_arch5e = 0;
769 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
770 int arm_arch6 = 0;
772 /* Nonzero if this chip supports the ARM 6K extensions. */
773 int arm_arch6k = 0;
775 /* Nonzero if instructions present in ARMv6-M can be used. */
776 int arm_arch6m = 0;
778 /* Nonzero if this chip supports the ARM 7 extensions. */
779 int arm_arch7 = 0;
781 /* Nonzero if instructions not present in the 'M' profile can be used. */
782 int arm_arch_notm = 0;
784 /* Nonzero if instructions present in ARMv7E-M can be used. */
785 int arm_arch7em = 0;
787 /* Nonzero if instructions present in ARMv8 can be used. */
788 int arm_arch8 = 0;
790 /* Nonzero if this chip can benefit from load scheduling. */
791 int arm_ld_sched = 0;
793 /* Nonzero if this chip is a StrongARM. */
794 int arm_tune_strongarm = 0;
796 /* Nonzero if this chip supports Intel Wireless MMX technology. */
797 int arm_arch_iwmmxt = 0;
799 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
800 int arm_arch_iwmmxt2 = 0;
802 /* Nonzero if this chip is an XScale. */
803 int arm_arch_xscale = 0;
805 /* Nonzero if tuning for XScale */
806 int arm_tune_xscale = 0;
808 /* Nonzero if we want to tune for stores that access the write-buffer.
809 This typically means an ARM6 or ARM7 with MMU or MPU. */
810 int arm_tune_wbuf = 0;
812 /* Nonzero if tuning for Cortex-A9. */
813 int arm_tune_cortex_a9 = 0;
815 /* Nonzero if generating Thumb instructions. */
816 int thumb_code = 0;
818 /* Nonzero if generating Thumb-1 instructions. */
819 int thumb1_code = 0;
821 /* Nonzero if we should define __THUMB_INTERWORK__ in the
822 preprocessor.
823 XXX This is a bit of a hack, it's intended to help work around
824 problems in GLD which doesn't understand that armv5t code is
825 interworking clean. */
826 int arm_cpp_interwork = 0;
828 /* Nonzero if chip supports Thumb 2. */
829 int arm_arch_thumb2;
831 /* Nonzero if chip supports integer division instruction. */
832 int arm_arch_arm_hwdiv;
833 int arm_arch_thumb_hwdiv;
835 /* Nonzero if we should use Neon to handle 64-bits operations rather
836 than core registers. */
837 int prefer_neon_for_64bits = 0;
839 /* Nonzero if we shouldn't use literal pools. */
840 bool arm_disable_literal_pool = false;
842 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
843 we must report the mode of the memory reference from
844 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
845 machine_mode output_memory_reference_mode;
847 /* The register number to be used for the PIC offset register. */
848 unsigned arm_pic_register = INVALID_REGNUM;
850 enum arm_pcs arm_pcs_default;
852 /* For an explanation of these variables, see final_prescan_insn below. */
853 int arm_ccfsm_state;
854 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
855 enum arm_cond_code arm_current_cc;
857 rtx arm_target_insn;
858 int arm_target_label;
859 /* The number of conditionally executed insns, including the current insn. */
860 int arm_condexec_count = 0;
861 /* A bitmask specifying the patterns for the IT block.
862 Zero means do not output an IT block before this insn. */
863 int arm_condexec_mask = 0;
864 /* The number of bits used in arm_condexec_mask. */
865 int arm_condexec_masklen = 0;
867 /* Nonzero if chip supports the ARMv8 CRC instructions. */
868 int arm_arch_crc = 0;
870 /* Nonzero if the core has a very small, high-latency, multiply unit. */
871 int arm_m_profile_small_mul = 0;
873 /* The condition codes of the ARM, and the inverse function. */
874 static const char * const arm_condition_codes[] =
876 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
877 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
880 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
881 int arm_regs_in_sequence[] =
883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
886 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
887 #define streq(string1, string2) (strcmp (string1, string2) == 0)
889 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
890 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
891 | (1 << PIC_OFFSET_TABLE_REGNUM)))
893 /* Initialization code. */
895 struct processors
897 const char *const name;
898 enum processor_type core;
899 const char *arch;
900 enum base_architecture base_arch;
901 const unsigned long flags;
902 const struct tune_params *const tune;
906 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
907 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
908 prefetch_slots, \
909 l1_size, \
910 l1_line_size
912 /* arm generic vectorizer costs. */
913 static const
914 struct cpu_vec_costs arm_default_vec_cost = {
915 1, /* scalar_stmt_cost. */
916 1, /* scalar load_cost. */
917 1, /* scalar_store_cost. */
918 1, /* vec_stmt_cost. */
919 1, /* vec_to_scalar_cost. */
920 1, /* scalar_to_vec_cost. */
921 1, /* vec_align_load_cost. */
922 1, /* vec_unalign_load_cost. */
923 1, /* vec_unalign_store_cost. */
924 1, /* vec_store_cost. */
925 3, /* cond_taken_branch_cost. */
926 1, /* cond_not_taken_branch_cost. */
929 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
930 #include "aarch-cost-tables.h"
934 const struct cpu_cost_table cortexa9_extra_costs =
936 /* ALU */
938 0, /* arith. */
939 0, /* logical. */
940 0, /* shift. */
941 COSTS_N_INSNS (1), /* shift_reg. */
942 COSTS_N_INSNS (1), /* arith_shift. */
943 COSTS_N_INSNS (2), /* arith_shift_reg. */
944 0, /* log_shift. */
945 COSTS_N_INSNS (1), /* log_shift_reg. */
946 COSTS_N_INSNS (1), /* extend. */
947 COSTS_N_INSNS (2), /* extend_arith. */
948 COSTS_N_INSNS (1), /* bfi. */
949 COSTS_N_INSNS (1), /* bfx. */
950 0, /* clz. */
951 0, /* rev. */
952 0, /* non_exec. */
953 true /* non_exec_costs_exec. */
956 /* MULT SImode */
958 COSTS_N_INSNS (3), /* simple. */
959 COSTS_N_INSNS (3), /* flag_setting. */
960 COSTS_N_INSNS (2), /* extend. */
961 COSTS_N_INSNS (3), /* add. */
962 COSTS_N_INSNS (2), /* extend_add. */
963 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
965 /* MULT DImode */
967 0, /* simple (N/A). */
968 0, /* flag_setting (N/A). */
969 COSTS_N_INSNS (4), /* extend. */
970 0, /* add (N/A). */
971 COSTS_N_INSNS (4), /* extend_add. */
972 0 /* idiv (N/A). */
975 /* LD/ST */
977 COSTS_N_INSNS (2), /* load. */
978 COSTS_N_INSNS (2), /* load_sign_extend. */
979 COSTS_N_INSNS (2), /* ldrd. */
980 COSTS_N_INSNS (2), /* ldm_1st. */
981 1, /* ldm_regs_per_insn_1st. */
982 2, /* ldm_regs_per_insn_subsequent. */
983 COSTS_N_INSNS (5), /* loadf. */
984 COSTS_N_INSNS (5), /* loadd. */
985 COSTS_N_INSNS (1), /* load_unaligned. */
986 COSTS_N_INSNS (2), /* store. */
987 COSTS_N_INSNS (2), /* strd. */
988 COSTS_N_INSNS (2), /* stm_1st. */
989 1, /* stm_regs_per_insn_1st. */
990 2, /* stm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (1), /* storef. */
992 COSTS_N_INSNS (1), /* stored. */
993 COSTS_N_INSNS (1) /* store_unaligned. */
996 /* FP SFmode */
998 COSTS_N_INSNS (14), /* div. */
999 COSTS_N_INSNS (4), /* mult. */
1000 COSTS_N_INSNS (7), /* mult_addsub. */
1001 COSTS_N_INSNS (30), /* fma. */
1002 COSTS_N_INSNS (3), /* addsub. */
1003 COSTS_N_INSNS (1), /* fpconst. */
1004 COSTS_N_INSNS (1), /* neg. */
1005 COSTS_N_INSNS (3), /* compare. */
1006 COSTS_N_INSNS (3), /* widen. */
1007 COSTS_N_INSNS (3), /* narrow. */
1008 COSTS_N_INSNS (3), /* toint. */
1009 COSTS_N_INSNS (3), /* fromint. */
1010 COSTS_N_INSNS (3) /* roundint. */
1012 /* FP DFmode */
1014 COSTS_N_INSNS (24), /* div. */
1015 COSTS_N_INSNS (5), /* mult. */
1016 COSTS_N_INSNS (8), /* mult_addsub. */
1017 COSTS_N_INSNS (30), /* fma. */
1018 COSTS_N_INSNS (3), /* addsub. */
1019 COSTS_N_INSNS (1), /* fpconst. */
1020 COSTS_N_INSNS (1), /* neg. */
1021 COSTS_N_INSNS (3), /* compare. */
1022 COSTS_N_INSNS (3), /* widen. */
1023 COSTS_N_INSNS (3), /* narrow. */
1024 COSTS_N_INSNS (3), /* toint. */
1025 COSTS_N_INSNS (3), /* fromint. */
1026 COSTS_N_INSNS (3) /* roundint. */
1029 /* Vector */
1031 COSTS_N_INSNS (1) /* alu. */
1035 const struct cpu_cost_table cortexa8_extra_costs =
1037 /* ALU */
1039 0, /* arith. */
1040 0, /* logical. */
1041 COSTS_N_INSNS (1), /* shift. */
1042 0, /* shift_reg. */
1043 COSTS_N_INSNS (1), /* arith_shift. */
1044 0, /* arith_shift_reg. */
1045 COSTS_N_INSNS (1), /* log_shift. */
1046 0, /* log_shift_reg. */
1047 0, /* extend. */
1048 0, /* extend_arith. */
1049 0, /* bfi. */
1050 0, /* bfx. */
1051 0, /* clz. */
1052 0, /* rev. */
1053 0, /* non_exec. */
1054 true /* non_exec_costs_exec. */
1057 /* MULT SImode */
1059 COSTS_N_INSNS (1), /* simple. */
1060 COSTS_N_INSNS (1), /* flag_setting. */
1061 COSTS_N_INSNS (1), /* extend. */
1062 COSTS_N_INSNS (1), /* add. */
1063 COSTS_N_INSNS (1), /* extend_add. */
1064 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1066 /* MULT DImode */
1068 0, /* simple (N/A). */
1069 0, /* flag_setting (N/A). */
1070 COSTS_N_INSNS (2), /* extend. */
1071 0, /* add (N/A). */
1072 COSTS_N_INSNS (2), /* extend_add. */
1073 0 /* idiv (N/A). */
1076 /* LD/ST */
1078 COSTS_N_INSNS (1), /* load. */
1079 COSTS_N_INSNS (1), /* load_sign_extend. */
1080 COSTS_N_INSNS (1), /* ldrd. */
1081 COSTS_N_INSNS (1), /* ldm_1st. */
1082 1, /* ldm_regs_per_insn_1st. */
1083 2, /* ldm_regs_per_insn_subsequent. */
1084 COSTS_N_INSNS (1), /* loadf. */
1085 COSTS_N_INSNS (1), /* loadd. */
1086 COSTS_N_INSNS (1), /* load_unaligned. */
1087 COSTS_N_INSNS (1), /* store. */
1088 COSTS_N_INSNS (1), /* strd. */
1089 COSTS_N_INSNS (1), /* stm_1st. */
1090 1, /* stm_regs_per_insn_1st. */
1091 2, /* stm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* storef. */
1093 COSTS_N_INSNS (1), /* stored. */
1094 COSTS_N_INSNS (1) /* store_unaligned. */
1097 /* FP SFmode */
1099 COSTS_N_INSNS (36), /* div. */
1100 COSTS_N_INSNS (11), /* mult. */
1101 COSTS_N_INSNS (20), /* mult_addsub. */
1102 COSTS_N_INSNS (30), /* fma. */
1103 COSTS_N_INSNS (9), /* addsub. */
1104 COSTS_N_INSNS (3), /* fpconst. */
1105 COSTS_N_INSNS (3), /* neg. */
1106 COSTS_N_INSNS (6), /* compare. */
1107 COSTS_N_INSNS (4), /* widen. */
1108 COSTS_N_INSNS (4), /* narrow. */
1109 COSTS_N_INSNS (8), /* toint. */
1110 COSTS_N_INSNS (8), /* fromint. */
1111 COSTS_N_INSNS (8) /* roundint. */
1113 /* FP DFmode */
1115 COSTS_N_INSNS (64), /* div. */
1116 COSTS_N_INSNS (16), /* mult. */
1117 COSTS_N_INSNS (25), /* mult_addsub. */
1118 COSTS_N_INSNS (30), /* fma. */
1119 COSTS_N_INSNS (9), /* addsub. */
1120 COSTS_N_INSNS (3), /* fpconst. */
1121 COSTS_N_INSNS (3), /* neg. */
1122 COSTS_N_INSNS (6), /* compare. */
1123 COSTS_N_INSNS (6), /* widen. */
1124 COSTS_N_INSNS (6), /* narrow. */
1125 COSTS_N_INSNS (8), /* toint. */
1126 COSTS_N_INSNS (8), /* fromint. */
1127 COSTS_N_INSNS (8) /* roundint. */
1130 /* Vector */
1132 COSTS_N_INSNS (1) /* alu. */
1136 const struct cpu_cost_table cortexa5_extra_costs =
1138 /* ALU */
1140 0, /* arith. */
1141 0, /* logical. */
1142 COSTS_N_INSNS (1), /* shift. */
1143 COSTS_N_INSNS (1), /* shift_reg. */
1144 COSTS_N_INSNS (1), /* arith_shift. */
1145 COSTS_N_INSNS (1), /* arith_shift_reg. */
1146 COSTS_N_INSNS (1), /* log_shift. */
1147 COSTS_N_INSNS (1), /* log_shift_reg. */
1148 COSTS_N_INSNS (1), /* extend. */
1149 COSTS_N_INSNS (1), /* extend_arith. */
1150 COSTS_N_INSNS (1), /* bfi. */
1151 COSTS_N_INSNS (1), /* bfx. */
1152 COSTS_N_INSNS (1), /* clz. */
1153 COSTS_N_INSNS (1), /* rev. */
1154 0, /* non_exec. */
1155 true /* non_exec_costs_exec. */
1159 /* MULT SImode */
1161 0, /* simple. */
1162 COSTS_N_INSNS (1), /* flag_setting. */
1163 COSTS_N_INSNS (1), /* extend. */
1164 COSTS_N_INSNS (1), /* add. */
1165 COSTS_N_INSNS (1), /* extend_add. */
1166 COSTS_N_INSNS (7) /* idiv. */
1168 /* MULT DImode */
1170 0, /* simple (N/A). */
1171 0, /* flag_setting (N/A). */
1172 COSTS_N_INSNS (1), /* extend. */
1173 0, /* add. */
1174 COSTS_N_INSNS (2), /* extend_add. */
1175 0 /* idiv (N/A). */
1178 /* LD/ST */
1180 COSTS_N_INSNS (1), /* load. */
1181 COSTS_N_INSNS (1), /* load_sign_extend. */
1182 COSTS_N_INSNS (6), /* ldrd. */
1183 COSTS_N_INSNS (1), /* ldm_1st. */
1184 1, /* ldm_regs_per_insn_1st. */
1185 2, /* ldm_regs_per_insn_subsequent. */
1186 COSTS_N_INSNS (2), /* loadf. */
1187 COSTS_N_INSNS (4), /* loadd. */
1188 COSTS_N_INSNS (1), /* load_unaligned. */
1189 COSTS_N_INSNS (1), /* store. */
1190 COSTS_N_INSNS (3), /* strd. */
1191 COSTS_N_INSNS (1), /* stm_1st. */
1192 1, /* stm_regs_per_insn_1st. */
1193 2, /* stm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* storef. */
1195 COSTS_N_INSNS (2), /* stored. */
1196 COSTS_N_INSNS (1) /* store_unaligned. */
1199 /* FP SFmode */
1201 COSTS_N_INSNS (15), /* div. */
1202 COSTS_N_INSNS (3), /* mult. */
1203 COSTS_N_INSNS (7), /* mult_addsub. */
1204 COSTS_N_INSNS (7), /* fma. */
1205 COSTS_N_INSNS (3), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (3), /* compare. */
1209 COSTS_N_INSNS (3), /* widen. */
1210 COSTS_N_INSNS (3), /* narrow. */
1211 COSTS_N_INSNS (3), /* toint. */
1212 COSTS_N_INSNS (3), /* fromint. */
1213 COSTS_N_INSNS (3) /* roundint. */
1215 /* FP DFmode */
1217 COSTS_N_INSNS (30), /* div. */
1218 COSTS_N_INSNS (6), /* mult. */
1219 COSTS_N_INSNS (10), /* mult_addsub. */
1220 COSTS_N_INSNS (7), /* fma. */
1221 COSTS_N_INSNS (3), /* addsub. */
1222 COSTS_N_INSNS (3), /* fpconst. */
1223 COSTS_N_INSNS (3), /* neg. */
1224 COSTS_N_INSNS (3), /* compare. */
1225 COSTS_N_INSNS (3), /* widen. */
1226 COSTS_N_INSNS (3), /* narrow. */
1227 COSTS_N_INSNS (3), /* toint. */
1228 COSTS_N_INSNS (3), /* fromint. */
1229 COSTS_N_INSNS (3) /* roundint. */
1232 /* Vector */
1234 COSTS_N_INSNS (1) /* alu. */
1239 const struct cpu_cost_table cortexa7_extra_costs =
1241 /* ALU */
1243 0, /* arith. */
1244 0, /* logical. */
1245 COSTS_N_INSNS (1), /* shift. */
1246 COSTS_N_INSNS (1), /* shift_reg. */
1247 COSTS_N_INSNS (1), /* arith_shift. */
1248 COSTS_N_INSNS (1), /* arith_shift_reg. */
1249 COSTS_N_INSNS (1), /* log_shift. */
1250 COSTS_N_INSNS (1), /* log_shift_reg. */
1251 COSTS_N_INSNS (1), /* extend. */
1252 COSTS_N_INSNS (1), /* extend_arith. */
1253 COSTS_N_INSNS (1), /* bfi. */
1254 COSTS_N_INSNS (1), /* bfx. */
1255 COSTS_N_INSNS (1), /* clz. */
1256 COSTS_N_INSNS (1), /* rev. */
1257 0, /* non_exec. */
1258 true /* non_exec_costs_exec. */
1262 /* MULT SImode */
1264 0, /* simple. */
1265 COSTS_N_INSNS (1), /* flag_setting. */
1266 COSTS_N_INSNS (1), /* extend. */
1267 COSTS_N_INSNS (1), /* add. */
1268 COSTS_N_INSNS (1), /* extend_add. */
1269 COSTS_N_INSNS (7) /* idiv. */
1271 /* MULT DImode */
1273 0, /* simple (N/A). */
1274 0, /* flag_setting (N/A). */
1275 COSTS_N_INSNS (1), /* extend. */
1276 0, /* add. */
1277 COSTS_N_INSNS (2), /* extend_add. */
1278 0 /* idiv (N/A). */
1281 /* LD/ST */
1283 COSTS_N_INSNS (1), /* load. */
1284 COSTS_N_INSNS (1), /* load_sign_extend. */
1285 COSTS_N_INSNS (3), /* ldrd. */
1286 COSTS_N_INSNS (1), /* ldm_1st. */
1287 1, /* ldm_regs_per_insn_1st. */
1288 2, /* ldm_regs_per_insn_subsequent. */
1289 COSTS_N_INSNS (2), /* loadf. */
1290 COSTS_N_INSNS (2), /* loadd. */
1291 COSTS_N_INSNS (1), /* load_unaligned. */
1292 COSTS_N_INSNS (1), /* store. */
1293 COSTS_N_INSNS (3), /* strd. */
1294 COSTS_N_INSNS (1), /* stm_1st. */
1295 1, /* stm_regs_per_insn_1st. */
1296 2, /* stm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* storef. */
1298 COSTS_N_INSNS (2), /* stored. */
1299 COSTS_N_INSNS (1) /* store_unaligned. */
1302 /* FP SFmode */
1304 COSTS_N_INSNS (15), /* div. */
1305 COSTS_N_INSNS (3), /* mult. */
1306 COSTS_N_INSNS (7), /* mult_addsub. */
1307 COSTS_N_INSNS (7), /* fma. */
1308 COSTS_N_INSNS (3), /* addsub. */
1309 COSTS_N_INSNS (3), /* fpconst. */
1310 COSTS_N_INSNS (3), /* neg. */
1311 COSTS_N_INSNS (3), /* compare. */
1312 COSTS_N_INSNS (3), /* widen. */
1313 COSTS_N_INSNS (3), /* narrow. */
1314 COSTS_N_INSNS (3), /* toint. */
1315 COSTS_N_INSNS (3), /* fromint. */
1316 COSTS_N_INSNS (3) /* roundint. */
1318 /* FP DFmode */
1320 COSTS_N_INSNS (30), /* div. */
1321 COSTS_N_INSNS (6), /* mult. */
1322 COSTS_N_INSNS (10), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1335 /* Vector */
1337 COSTS_N_INSNS (1) /* alu. */
1341 const struct cpu_cost_table cortexa12_extra_costs =
1343 /* ALU */
1345 0, /* arith. */
1346 0, /* logical. */
1347 0, /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 0, /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 0, /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1362 /* MULT SImode */
1365 COSTS_N_INSNS (2), /* simple. */
1366 COSTS_N_INSNS (3), /* flag_setting. */
1367 COSTS_N_INSNS (2), /* extend. */
1368 COSTS_N_INSNS (3), /* add. */
1369 COSTS_N_INSNS (2), /* extend_add. */
1370 COSTS_N_INSNS (18) /* idiv. */
1372 /* MULT DImode */
1374 0, /* simple (N/A). */
1375 0, /* flag_setting (N/A). */
1376 COSTS_N_INSNS (3), /* extend. */
1377 0, /* add (N/A). */
1378 COSTS_N_INSNS (3), /* extend_add. */
1379 0 /* idiv (N/A). */
1382 /* LD/ST */
1384 COSTS_N_INSNS (3), /* load. */
1385 COSTS_N_INSNS (3), /* load_sign_extend. */
1386 COSTS_N_INSNS (3), /* ldrd. */
1387 COSTS_N_INSNS (3), /* ldm_1st. */
1388 1, /* ldm_regs_per_insn_1st. */
1389 2, /* ldm_regs_per_insn_subsequent. */
1390 COSTS_N_INSNS (3), /* loadf. */
1391 COSTS_N_INSNS (3), /* loadd. */
1392 0, /* load_unaligned. */
1393 0, /* store. */
1394 0, /* strd. */
1395 0, /* stm_1st. */
1396 1, /* stm_regs_per_insn_1st. */
1397 2, /* stm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (2), /* storef. */
1399 COSTS_N_INSNS (2), /* stored. */
1400 0 /* store_unaligned. */
1403 /* FP SFmode */
1405 COSTS_N_INSNS (17), /* div. */
1406 COSTS_N_INSNS (4), /* mult. */
1407 COSTS_N_INSNS (8), /* mult_addsub. */
1408 COSTS_N_INSNS (8), /* fma. */
1409 COSTS_N_INSNS (4), /* addsub. */
1410 COSTS_N_INSNS (2), /* fpconst. */
1411 COSTS_N_INSNS (2), /* neg. */
1412 COSTS_N_INSNS (2), /* compare. */
1413 COSTS_N_INSNS (4), /* widen. */
1414 COSTS_N_INSNS (4), /* narrow. */
1415 COSTS_N_INSNS (4), /* toint. */
1416 COSTS_N_INSNS (4), /* fromint. */
1417 COSTS_N_INSNS (4) /* roundint. */
1419 /* FP DFmode */
1421 COSTS_N_INSNS (31), /* div. */
1422 COSTS_N_INSNS (4), /* mult. */
1423 COSTS_N_INSNS (8), /* mult_addsub. */
1424 COSTS_N_INSNS (8), /* fma. */
1425 COSTS_N_INSNS (4), /* addsub. */
1426 COSTS_N_INSNS (2), /* fpconst. */
1427 COSTS_N_INSNS (2), /* neg. */
1428 COSTS_N_INSNS (2), /* compare. */
1429 COSTS_N_INSNS (4), /* widen. */
1430 COSTS_N_INSNS (4), /* narrow. */
1431 COSTS_N_INSNS (4), /* toint. */
1432 COSTS_N_INSNS (4), /* fromint. */
1433 COSTS_N_INSNS (4) /* roundint. */
1436 /* Vector */
1438 COSTS_N_INSNS (1) /* alu. */
1442 const struct cpu_cost_table cortexa15_extra_costs =
1444 /* ALU */
1446 0, /* arith. */
1447 0, /* logical. */
1448 0, /* shift. */
1449 0, /* shift_reg. */
1450 COSTS_N_INSNS (1), /* arith_shift. */
1451 COSTS_N_INSNS (1), /* arith_shift_reg. */
1452 COSTS_N_INSNS (1), /* log_shift. */
1453 COSTS_N_INSNS (1), /* log_shift_reg. */
1454 0, /* extend. */
1455 COSTS_N_INSNS (1), /* extend_arith. */
1456 COSTS_N_INSNS (1), /* bfi. */
1457 0, /* bfx. */
1458 0, /* clz. */
1459 0, /* rev. */
1460 0, /* non_exec. */
1461 true /* non_exec_costs_exec. */
1463 /* MULT SImode */
1466 COSTS_N_INSNS (2), /* simple. */
1467 COSTS_N_INSNS (3), /* flag_setting. */
1468 COSTS_N_INSNS (2), /* extend. */
1469 COSTS_N_INSNS (2), /* add. */
1470 COSTS_N_INSNS (2), /* extend_add. */
1471 COSTS_N_INSNS (18) /* idiv. */
1473 /* MULT DImode */
1475 0, /* simple (N/A). */
1476 0, /* flag_setting (N/A). */
1477 COSTS_N_INSNS (3), /* extend. */
1478 0, /* add (N/A). */
1479 COSTS_N_INSNS (3), /* extend_add. */
1480 0 /* idiv (N/A). */
1483 /* LD/ST */
1485 COSTS_N_INSNS (3), /* load. */
1486 COSTS_N_INSNS (3), /* load_sign_extend. */
1487 COSTS_N_INSNS (3), /* ldrd. */
1488 COSTS_N_INSNS (4), /* ldm_1st. */
1489 1, /* ldm_regs_per_insn_1st. */
1490 2, /* ldm_regs_per_insn_subsequent. */
1491 COSTS_N_INSNS (4), /* loadf. */
1492 COSTS_N_INSNS (4), /* loadd. */
1493 0, /* load_unaligned. */
1494 0, /* store. */
1495 0, /* strd. */
1496 COSTS_N_INSNS (1), /* stm_1st. */
1497 1, /* stm_regs_per_insn_1st. */
1498 2, /* stm_regs_per_insn_subsequent. */
1499 0, /* storef. */
1500 0, /* stored. */
1501 0 /* store_unaligned. */
1504 /* FP SFmode */
1506 COSTS_N_INSNS (17), /* div. */
1507 COSTS_N_INSNS (4), /* mult. */
1508 COSTS_N_INSNS (8), /* mult_addsub. */
1509 COSTS_N_INSNS (8), /* fma. */
1510 COSTS_N_INSNS (4), /* addsub. */
1511 COSTS_N_INSNS (2), /* fpconst. */
1512 COSTS_N_INSNS (2), /* neg. */
1513 COSTS_N_INSNS (5), /* compare. */
1514 COSTS_N_INSNS (4), /* widen. */
1515 COSTS_N_INSNS (4), /* narrow. */
1516 COSTS_N_INSNS (4), /* toint. */
1517 COSTS_N_INSNS (4), /* fromint. */
1518 COSTS_N_INSNS (4) /* roundint. */
1520 /* FP DFmode */
1522 COSTS_N_INSNS (31), /* div. */
1523 COSTS_N_INSNS (4), /* mult. */
1524 COSTS_N_INSNS (8), /* mult_addsub. */
1525 COSTS_N_INSNS (8), /* fma. */
1526 COSTS_N_INSNS (4), /* addsub. */
1527 COSTS_N_INSNS (2), /* fpconst. */
1528 COSTS_N_INSNS (2), /* neg. */
1529 COSTS_N_INSNS (2), /* compare. */
1530 COSTS_N_INSNS (4), /* widen. */
1531 COSTS_N_INSNS (4), /* narrow. */
1532 COSTS_N_INSNS (4), /* toint. */
1533 COSTS_N_INSNS (4), /* fromint. */
1534 COSTS_N_INSNS (4) /* roundint. */
1537 /* Vector */
1539 COSTS_N_INSNS (1) /* alu. */
1543 const struct cpu_cost_table v7m_extra_costs =
1545 /* ALU */
1547 0, /* arith. */
1548 0, /* logical. */
1549 0, /* shift. */
1550 0, /* shift_reg. */
1551 0, /* arith_shift. */
1552 COSTS_N_INSNS (1), /* arith_shift_reg. */
1553 0, /* log_shift. */
1554 COSTS_N_INSNS (1), /* log_shift_reg. */
1555 0, /* extend. */
1556 COSTS_N_INSNS (1), /* extend_arith. */
1557 0, /* bfi. */
1558 0, /* bfx. */
1559 0, /* clz. */
1560 0, /* rev. */
1561 COSTS_N_INSNS (1), /* non_exec. */
1562 false /* non_exec_costs_exec. */
1565 /* MULT SImode */
1567 COSTS_N_INSNS (1), /* simple. */
1568 COSTS_N_INSNS (1), /* flag_setting. */
1569 COSTS_N_INSNS (2), /* extend. */
1570 COSTS_N_INSNS (1), /* add. */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (8) /* idiv. */
1574 /* MULT DImode */
1576 0, /* simple (N/A). */
1577 0, /* flag_setting (N/A). */
1578 COSTS_N_INSNS (2), /* extend. */
1579 0, /* add (N/A). */
1580 COSTS_N_INSNS (3), /* extend_add. */
1581 0 /* idiv (N/A). */
1584 /* LD/ST */
1586 COSTS_N_INSNS (2), /* load. */
1587 0, /* load_sign_extend. */
1588 COSTS_N_INSNS (3), /* ldrd. */
1589 COSTS_N_INSNS (2), /* ldm_1st. */
1590 1, /* ldm_regs_per_insn_1st. */
1591 1, /* ldm_regs_per_insn_subsequent. */
1592 COSTS_N_INSNS (2), /* loadf. */
1593 COSTS_N_INSNS (3), /* loadd. */
1594 COSTS_N_INSNS (1), /* load_unaligned. */
1595 COSTS_N_INSNS (2), /* store. */
1596 COSTS_N_INSNS (3), /* strd. */
1597 COSTS_N_INSNS (2), /* stm_1st. */
1598 1, /* stm_regs_per_insn_1st. */
1599 1, /* stm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* storef. */
1601 COSTS_N_INSNS (3), /* stored. */
1602 COSTS_N_INSNS (1) /* store_unaligned. */
1605 /* FP SFmode */
1607 COSTS_N_INSNS (7), /* div. */
1608 COSTS_N_INSNS (2), /* mult. */
1609 COSTS_N_INSNS (5), /* mult_addsub. */
1610 COSTS_N_INSNS (3), /* fma. */
1611 COSTS_N_INSNS (1), /* addsub. */
1612 0, /* fpconst. */
1613 0, /* neg. */
1614 0, /* compare. */
1615 0, /* widen. */
1616 0, /* narrow. */
1617 0, /* toint. */
1618 0, /* fromint. */
1619 0 /* roundint. */
1621 /* FP DFmode */
1623 COSTS_N_INSNS (15), /* div. */
1624 COSTS_N_INSNS (5), /* mult. */
1625 COSTS_N_INSNS (7), /* mult_addsub. */
1626 COSTS_N_INSNS (7), /* fma. */
1627 COSTS_N_INSNS (3), /* addsub. */
1628 0, /* fpconst. */
1629 0, /* neg. */
1630 0, /* compare. */
1631 0, /* widen. */
1632 0, /* narrow. */
1633 0, /* toint. */
1634 0, /* fromint. */
1635 0 /* roundint. */
1638 /* Vector */
1640 COSTS_N_INSNS (1) /* alu. */
1644 const struct tune_params arm_slowmul_tune =
1646 arm_slowmul_rtx_costs,
1647 NULL,
1648 NULL, /* Sched adj cost. */
1649 3, /* Constant limit. */
1650 5, /* Max cond insns. */
1651 ARM_PREFETCH_NOT_BENEFICIAL,
1652 true, /* Prefer constant pool. */
1653 arm_default_branch_cost,
1654 false, /* Prefer LDRD/STRD. */
1655 {true, true}, /* Prefer non short circuit. */
1656 &arm_default_vec_cost, /* Vectorizer costs. */
1657 false, /* Prefer Neon for 64-bits bitops. */
1658 false, false, /* Prefer 32-bit encodings. */
1659 false, /* Prefer Neon for stringops. */
1660 8 /* Maximum insns to inline memset. */
1663 const struct tune_params arm_fastmul_tune =
1665 arm_fastmul_rtx_costs,
1666 NULL,
1667 NULL, /* Sched adj cost. */
1668 1, /* Constant limit. */
1669 5, /* Max cond insns. */
1670 ARM_PREFETCH_NOT_BENEFICIAL,
1671 true, /* Prefer constant pool. */
1672 arm_default_branch_cost,
1673 false, /* Prefer LDRD/STRD. */
1674 {true, true}, /* Prefer non short circuit. */
1675 &arm_default_vec_cost, /* Vectorizer costs. */
1676 false, /* Prefer Neon for 64-bits bitops. */
1677 false, false, /* Prefer 32-bit encodings. */
1678 false, /* Prefer Neon for stringops. */
1679 8 /* Maximum insns to inline memset. */
1682 /* StrongARM has early execution of branches, so a sequence that is worth
1683 skipping is shorter. Set max_insns_skipped to a lower value. */
1685 const struct tune_params arm_strongarm_tune =
1687 arm_fastmul_rtx_costs,
1688 NULL,
1689 NULL, /* Sched adj cost. */
1690 1, /* Constant limit. */
1691 3, /* Max cond insns. */
1692 ARM_PREFETCH_NOT_BENEFICIAL,
1693 true, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 false, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false, /* Prefer Neon for 64-bits bitops. */
1699 false, false, /* Prefer 32-bit encodings. */
1700 false, /* Prefer Neon for stringops. */
1701 8 /* Maximum insns to inline memset. */
1704 const struct tune_params arm_xscale_tune =
1706 arm_xscale_rtx_costs,
1707 NULL,
1708 xscale_sched_adjust_cost,
1709 2, /* Constant limit. */
1710 3, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false, /* Prefer 32-bit encodings. */
1719 false, /* Prefer Neon for stringops. */
1720 8 /* Maximum insns to inline memset. */
1723 const struct tune_params arm_9e_tune =
1725 arm_9e_rtx_costs,
1726 NULL,
1727 NULL, /* Sched adj cost. */
1728 1, /* Constant limit. */
1729 5, /* Max cond insns. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 true, /* Prefer constant pool. */
1732 arm_default_branch_cost,
1733 false, /* Prefer LDRD/STRD. */
1734 {true, true}, /* Prefer non short circuit. */
1735 &arm_default_vec_cost, /* Vectorizer costs. */
1736 false, /* Prefer Neon for 64-bits bitops. */
1737 false, false, /* Prefer 32-bit encodings. */
1738 false, /* Prefer Neon for stringops. */
1739 8 /* Maximum insns to inline memset. */
1742 const struct tune_params arm_v6t2_tune =
1744 arm_9e_rtx_costs,
1745 NULL,
1746 NULL, /* Sched adj cost. */
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 false, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false, /* Prefer Neon for 64-bits bitops. */
1756 false, false, /* Prefer 32-bit encodings. */
1757 false, /* Prefer Neon for stringops. */
1758 8 /* Maximum insns to inline memset. */
1761 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1762 const struct tune_params arm_cortex_tune =
1764 arm_9e_rtx_costs,
1765 &generic_extra_costs,
1766 NULL, /* Sched adj cost. */
1767 1, /* Constant limit. */
1768 5, /* Max cond insns. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 false, /* Prefer constant pool. */
1771 arm_default_branch_cost,
1772 false, /* Prefer LDRD/STRD. */
1773 {true, true}, /* Prefer non short circuit. */
1774 &arm_default_vec_cost, /* Vectorizer costs. */
1775 false, /* Prefer Neon for 64-bits bitops. */
1776 false, false, /* Prefer 32-bit encodings. */
1777 false, /* Prefer Neon for stringops. */
1778 8 /* Maximum insns to inline memset. */
1781 const struct tune_params arm_cortex_a8_tune =
1783 arm_9e_rtx_costs,
1784 &cortexa8_extra_costs,
1785 NULL, /* Sched adj cost. */
1786 1, /* Constant limit. */
1787 5, /* Max cond insns. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 false, /* Prefer constant pool. */
1790 arm_default_branch_cost,
1791 false, /* Prefer LDRD/STRD. */
1792 {true, true}, /* Prefer non short circuit. */
1793 &arm_default_vec_cost, /* Vectorizer costs. */
1794 false, /* Prefer Neon for 64-bits bitops. */
1795 false, false, /* Prefer 32-bit encodings. */
1796 true, /* Prefer Neon for stringops. */
1797 8 /* Maximum insns to inline memset. */
1800 const struct tune_params arm_cortex_a7_tune =
1802 arm_9e_rtx_costs,
1803 &cortexa7_extra_costs,
1804 NULL,
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 ARM_PREFETCH_NOT_BENEFICIAL,
1808 false, /* Prefer constant pool. */
1809 arm_default_branch_cost,
1810 false, /* Prefer LDRD/STRD. */
1811 {true, true}, /* Prefer non short circuit. */
1812 &arm_default_vec_cost, /* Vectorizer costs. */
1813 false, /* Prefer Neon for 64-bits bitops. */
1814 false, false, /* Prefer 32-bit encodings. */
1815 true, /* Prefer Neon for stringops. */
1816 8 /* Maximum insns to inline memset. */
1819 const struct tune_params arm_cortex_a15_tune =
1821 arm_9e_rtx_costs,
1822 &cortexa15_extra_costs,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 2, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 true, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 true, true, /* Prefer 32-bit encodings. */
1834 true, /* Prefer Neon for stringops. */
1835 8 /* Maximum insns to inline memset. */
1838 const struct tune_params arm_cortex_a53_tune =
1840 arm_9e_rtx_costs,
1841 &cortexa53_extra_costs,
1842 NULL, /* Scheduler cost adjustment. */
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 false, /* Prefer constant pool. */
1847 arm_default_branch_cost,
1848 false, /* Prefer LDRD/STRD. */
1849 {true, true}, /* Prefer non short circuit. */
1850 &arm_default_vec_cost, /* Vectorizer costs. */
1851 false, /* Prefer Neon for 64-bits bitops. */
1852 false, false, /* Prefer 32-bit encodings. */
1853 false, /* Prefer Neon for stringops. */
1854 8 /* Maximum insns to inline memset. */
1857 const struct tune_params arm_cortex_a57_tune =
1859 arm_9e_rtx_costs,
1860 &cortexa57_extra_costs,
1861 NULL, /* Scheduler cost adjustment. */
1862 1, /* Constant limit. */
1863 2, /* Max cond insns. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 false, /* Prefer constant pool. */
1866 arm_default_branch_cost,
1867 true, /* Prefer LDRD/STRD. */
1868 {true, true}, /* Prefer non short circuit. */
1869 &arm_default_vec_cost, /* Vectorizer costs. */
1870 false, /* Prefer Neon for 64-bits bitops. */
1871 true, true, /* Prefer 32-bit encodings. */
1872 false, /* Prefer Neon for stringops. */
1873 8 /* Maximum insns to inline memset. */
1876 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1877 less appealing. Set max_insns_skipped to a low value. */
1879 const struct tune_params arm_cortex_a5_tune =
1881 arm_9e_rtx_costs,
1882 &cortexa5_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 1, /* Constant limit. */
1885 1, /* Max cond insns. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 false, /* Prefer constant pool. */
1888 arm_cortex_a5_branch_cost,
1889 false, /* Prefer LDRD/STRD. */
1890 {false, false}, /* Prefer non short circuit. */
1891 &arm_default_vec_cost, /* Vectorizer costs. */
1892 false, /* Prefer Neon for 64-bits bitops. */
1893 false, false, /* Prefer 32-bit encodings. */
1894 true, /* Prefer Neon for stringops. */
1895 8 /* Maximum insns to inline memset. */
1898 const struct tune_params arm_cortex_a9_tune =
1900 arm_9e_rtx_costs,
1901 &cortexa9_extra_costs,
1902 cortex_a9_sched_adjust_cost,
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_BENEFICIAL(4,32,32),
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 false, /* Prefer Neon for stringops. */
1914 8 /* Maximum insns to inline memset. */
1917 const struct tune_params arm_cortex_a12_tune =
1919 arm_9e_rtx_costs,
1920 &cortexa12_extra_costs,
1921 NULL,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 ARM_PREFETCH_BENEFICIAL(4,32,32),
1925 false, /* Prefer constant pool. */
1926 arm_default_branch_cost,
1927 true, /* Prefer LDRD/STRD. */
1928 {true, true}, /* Prefer non short circuit. */
1929 &arm_default_vec_cost, /* Vectorizer costs. */
1930 false, /* Prefer Neon for 64-bits bitops. */
1931 false, false, /* Prefer 32-bit encodings. */
1932 true, /* Prefer Neon for stringops. */
1933 8 /* Maximum insns to inline memset. */
1936 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
1937 single cycle to execute. An LDR from the constant pool also takes two cycles
1938 to execute, but mildly increases pipelining opportunity (consecutive
1939 loads/stores can be pipelined together, saving one cycle), and may also
1940 improve icache utilisation. Hence we prefer the constant pool for such
1941 processors. */
1943 const struct tune_params arm_v7m_tune =
1945 arm_9e_rtx_costs,
1946 &v7m_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 1, /* Constant limit. */
1949 2, /* Max cond insns. */
1950 ARM_PREFETCH_NOT_BENEFICIAL,
1951 true, /* Prefer constant pool. */
1952 arm_cortex_m_branch_cost,
1953 false, /* Prefer LDRD/STRD. */
1954 {false, false}, /* Prefer non short circuit. */
1955 &arm_default_vec_cost, /* Vectorizer costs. */
1956 false, /* Prefer Neon for 64-bits bitops. */
1957 false, false, /* Prefer 32-bit encodings. */
1958 false, /* Prefer Neon for stringops. */
1959 8 /* Maximum insns to inline memset. */
1962 /* Cortex-M7 tuning. */
1964 const struct tune_params arm_cortex_m7_tune =
1966 arm_9e_rtx_costs,
1967 &v7m_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 0, /* Constant limit. */
1970 0, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 true, /* Prefer constant pool. */
1973 arm_cortex_m_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {true, true}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 false, /* Prefer Neon for stringops. */
1980 8 /* Maximum insns to inline memset. */
1983 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1984 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1985 const struct tune_params arm_v6m_tune =
1987 arm_9e_rtx_costs,
1988 NULL,
1989 NULL, /* Sched adj cost. */
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {false, false}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8 /* Maximum insns to inline memset. */
2004 const struct tune_params arm_fa726te_tune =
2006 arm_9e_rtx_costs,
2007 NULL,
2008 fa726te_sched_adjust_cost,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 true, /* Prefer constant pool. */
2013 arm_default_branch_cost,
2014 false, /* Prefer LDRD/STRD. */
2015 {true, true}, /* Prefer non short circuit. */
2016 &arm_default_vec_cost, /* Vectorizer costs. */
2017 false, /* Prefer Neon for 64-bits bitops. */
2018 false, false, /* Prefer 32-bit encodings. */
2019 false, /* Prefer Neon for stringops. */
2020 8 /* Maximum insns to inline memset. */
2024 /* Not all of these give usefully different compilation alternatives,
2025 but there is no simple way of generalizing them. */
2026 static const struct processors all_cores[] =
2028 /* ARM Cores */
2029 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2030 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2031 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2032 #include "arm-cores.def"
2033 #undef ARM_CORE
2034 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
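/* For illustration, a hypothetical arm-cores.def entry such as

     ARM_CORE ("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand under the ARM_CORE macro above to roughly

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   i.e. the architecture name is stringized, the per-architecture flags
   are OR-ed in via FL_FOR_ARCH##ARCH, and COSTS picks one of the
   tune_params tables defined earlier in this file.  The actual entries
   and flags live in arm-cores.def.  */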
2037 static const struct processors all_architectures[] =
2039 /* ARM Architectures */
2040 /* We don't specify tuning costs here as it will be figured out
2041 from the core. */
2043 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2044 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2045 #include "arm-arches.def"
2046 #undef ARM_ARCH
2047 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2051 /* These are populated as command-line arguments are processed, or NULL
2052 if not specified. */
2053 static const struct processors *arm_selected_arch;
2054 static const struct processors *arm_selected_cpu;
2055 static const struct processors *arm_selected_tune;
2057 /* The name of the preprocessor macro to define for this architecture. */
2059 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2061 /* Available values for -mfpu=. */
2063 static const struct arm_fpu_desc all_fpus[] =
2065 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2066 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2067 #include "arm-fpus.def"
2068 #undef ARM_FPU
2072 /* Supported TLS relocations. */
2074 enum tls_reloc {
2075 TLS_GD32,
2076 TLS_LDM32,
2077 TLS_LDO32,
2078 TLS_IE32,
2079 TLS_LE32,
2080 TLS_DESCSEQ /* GNU scheme */
2083 /* The maximum number of insns to be used when loading a constant. */
2084 inline static int
2085 arm_constant_limit (bool size_p)
2087 return size_p ? 1 : current_tune->constant_limit;
2090 /* Emit an insn that's a simple single-set. Both the operands must be known
2091 to be valid. */
2092 inline static rtx_insn *
2093 emit_set_insn (rtx x, rtx y)
2095 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2098 /* Return the number of bits set in VALUE. */
2099 static unsigned
2100 bit_count (unsigned long value)
2102 unsigned long count = 0;
2104 while (value)
2106 count++;
2107 value &= value - 1; /* Clear the least-significant set bit. */
2110 return count;
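/* The "value &= value - 1" step above is the classic clear-lowest-set-bit
   trick: subtracting one flips the lowest set bit and every bit below it,
   so the AND removes exactly that bit.  Worked example:

     value             = 0b101100
     value - 1         = 0b101011
     value & (value-1) = 0b101000

   Each iteration therefore drops one set bit, and the loop body runs once
   per bit set in VALUE.  */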
2113 typedef struct
2115 machine_mode mode;
2116 const char *name;
2117 } arm_fixed_mode_set;
2119 /* A small helper for setting fixed-point libfuncs. */
2121 static void
2122 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2123 const char *funcname, const char *modename,
2124 int num_suffix)
2126 char buffer[50];
2128 if (num_suffix == 0)
2129 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2130 else
2131 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2133 set_optab_libfunc (optable, mode, buffer);
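/* Given the sprintf formats above, the generated names carry a "__gnu_"
   prefix (keeping them out of the AEABI "__aeabi_" namespace) followed by
   the operation, the mode suffix and the operand count.  For example
   (names derived from the format strings, for illustration only):

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
       registers "__gnu_addqq3";
     arm_set_fixed_optab_libfunc (neg_optab, SAmode, "neg", "sa", 2)
       registers "__gnu_negsa2".  */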
2136 static void
2137 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2138 machine_mode from, const char *funcname,
2139 const char *toname, const char *fromname)
2141 char buffer[50];
2142 const char *maybe_suffix_2 = "";
2144 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2145 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2146 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2147 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2148 maybe_suffix_2 = "2";
2150 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2151 maybe_suffix_2);
2153 set_conv_libfunc (optable, to, from, buffer);
2156 /* Set up library functions unique to ARM. */
2158 static void
2159 arm_init_libfuncs (void)
2161 /* For Linux, we have access to kernel support for atomic operations. */
2162 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2163 init_sync_libfuncs (2 * UNITS_PER_WORD);
2165 /* There are no special library functions unless we are using the
2166 ARM BPABI. */
2167 if (!TARGET_BPABI)
2168 return;
2170 /* The functions below are described in Section 4 of the "Run-Time
2171 ABI for the ARM architecture", Version 1.0. */
2173 /* Double-precision floating-point arithmetic. Table 2. */
2174 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2175 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2176 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2177 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2178 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2180 /* Double-precision comparisons. Table 3. */
2181 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2182 set_optab_libfunc (ne_optab, DFmode, NULL);
2183 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2184 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2185 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2186 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2187 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2189 /* Single-precision floating-point arithmetic. Table 4. */
2190 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2191 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2192 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2193 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2194 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2196 /* Single-precision comparisons. Table 5. */
2197 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2198 set_optab_libfunc (ne_optab, SFmode, NULL);
2199 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2200 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2201 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2202 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2203 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2205 /* Floating-point to integer conversions. Table 6. */
2206 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2207 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2208 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2209 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2210 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2211 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2212 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2213 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2215 /* Conversions between floating types. Table 7. */
2216 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2217 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2219 /* Integer to floating-point conversions. Table 8. */
2220 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2222 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2223 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2224 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2225 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2226 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2227 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2229 /* Long long. Table 9. */
2230 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2231 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2232 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2233 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2234 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2235 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2236 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2237 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2239 /* Integer (32/32->32) division. \S 4.3.1. */
2240 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2241 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2243 /* The divmod functions are designed so that they can be used for
2244 plain division, even though they return both the quotient and the
2245 remainder. The quotient is returned in the usual location (i.e.,
2246 r0 for SImode, {r0, r1} for DImode), just as would be expected
2247 for an ordinary division routine. Because the AAPCS calling
2248 conventions specify that all of { r0, r1, r2, r3 } are
2249 call-clobbered registers, there is no need to tell the compiler
2250 explicitly that those registers are clobbered by these
2251 routines. */
2252 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2253 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2255 /* For SImode division the ABI provides div-without-mod routines,
2256 which are faster. */
2257 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2258 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
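/* Per the Run-Time ABI, __aeabi_idiv takes the numerator in r0 and the
   denominator in r1 and returns the quotient in r0.  With the optabs set
   up above, a plain C division on a BPABI target without hardware divide,
   e.g.

     int quot (int a, int b) { return a / b; }

   therefore compiles down to little more than a call to __aeabi_idiv.  */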
2260 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2261 divmod libcalls instead. */
2262 set_optab_libfunc (smod_optab, DImode, NULL);
2263 set_optab_libfunc (umod_optab, DImode, NULL);
2264 set_optab_libfunc (smod_optab, SImode, NULL);
2265 set_optab_libfunc (umod_optab, SImode, NULL);
2267 /* Half-precision float operations. The compiler handles all operations
2268 with NULL libfuncs by converting to SFmode. */
2269 switch (arm_fp16_format)
2271 case ARM_FP16_FORMAT_IEEE:
2272 case ARM_FP16_FORMAT_ALTERNATIVE:
2274 /* Conversions. */
2275 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2276 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2277 ? "__gnu_f2h_ieee"
2278 : "__gnu_f2h_alternative"));
2279 set_conv_libfunc (sext_optab, SFmode, HFmode,
2280 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2281 ? "__gnu_h2f_ieee"
2282 : "__gnu_h2f_alternative"));
2284 /* Arithmetic. */
2285 set_optab_libfunc (add_optab, HFmode, NULL);
2286 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2287 set_optab_libfunc (smul_optab, HFmode, NULL);
2288 set_optab_libfunc (neg_optab, HFmode, NULL);
2289 set_optab_libfunc (sub_optab, HFmode, NULL);
2291 /* Comparisons. */
2292 set_optab_libfunc (eq_optab, HFmode, NULL);
2293 set_optab_libfunc (ne_optab, HFmode, NULL);
2294 set_optab_libfunc (lt_optab, HFmode, NULL);
2295 set_optab_libfunc (le_optab, HFmode, NULL);
2296 set_optab_libfunc (ge_optab, HFmode, NULL);
2297 set_optab_libfunc (gt_optab, HFmode, NULL);
2298 set_optab_libfunc (unord_optab, HFmode, NULL);
2299 break;
2301 default:
2302 break;
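/* With the HFmode optabs cleared above, __fp16 arithmetic is performed in
   SFmode.  Conceptually,

     __fp16 a, b;
     __fp16 c = a + b;

   becomes roughly

     c = __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b));

   (or the _alternative variants, depending on arm_fp16_format).  */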
2305 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2307 const arm_fixed_mode_set fixed_arith_modes[] =
2309 { QQmode, "qq" },
2310 { UQQmode, "uqq" },
2311 { HQmode, "hq" },
2312 { UHQmode, "uhq" },
2313 { SQmode, "sq" },
2314 { USQmode, "usq" },
2315 { DQmode, "dq" },
2316 { UDQmode, "udq" },
2317 { TQmode, "tq" },
2318 { UTQmode, "utq" },
2319 { HAmode, "ha" },
2320 { UHAmode, "uha" },
2321 { SAmode, "sa" },
2322 { USAmode, "usa" },
2323 { DAmode, "da" },
2324 { UDAmode, "uda" },
2325 { TAmode, "ta" },
2326 { UTAmode, "uta" }
2328 const arm_fixed_mode_set fixed_conv_modes[] =
2330 { QQmode, "qq" },
2331 { UQQmode, "uqq" },
2332 { HQmode, "hq" },
2333 { UHQmode, "uhq" },
2334 { SQmode, "sq" },
2335 { USQmode, "usq" },
2336 { DQmode, "dq" },
2337 { UDQmode, "udq" },
2338 { TQmode, "tq" },
2339 { UTQmode, "utq" },
2340 { HAmode, "ha" },
2341 { UHAmode, "uha" },
2342 { SAmode, "sa" },
2343 { USAmode, "usa" },
2344 { DAmode, "da" },
2345 { UDAmode, "uda" },
2346 { TAmode, "ta" },
2347 { UTAmode, "uta" },
2348 { QImode, "qi" },
2349 { HImode, "hi" },
2350 { SImode, "si" },
2351 { DImode, "di" },
2352 { TImode, "ti" },
2353 { SFmode, "sf" },
2354 { DFmode, "df" }
2356 unsigned int i, j;
2358 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2360 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2361 "add", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2363 "ssadd", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2365 "usadd", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2367 "sub", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2369 "sssub", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2371 "ussub", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2373 "mul", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2375 "ssmul", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2377 "usmul", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2379 "div", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2381 "udiv", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2383 "ssdiv", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2385 "usdiv", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2387 "neg", fixed_arith_modes[i].name, 2);
2388 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2389 "ssneg", fixed_arith_modes[i].name, 2);
2390 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2391 "usneg", fixed_arith_modes[i].name, 2);
2392 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2393 "ashl", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2395 "ashr", fixed_arith_modes[i].name, 3);
2396 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2397 "lshr", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2399 "ssashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2401 "usashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2403 "cmp", fixed_arith_modes[i].name, 2);
2406 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2407 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2409 if (i == j
2410 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2411 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2412 continue;
2414 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2415 fixed_conv_modes[j].mode, "fract",
2416 fixed_conv_modes[i].name,
2417 fixed_conv_modes[j].name);
2418 arm_set_fixed_conv_libfunc (satfract_optab,
2419 fixed_conv_modes[i].mode,
2420 fixed_conv_modes[j].mode, "satfract",
2421 fixed_conv_modes[i].name,
2422 fixed_conv_modes[j].name);
2423 arm_set_fixed_conv_libfunc (fractuns_optab,
2424 fixed_conv_modes[i].mode,
2425 fixed_conv_modes[j].mode, "fractuns",
2426 fixed_conv_modes[i].name,
2427 fixed_conv_modes[j].name);
2428 arm_set_fixed_conv_libfunc (satfractuns_optab,
2429 fixed_conv_modes[i].mode,
2430 fixed_conv_modes[j].mode, "satfractuns",
2431 fixed_conv_modes[i].name,
2432 fixed_conv_modes[j].name);
2436 if (TARGET_AAPCS_BASED)
2437 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2440 /* On AAPCS systems, this is the "struct __va_list". */
2441 static GTY(()) tree va_list_type;
2443 /* Return the type to use as __builtin_va_list. */
2444 static tree
2445 arm_build_builtin_va_list (void)
2447 tree va_list_name;
2448 tree ap_field;
2450 if (!TARGET_AAPCS_BASED)
2451 return std_build_builtin_va_list ();
2453 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2454 defined as:
2456 struct __va_list
2458 void *__ap;
2461 The C Library ABI further reinforces this definition in \S
2462 4.1.
2464 We must follow this definition exactly. The structure tag
2465 name is visible in C++ mangled names, and thus forms a part
2466 of the ABI. The field name may be used by people who
2467 #include <stdarg.h>. */
2468 /* Create the type. */
2469 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2470 /* Give it the required name. */
2471 va_list_name = build_decl (BUILTINS_LOCATION,
2472 TYPE_DECL,
2473 get_identifier ("__va_list"),
2474 va_list_type);
2475 DECL_ARTIFICIAL (va_list_name) = 1;
2476 TYPE_NAME (va_list_type) = va_list_name;
2477 TYPE_STUB_DECL (va_list_type) = va_list_name;
2478 /* Create the __ap field. */
2479 ap_field = build_decl (BUILTINS_LOCATION,
2480 FIELD_DECL,
2481 get_identifier ("__ap"),
2482 ptr_type_node);
2483 DECL_ARTIFICIAL (ap_field) = 1;
2484 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2485 TYPE_FIELDS (va_list_type) = ap_field;
2486 /* Compute its layout. */
2487 layout_type (va_list_type);
2489 return va_list_type;
2492 /* Return an expression of type "void *" pointing to the next
2493 available argument in a variable-argument list. VALIST is the
2494 user-level va_list object, of type __builtin_va_list. */
2495 static tree
2496 arm_extract_valist_ptr (tree valist)
2498 if (TREE_TYPE (valist) == error_mark_node)
2499 return error_mark_node;
2501 /* On an AAPCS target, the pointer is stored within "struct
2502 va_list". */
2503 if (TARGET_AAPCS_BASED)
2505 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2506 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2507 valist, ap_field, NULL_TREE);
2510 return valist;
2513 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2514 static void
2515 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2517 valist = arm_extract_valist_ptr (valist);
2518 std_expand_builtin_va_start (valist, nextarg);
2521 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2522 static tree
2523 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2524 gimple_seq *post_p)
2526 valist = arm_extract_valist_ptr (valist);
2527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2530 /* Fix up any incompatible options that the user has specified. */
2531 static void
2532 arm_option_override (void)
2534 if (global_options_set.x_arm_arch_option)
2535 arm_selected_arch = &all_architectures[arm_arch_option];
2537 if (global_options_set.x_arm_cpu_option)
2539 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2540 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2543 if (global_options_set.x_arm_tune_option)
2544 arm_selected_tune = &all_cores[(int) arm_tune_option];
2546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2547 SUBTARGET_OVERRIDE_OPTIONS;
2548 #endif
2550 if (arm_selected_arch)
2552 if (arm_selected_cpu)
2554 /* Check for conflict between mcpu and march. */
2555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2558 arm_selected_cpu->name, arm_selected_arch->name);
2559 /* -march wins for code generation.
2560 -mcpu wins for default tuning. */
2561 if (!arm_selected_tune)
2562 arm_selected_tune = arm_selected_cpu;
2564 arm_selected_cpu = arm_selected_arch;
2566 else
2567 /* -mcpu wins. */
2568 arm_selected_arch = NULL;
2570 else
2571 /* Pick a CPU based on the architecture. */
2572 arm_selected_cpu = arm_selected_arch;
2575 /* If the user did not specify a processor, choose one for them. */
2576 if (!arm_selected_cpu)
2578 const struct processors * sel;
2579 unsigned int sought;
2581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2582 if (!arm_selected_cpu->name)
2584 #ifdef SUBTARGET_CPU_DEFAULT
2585 /* Use the subtarget default CPU if none was specified by
2586 configure. */
2587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2588 #endif
2589 /* Default to ARM6. */
2590 if (!arm_selected_cpu->name)
2591 arm_selected_cpu = &all_cores[arm6];
2594 sel = arm_selected_cpu;
2595 insn_flags = sel->flags;
2597 /* Now check to see if the user has specified some command-line
2598 switches that require certain abilities from the CPU. */
2599 sought = 0;
2601 if (TARGET_INTERWORK || TARGET_THUMB)
2603 sought |= (FL_THUMB | FL_MODE32);
2605 /* There are no ARM processors that support both APCS-26 and
2606 interworking. Therefore we force FL_MODE26 to be removed
2607 from insn_flags here (if it was set), so that the search
2608 below will always be able to find a compatible processor. */
2609 insn_flags &= ~FL_MODE26;
2612 if (sought != 0 && ((sought & insn_flags) != sought))
2614 /* Try to locate a CPU type that supports all of the abilities
2615 of the default CPU, plus the extra abilities requested by
2616 the user. */
2617 for (sel = all_cores; sel->name != NULL; sel++)
2618 if ((sel->flags & sought) == (sought | insn_flags))
2619 break;
2621 if (sel->name == NULL)
2623 unsigned current_bit_count = 0;
2624 const struct processors * best_fit = NULL;
2626 /* Ideally we would like to issue an error message here
2627 saying that it was not possible to find a CPU compatible
2628 with the default CPU, but which also supports the command
2629 line options specified by the programmer, and so they
2630 ought to use the -mcpu=<name> command line option to
2631 override the default CPU type.
2633 If we cannot find a cpu that has both the
2634 characteristics of the default cpu and the given
2635 command line options we scan the array again looking
2636 for a best match. */
2637 for (sel = all_cores; sel->name != NULL; sel++)
2638 if ((sel->flags & sought) == sought)
2640 unsigned count;
2642 count = bit_count (sel->flags & insn_flags);
2644 if (count >= current_bit_count)
2646 best_fit = sel;
2647 current_bit_count = count;
2651 gcc_assert (best_fit);
2652 sel = best_fit;
2655 arm_selected_cpu = sel;
2659 gcc_assert (arm_selected_cpu);
2660 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2661 if (!arm_selected_tune)
2662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2665 insn_flags = arm_selected_cpu->flags;
2666 arm_base_arch = arm_selected_cpu->base_arch;
2668 arm_tune = arm_selected_tune->core;
2669 tune_flags = arm_selected_tune->flags;
2670 current_tune = arm_selected_tune->tune;
2672 /* Make sure that the processor choice does not conflict with any of the
2673 other command line choices. */
2674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2675 error ("target CPU does not support ARM mode");
2677 /* BPABI targets use linker tricks to allow interworking on cores
2678 without thumb support. */
2679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2681 warning (0, "target CPU does not support interworking" );
2682 target_flags &= ~MASK_INTERWORK;
2685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2687 warning (0, "target CPU does not support THUMB instructions");
2688 target_flags &= ~MASK_THUMB;
2691 if (TARGET_APCS_FRAME && TARGET_THUMB)
2693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2694 target_flags &= ~MASK_APCS_FRAME;
2697 /* Callee super interworking implies thumb interworking. Adding
2698 this to the flags here simplifies the logic elsewhere. */
2699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2700 target_flags |= MASK_INTERWORK;
2702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2703 from here where no function is being compiled currently. */
2704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2713 target_flags |= MASK_APCS_FRAME;
2716 if (TARGET_POKE_FUNCTION_NAME)
2717 target_flags |= MASK_APCS_FRAME;
2719 if (TARGET_APCS_REENT && flag_pic)
2720 error ("-fpic and -mapcs-reent are incompatible");
2722 if (TARGET_APCS_REENT)
2723 warning (0, "APCS reentrant code not supported. Ignored");
2725 /* If this target is normally configured to use APCS frames, warn if they
2726 are turned off and debugging is turned on. */
2727 if (TARGET_ARM
2728 && write_symbols != NO_DEBUG
2729 && !TARGET_APCS_FRAME
2730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2733 if (TARGET_APCS_FLOAT)
2734 warning (0, "passing floating point arguments in fp regs not yet supported");
2736 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2737 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2738 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2739 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2740 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2741 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2742 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2743 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2744 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2745 arm_arch6m = arm_arch6 && !arm_arch_notm;
2746 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2747 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2748 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2749 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2750 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2752 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2753 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2754 thumb_code = TARGET_ARM == 0;
2755 thumb1_code = TARGET_THUMB1 != 0;
2756 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2757 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2758 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2759 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2760 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2761 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2762 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2763 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2764 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2765 if (arm_restrict_it == 2)
2766 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2768 if (!TARGET_THUMB2)
2769 arm_restrict_it = 0;
2771 /* If we are not using the default (ARM mode) section anchor offset
2772 ranges, then set the correct ranges now. */
2773 if (TARGET_THUMB1)
2775 /* Thumb-1 LDR instructions cannot have negative offsets.
2776 Permissible positive offset ranges are 5-bit (for byte loads),
2777 6-bit (for halfword loads), or 7-bit (for word loads).
2778 Empirical results suggest a 7-bit anchor range gives the best
2779 overall code size. */
2780 targetm.min_anchor_offset = 0;
2781 targetm.max_anchor_offset = 127;
2783 else if (TARGET_THUMB2)
2785 /* The minimum is set such that the total size of the block
2786 for a particular anchor is 248 + 1 + 4095 bytes, which is
2787 divisible by eight, ensuring natural spacing of anchors. */
2788 targetm.min_anchor_offset = -248;
2789 targetm.max_anchor_offset = 4095;
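/* Checking the arithmetic in the comment above: the range -248 .. 4095
   covers 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, so each anchor
   block is indeed a whole number of eight-byte units.  */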
2792 /* V5 code we generate is completely interworking capable, so we turn off
2793 TARGET_INTERWORK here to avoid many tests later on. */
2795 /* XXX However, we must pass the right pre-processor defines to CPP
2796 or GLD can get confused. This is a hack. */
2797 if (TARGET_INTERWORK)
2798 arm_cpp_interwork = 1;
2800 if (arm_arch5)
2801 target_flags &= ~MASK_INTERWORK;
2803 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2804 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2806 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2807 error ("iwmmxt abi requires an iwmmxt capable cpu");
2809 if (!global_options_set.x_arm_fpu_index)
2811 const char *target_fpu_name;
2812 bool ok;
2814 #ifdef FPUTYPE_DEFAULT
2815 target_fpu_name = FPUTYPE_DEFAULT;
2816 #else
2817 target_fpu_name = "vfp";
2818 #endif
2820 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2821 CL_TARGET);
2822 gcc_assert (ok);
2825 arm_fpu_desc = &all_fpus[arm_fpu_index];
2827 if (TARGET_NEON && !arm_arch7)
2828 error ("target CPU does not support NEON");
2830 switch (arm_fpu_desc->model)
2832 case ARM_FP_MODEL_VFP:
2833 arm_fpu_attr = FPU_VFP;
2834 break;
2836 default:
2837 gcc_unreachable();
2840 if (TARGET_AAPCS_BASED)
2842 if (TARGET_CALLER_INTERWORKING)
2843 error ("AAPCS does not support -mcaller-super-interworking");
2844 else
2845 if (TARGET_CALLEE_INTERWORKING)
2846 error ("AAPCS does not support -mcallee-super-interworking");
2849 /* iWMMXt and NEON are incompatible. */
2850 if (TARGET_IWMMXT && TARGET_NEON)
2851 error ("iWMMXt and NEON are incompatible");
2853 /* iWMMXt unsupported under Thumb mode. */
2854 if (TARGET_THUMB && TARGET_IWMMXT)
2855 error ("iWMMXt unsupported under Thumb mode");
2857 /* __fp16 support currently assumes the core has ldrh. */
2858 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2859 sorry ("__fp16 and no ldrh");
2861 /* If soft-float is specified then don't use FPU. */
2862 if (TARGET_SOFT_FLOAT)
2863 arm_fpu_attr = FPU_NONE;
2865 if (TARGET_AAPCS_BASED)
2867 if (arm_abi == ARM_ABI_IWMMXT)
2868 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2869 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2870 && TARGET_HARD_FLOAT
2871 && TARGET_VFP)
2872 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2873 else
2874 arm_pcs_default = ARM_PCS_AAPCS;
2876 else
2878 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2879 sorry ("-mfloat-abi=hard and VFP");
2881 if (arm_abi == ARM_ABI_APCS)
2882 arm_pcs_default = ARM_PCS_APCS;
2883 else
2884 arm_pcs_default = ARM_PCS_ATPCS;
2887 /* For arm2/3 there is no need to do any scheduling if we are doing
2888 software floating-point. */
2889 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2890 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2892 /* Use the cp15 method if it is available. */
2893 if (target_thread_pointer == TP_AUTO)
2895 if (arm_arch6k && !TARGET_THUMB1)
2896 target_thread_pointer = TP_CP15;
2897 else
2898 target_thread_pointer = TP_SOFT;
2901 if (TARGET_HARD_TP && TARGET_THUMB1)
2902 error ("can not use -mtp=cp15 with 16-bit Thumb");
2904 /* Override the default structure alignment for AAPCS ABI. */
2905 if (!global_options_set.x_arm_structure_size_boundary)
2907 if (TARGET_AAPCS_BASED)
2908 arm_structure_size_boundary = 8;
2910 else
2912 if (arm_structure_size_boundary != 8
2913 && arm_structure_size_boundary != 32
2914 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2916 if (ARM_DOUBLEWORD_ALIGN)
2917 warning (0,
2918 "structure size boundary can only be set to 8, 32 or 64");
2919 else
2920 warning (0, "structure size boundary can only be set to 8 or 32");
2921 arm_structure_size_boundary
2922 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2926 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2928 error ("RTP PIC is incompatible with Thumb");
2929 flag_pic = 0;
2932 /* If stack checking is disabled, we can use r10 as the PIC register,
2933 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2934 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2936 if (TARGET_VXWORKS_RTP)
2937 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2938 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2941 if (flag_pic && TARGET_VXWORKS_RTP)
2942 arm_pic_register = 9;
2944 if (arm_pic_register_string != NULL)
2946 int pic_register = decode_reg_name (arm_pic_register_string);
2948 if (!flag_pic)
2949 warning (0, "-mpic-register= is useless without -fpic");
2951 /* Prevent the user from choosing an obviously stupid PIC register. */
2952 else if (pic_register < 0 || call_used_regs[pic_register]
2953 || pic_register == HARD_FRAME_POINTER_REGNUM
2954 || pic_register == STACK_POINTER_REGNUM
2955 || pic_register >= PC_REGNUM
2956 || (TARGET_VXWORKS_RTP
2957 && (unsigned int) pic_register != arm_pic_register))
2958 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2959 else
2960 arm_pic_register = pic_register;
2963 if (TARGET_VXWORKS_RTP
2964 && !global_options_set.x_arm_pic_data_is_text_relative)
2965 arm_pic_data_is_text_relative = 0;
2967 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2968 if (fix_cm3_ldrd == 2)
2970 if (arm_selected_cpu->core == cortexm3)
2971 fix_cm3_ldrd = 1;
2972 else
2973 fix_cm3_ldrd = 0;
2976 /* Enable -munaligned-access by default for
2977 - all ARMv6 architecture-based processors
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-based processors.
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors. */
2985 if (unaligned_access == 2)
2987 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2988 unaligned_access = 1;
2989 else
2990 unaligned_access = 0;
2992 else if (unaligned_access == 1
2993 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2995 warning (0, "target CPU does not support unaligned accesses");
2996 unaligned_access = 0;
2999 if (TARGET_THUMB1 && flag_schedule_insns)
3001 /* Don't warn since it's on by default in -O2. */
3002 flag_schedule_insns = 0;
3005 if (optimize_size)
3007 /* If optimizing for size, bump the number of instructions that we
3008 are prepared to conditionally execute (even on a StrongARM). */
3009 max_insns_skipped = 6;
3011 /* For THUMB2, we limit the conditional sequence to one IT block. */
3012 if (TARGET_THUMB2)
3013 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3015 else
3016 max_insns_skipped = current_tune->max_insns_skipped;
3018 /* Hot/Cold partitioning is not currently supported, since we can't
3019 handle literal pool placement in that case. */
3020 if (flag_reorder_blocks_and_partition)
3022 inform (input_location,
3023 "-freorder-blocks-and-partition not supported on this architecture");
3024 flag_reorder_blocks_and_partition = 0;
3025 flag_reorder_blocks = 1;
3028 if (flag_pic)
3029 /* Hoisting PIC address calculations more aggressively provides a small,
3030 but measurable, size reduction for PIC code. Therefore, we decrease
3031 the bar for unrestricted expression hoisting to the cost of PIC address
3032 calculation, which is 2 instructions. */
3033 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3034 global_options.x_param_values,
3035 global_options_set.x_param_values);
3037 /* ARM EABI defaults to strict volatile bitfields. */
3038 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3039 && abi_version_at_least(2))
3040 flag_strict_volatile_bitfields = 1;
3042 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
3043 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3044 if (flag_prefetch_loop_arrays < 0
3045 && HAVE_prefetch
3046 && optimize >= 3
3047 && current_tune->num_prefetch_slots > 0)
3048 flag_prefetch_loop_arrays = 1;
3050 /* Set up parameters to be used in the prefetching algorithm. Do not override
3051 the defaults unless we are tuning for a core we have researched values for. */
3052 if (current_tune->num_prefetch_slots > 0)
3053 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3054 current_tune->num_prefetch_slots,
3055 global_options.x_param_values,
3056 global_options_set.x_param_values);
3057 if (current_tune->l1_cache_line_size >= 0)
3058 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3059 current_tune->l1_cache_line_size,
3060 global_options.x_param_values,
3061 global_options_set.x_param_values);
3062 if (current_tune->l1_cache_size >= 0)
3063 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3064 current_tune->l1_cache_size,
3065 global_options.x_param_values,
3066 global_options_set.x_param_values);
3068 /* Use Neon rather than core registers to perform 64-bit
3069 operations. */
3070 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3071 if (use_neon_for_64bits == 1)
3072 prefer_neon_for_64bits = true;
3074 /* Use the alternative scheduling-pressure algorithm by default. */
3075 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3076 global_options.x_param_values,
3077 global_options_set.x_param_values);
3079 /* Disable shrink-wrap when optimizing function for size, since it tends to
3080 generate additional returns. */
3081 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3082 flag_shrink_wrap = false;
3083 /* TBD: Dwarf info for apcs frame is not handled yet. */
3084 if (TARGET_APCS_FRAME)
3085 flag_shrink_wrap = false;
3087 /* We only support -mslow-flash-data on armv7-m targets. */
3088 if (target_slow_flash_data
3089 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3090 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3091 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3093 /* Currently, for slow flash data, we just disable literal pools. */
3094 if (target_slow_flash_data)
3095 arm_disable_literal_pool = true;
3097 /* Thumb2 inline assembly code should always use unified syntax.
3098 This will apply to ARM and Thumb1 eventually. */
3099 if (TARGET_THUMB2)
3100 inline_asm_unified = 1;
3102 /* Disable scheduling fusion by default if this is not an armv7 processor
3103 or it doesn't prefer ldrd/strd. */
3104 if (flag_schedule_fusion == 2
3105 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3106 flag_schedule_fusion = 0;
3108 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3109 - epilogue_insns - does not accurately model the corresponding insns
3110 emitted in the asm file. In particular, see the comment in thumb_exit
3111 'Find out how many of the (return) argument registers we can corrupt'.
3112 As a consequence, the epilogue may clobber registers without
3113 fuse-caller-save finding out about it. Therefore, disable fuse-caller-save
3114 in Thumb1 mode.
3115 TODO: Accurately model clobbers for epilogue_insns and reenable
3116 fuse-caller-save. */
3117 if (TARGET_THUMB1)
3118 flag_use_caller_save = 0;
3120 /* Register global variables with the garbage collector. */
3121 arm_add_gc_roots ();
3124 static void
3125 arm_add_gc_roots (void)
3127 gcc_obstack_init(&minipool_obstack);
3128 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3131 /* A table of known ARM exception types.
3132 For use with the interrupt function attribute. */
3134 typedef struct
3136 const char *const arg;
3137 const unsigned long return_value;
3139 isr_attribute_arg;
3141 static const isr_attribute_arg isr_attribute_args [] =
3143 { "IRQ", ARM_FT_ISR },
3144 { "irq", ARM_FT_ISR },
3145 { "FIQ", ARM_FT_FIQ },
3146 { "fiq", ARM_FT_FIQ },
3147 { "ABORT", ARM_FT_ISR },
3148 { "abort", ARM_FT_ISR },
3149 { "ABORT", ARM_FT_ISR },
3150 { "abort", ARM_FT_ISR },
3151 { "UNDEF", ARM_FT_EXCEPTION },
3152 { "undef", ARM_FT_EXCEPTION },
3153 { "SWI", ARM_FT_EXCEPTION },
3154 { "swi", ARM_FT_EXCEPTION },
3155 { NULL, ARM_FT_NORMAL }
3158 /* Returns the (interrupt) function type of the current
3159 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3161 static unsigned long
3162 arm_isr_value (tree argument)
3164 const isr_attribute_arg * ptr;
3165 const char * arg;
3167 if (!arm_arch_notm)
3168 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3170 /* No argument - default to IRQ. */
3171 if (argument == NULL_TREE)
3172 return ARM_FT_ISR;
3174 /* Get the value of the argument. */
3175 if (TREE_VALUE (argument) == NULL_TREE
3176 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3177 return ARM_FT_UNKNOWN;
3179 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3181 /* Check it against the list of known arguments. */
3182 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3183 if (streq (arg, ptr->arg))
3184 return ptr->return_value;
3186 /* An unrecognized interrupt type. */
3187 return ARM_FT_UNKNOWN;
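/* The table above corresponds to source-level attribute arguments such as

     void uart_rx (void) __attribute__ ((interrupt ("IRQ")));
     void data_abort (void) __attribute__ ((isr ("ABORT")));

   an unrecognized string (or a non-string argument) yields the
   ARM_FT_UNKNOWN return above.  */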
3190 /* Computes the type of the current function. */
3192 static unsigned long
3193 arm_compute_func_type (void)
3195 unsigned long type = ARM_FT_UNKNOWN;
3196 tree a;
3197 tree attr;
3199 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3201 /* Decide if the current function is volatile. Such functions
3202 never return, and many memory cycles can be saved by not storing
3203 register values that will never be needed again. This optimization
3204 was added to speed up context switching in a kernel application. */
3205 if (optimize > 0
3206 && (TREE_NOTHROW (current_function_decl)
3207 || !(flag_unwind_tables
3208 || (flag_exceptions
3209 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3210 && TREE_THIS_VOLATILE (current_function_decl))
3211 type |= ARM_FT_VOLATILE;
3213 if (cfun->static_chain_decl != NULL)
3214 type |= ARM_FT_NESTED;
3216 attr = DECL_ATTRIBUTES (current_function_decl);
3218 a = lookup_attribute ("naked", attr);
3219 if (a != NULL_TREE)
3220 type |= ARM_FT_NAKED;
3222 a = lookup_attribute ("isr", attr);
3223 if (a == NULL_TREE)
3224 a = lookup_attribute ("interrupt", attr);
3226 if (a == NULL_TREE)
3227 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3228 else
3229 type |= arm_isr_value (TREE_VALUE (a));
3231 return type;
3234 /* Returns the type of the current function. */
3236 unsigned long
3237 arm_current_func_type (void)
3239 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3240 cfun->machine->func_type = arm_compute_func_type ();
3242 return cfun->machine->func_type;
3245 bool
3246 arm_allocate_stack_slots_for_args (void)
3248 /* Naked functions should not allocate stack slots for arguments. */
3249 return !IS_NAKED (arm_current_func_type ());
3252 static bool
3253 arm_warn_func_return (tree decl)
3255 /* Naked functions are implemented entirely in assembly, including the
3256 return sequence, so suppress warnings about this. */
3257 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
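/* A naked function is declared as, for example,

     void reset_handler (void) __attribute__ ((naked));

   GCC emits no prologue or epilogue for it, so its body is expected to be
   essentially all inline assembly; a missing-return warning would just be
   noise, which is what the check above suppresses.  */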
3261 /* Output assembler code for a block containing the constant parts
3262 of a trampoline, leaving space for the variable parts.
3264 On the ARM, (if r8 is the static chain regnum, and remembering that
3265 referencing pc adds an offset of 8) the trampoline looks like:
3266 ldr r8, [pc, #0]
3267 ldr pc, [pc]
3268 .word static chain value
3269 .word function's address
3270 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3272 static void
3273 arm_asm_trampoline_template (FILE *f)
3275 if (TARGET_ARM)
3277 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3278 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3280 else if (TARGET_THUMB2)
3282 /* The Thumb-2 trampoline is similar to the arm implementation.
3283 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3284 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3285 STATIC_CHAIN_REGNUM, PC_REGNUM);
3286 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3288 else
3290 ASM_OUTPUT_ALIGN (f, 2);
3291 fprintf (f, "\t.code\t16\n");
3292 fprintf (f, ".Ltrampoline_start:\n");
3293 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3294 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3295 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3296 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3297 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3298 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3300 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3301 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3304 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3306 static void
3307 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3309 rtx fnaddr, mem, a_tramp;
3311 emit_block_move (m_tramp, assemble_trampoline_template (),
3312 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3314 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3315 emit_move_insn (mem, chain_value);
3317 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3318 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3319 emit_move_insn (mem, fnaddr);
3321 a_tramp = XEXP (m_tramp, 0);
3322 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3323 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3324 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3327 /* Thumb trampolines should be entered in thumb mode, so set
3328 the bottom bit of the address. */
3330 static rtx
3331 arm_trampoline_adjust_address (rtx addr)
3333 if (TARGET_THUMB)
3334 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3335 NULL, 0, OPTAB_LIB_WIDEN);
3336 return addr;
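/* On Thumb targets the trampoline template is assembled as Thumb code,
   and an indirect branch (BX/BLX) to an address with bit 0 set switches
   the core to Thumb state, which is why the address is OR-ed with 1
   above.  Illustratively, a call through a nested-function pointer ends
   up as something like

     blx  r3    @ r3 holds the trampoline address with bit 0 already set

   (the register choice here is purely illustrative).  */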
3339 /* Return 1 if it is possible to return using a single instruction.
3340 If SIBLING is non-null, this is a test for a return before a sibling
3341 call. SIBLING is the call insn, so we can examine its register usage. */
3344 use_return_insn (int iscond, rtx sibling)
3346 int regno;
3347 unsigned int func_type;
3348 unsigned long saved_int_regs;
3349 unsigned HOST_WIDE_INT stack_adjust;
3350 arm_stack_offsets *offsets;
3352 /* Never use a return instruction before reload has run. */
3353 if (!reload_completed)
3354 return 0;
3356 func_type = arm_current_func_type ();
3358 /* Naked, volatile and stack alignment functions need special
3359 consideration. */
3360 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3361 return 0;
3363 /* So do interrupt functions that use the frame pointer and Thumb
3364 interrupt functions. */
3365 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3366 return 0;
3368 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3369 && !optimize_function_for_size_p (cfun))
3370 return 0;
3372 offsets = arm_get_frame_offsets ();
3373 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3375 /* As do variadic functions. */
3376 if (crtl->args.pretend_args_size
3377 || cfun->machine->uses_anonymous_args
3378 /* Or if the function calls __builtin_eh_return () */
3379 || crtl->calls_eh_return
3380 /* Or if the function calls alloca */
3381 || cfun->calls_alloca
3382 /* Or if there is a stack adjustment. However, if the stack pointer
3383 is saved on the stack, we can use a pre-incrementing stack load. */
3384 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3385 && stack_adjust == 4)))
3386 return 0;
3388 saved_int_regs = offsets->saved_regs_mask;
3390 /* Unfortunately, the insn
3392 ldmib sp, {..., sp, ...}
3394 triggers a bug on most SA-110 based devices, such that the stack
3395 pointer won't be correctly restored if the instruction takes a
3396 page fault. We work around this problem by popping r3 along with
3397 the other registers, since that is never slower than executing
3398 another instruction.
3400 We test for !arm_arch5 here, because code for any architecture
3401 less than this could potentially be run on one of the buggy
3402 chips. */
3403 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3405 /* Validate that r3 is a call-clobbered register (always true in
3406 the default abi) ... */
3407 if (!call_used_regs[3])
3408 return 0;
3410 /* ... that it isn't being used for a return value ... */
3411 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3412 return 0;
3414 /* ... or for a tail-call argument ... */
3415 if (sibling)
3417 gcc_assert (CALL_P (sibling));
3419 if (find_regno_fusage (sibling, USE, 3))
3420 return 0;
3423 /* ... and that there are no call-saved registers in r0-r2
3424 (always true in the default ABI). */
3425 if (saved_int_regs & 0x7)
3426 return 0;
3429 /* Can't be done if interworking with Thumb, and any registers have been
3430 stacked. */
3431 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3432 return 0;
3434 /* On StrongARM, conditional returns are expensive if they aren't
3435 taken and multiple registers have been stacked. */
3436 if (iscond && arm_tune_strongarm)
3438 /* Conditional return when just the LR is stored is a simple
3439 conditional-load instruction, that's not expensive. */
3440 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3441 return 0;
3443 if (flag_pic
3444 && arm_pic_register != INVALID_REGNUM
3445 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3446 return 0;
3449 /* If there are saved registers but the LR isn't saved, then we need
3450 two instructions for the return. */
3451 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3452 return 0;
3454 /* Can't be done if any of the VFP regs are pushed,
3455 since this also requires an insn. */
3456 if (TARGET_HARD_FLOAT && TARGET_VFP)
3457 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3458 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3459 return 0;
3461 if (TARGET_REALLY_IWMMXT)
3462 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3463 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3464 return 0;
3466 return 1;
3469 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3470 shrink-wrapping if possible. This is the case if we need to emit a
3471 prologue, which we can test by looking at the offsets. */
3472 bool
3473 use_simple_return_p (void)
3475 arm_stack_offsets *offsets;
3477 offsets = arm_get_frame_offsets ();
3478 return offsets->outgoing_args != 0;
3481 /* Return TRUE if int I is a valid immediate ARM constant. */
3484 const_ok_for_arm (HOST_WIDE_INT i)
3486 int lowbit;
3488 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3489 be all zero, or all one. */
3490 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3491 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3492 != ((~(unsigned HOST_WIDE_INT) 0)
3493 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3494 return FALSE;
3496 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3498 /* Fast return for 0 and small values. We must do this for zero, since
3499 the code below can't handle that one case. */
3500 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3501 return TRUE;
3503 /* Get the number of trailing zeros. */
3504 lowbit = ffs((int) i) - 1;
3506 /* Only even shifts are allowed in ARM mode so round down to the
3507 nearest even number. */
3508 if (TARGET_ARM)
3509 lowbit &= ~1;
3511 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3512 return TRUE;
3514 if (TARGET_ARM)
3516 /* Allow rotated constants in ARM mode. */
3517 if (lowbit <= 4
3518 && ((i & ~0xc000003f) == 0
3519 || (i & ~0xf000000f) == 0
3520 || (i & ~0xfc000003) == 0))
3521 return TRUE;
3523 else
3525 HOST_WIDE_INT v;
3527 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3528 v = i & 0xff;
3529 v |= v << 16;
3530 if (i == v || i == (v | (v << 8)))
3531 return TRUE;
3533 /* Allow repeated pattern 0xXY00XY00. */
3534 v = i & 0xff00;
3535 v |= v << 16;
3536 if (i == v)
3537 return TRUE;
3540 return FALSE;
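/* A few hand-worked examples of the checks above (not exhaustive):
     0x000000ff -> TRUE  (fits in the low eight bits)
     0xff000000 -> TRUE  (0xff at an even rotation; lowbit == 24)
     0x000003fc -> TRUE  (0xff << 2)
     0x000001fe -> FALSE for ARM (would need an odd rotation), TRUE for
                   Thumb-2 (an 8-bit value at an arbitrary shift)
     0x00ff00ff -> FALSE for ARM, TRUE for Thumb-2 (0x00XY00XY pattern).  */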
3543 /* Return true if I is a valid constant for the operation CODE. */
3545 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3547 if (const_ok_for_arm (i))
3548 return 1;
3550 switch (code)
3552 case SET:
3553 /* See if we can use movw. */
3554 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3555 return 1;
3556 else
3557 /* Otherwise, try mvn. */
3558 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3560 case PLUS:
3561 /* See if we can use addw or subw. */
3562 if (TARGET_THUMB2
3563 && ((i & 0xfffff000) == 0
3564 || ((-i) & 0xfffff000) == 0))
3565 return 1;
3566 /* else fall through. */
3568 case COMPARE:
3569 case EQ:
3570 case NE:
3571 case GT:
3572 case LE:
3573 case LT:
3574 case GE:
3575 case GEU:
3576 case LTU:
3577 case GTU:
3578 case LEU:
3579 case UNORDERED:
3580 case ORDERED:
3581 case UNEQ:
3582 case UNGE:
3583 case UNLT:
3584 case UNGT:
3585 case UNLE:
3586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3588 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3589 case XOR:
3590 return 0;
3592 case IOR:
3593 if (TARGET_THUMB2)
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3595 return 0;
3597 case AND:
3598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3600 default:
3601 gcc_unreachable ();
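/* Examples, worked by hand against the cases above: with CODE == SET,
   0xffffff00 is accepted because its complement 0x000000ff is a valid
   immediate (an MVN can load it), and 0x0000abcd is accepted via MOVW
   on targets that have it.  With CODE == PLUS on Thumb-2, 0x00000fff
   is accepted because ADDW/SUBW take a 12-bit immediate.  */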
3605 /* Return true if I is a valid di mode constant for the operation CODE. */
3607 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3609 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3610 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3611 rtx hi = GEN_INT (hi_val);
3612 rtx lo = GEN_INT (lo_val);
3614 if (TARGET_THUMB1)
3615 return 0;
3617 switch (code)
3619 case AND:
3620 case IOR:
3621 case XOR:
3622 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3623 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3624 case PLUS:
3625 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3627 default:
3628 return 0;
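/* Example: a DImode AND with 0xff000000ff000000 is accepted, since each
   32-bit half (0xff000000) is itself a valid immediate for AND, so the
   split DImode operation needs only two SImode instructions.  */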
3632 /* Emit a sequence of insns to handle a large constant.
3633 CODE is the code of the operation required, it can be any of SET, PLUS,
3634 IOR, AND, XOR, MINUS;
3635 MODE is the mode in which the operation is being performed;
3636 VAL is the integer to operate on;
3637 SOURCE is the other operand (a register, or a null-pointer for SET);
3638 SUBTARGETS means it is safe to create scratch registers if that will
3639 either produce a simpler sequence, or we will want to cse the values.
3640 Return value is the number of insns emitted. */
3642 /* ??? Tweak this for thumb2. */
3644 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3645 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3647 rtx cond;
3649 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3650 cond = COND_EXEC_TEST (PATTERN (insn));
3651 else
3652 cond = NULL_RTX;
3654 if (subtargets || code == SET
3655 || (REG_P (target) && REG_P (source)
3656 && REGNO (target) != REGNO (source)))
3658 /* After arm_reorg has been called, we can't fix up expensive
3659 constants by pushing them into memory so we must synthesize
3660 them in-line, regardless of the cost. This is only likely to
3661 be more costly on chips that have load delay slots and we are
3662 compiling without running the scheduler (so no splitting
3663 occurred before the final instruction emission).
3665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3667 if (!cfun->machine->after_arm_reorg
3668 && !cond
3669 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3670 1, 0)
3671 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3672 + (code != SET))))
3674 if (code == SET)
3676 /* Currently SET is the only monadic value for CODE; all
3677 the rest are dyadic. */
3678 if (TARGET_USE_MOVT)
3679 arm_emit_movpair (target, GEN_INT (val));
3680 else
3681 emit_set_insn (target, GEN_INT (val));
3683 return 1;
3685 else
3687 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3689 if (TARGET_USE_MOVT)
3690 arm_emit_movpair (temp, GEN_INT (val));
3691 else
3692 emit_set_insn (temp, GEN_INT (val));
3694 /* For MINUS, the constant is the minuend (SOURCE is subtracted
3695 from it), since we never have subtraction of a constant. */
3696 if (code == MINUS)
3697 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3698 else
3699 emit_set_insn (target,
3700 gen_rtx_fmt_ee (code, mode, source, temp));
3701 return 2;
3706 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3710 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3711 ARM/THUMB2 immediates and add up to VAL.
3712 The function return value gives the number of insns required. */
3713 static int
3714 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3715 struct four_ints *return_sequence)
3717 int best_consecutive_zeros = 0;
3718 int i;
3719 int best_start = 0;
3720 int insns1, insns2;
3721 struct four_ints tmp_sequence;
3723 /* If we aren't targeting ARM, the best place to start is always at
3724 the bottom, otherwise look more closely. */
3725 if (TARGET_ARM)
3727 for (i = 0; i < 32; i += 2)
3729 int consecutive_zeros = 0;
3731 if (!(val & (3 << i)))
3733 while ((i < 32) && !(val & (3 << i)))
3735 consecutive_zeros += 2;
3736 i += 2;
3738 if (consecutive_zeros > best_consecutive_zeros)
3740 best_consecutive_zeros = consecutive_zeros;
3741 best_start = i - consecutive_zeros;
3743 i -= 2;
3748 /* So long as it won't require any more insns to do so, it's
3749 desirable to emit a small constant (in bits 0...9) in the last
3750 insn. This way there is more chance that it can be combined with
3751 a later addressing insn to form a pre-indexed load or store
3752 operation. Consider:
3754 *((volatile int *)0xe0000100) = 1;
3755 *((volatile int *)0xe0000110) = 2;
3757 We want this to wind up as:
3759 mov rA, #0xe0000000
3760 mov rB, #1
3761 str rB, [rA, #0x100]
3762 mov rB, #2
3763 str rB, [rA, #0x110]
3765 rather than having to synthesize both large constants from scratch.
3767 Therefore, we calculate how many insns would be required to emit
3768 the constant starting from `best_start', and also starting from
3769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3770 yield a shorter sequence, we may as well use zero. */
3771 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3772 if (best_start != 0
3773 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3775 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3776 if (insns2 <= insns1)
3778 *return_sequence = tmp_sequence;
3779 insns1 = insns2;
3783 return insns1;
3786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3787 static int
3788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3789 struct four_ints *return_sequence, int i)
3791 int remainder = val & 0xffffffff;
3792 int insns = 0;
3794 /* Try and find a way of doing the job in either two or three
3795 instructions.
3797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3798 location. We start at position I. This may be the MSB, or
3799 optimal_immediate_sequence may have positioned it at the largest block
3800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3801 wrapping around to the top of the word when we drop off the bottom.
3802 In the worst case this code should produce no more than four insns.
3804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3805 constants, shifted to any arbitrary location. We should always start
3806 at the MSB. */
3809 int end;
3810 unsigned int b1, b2, b3, b4;
3811 unsigned HOST_WIDE_INT result;
3812 int loc;
3814 gcc_assert (insns < 4);
3816 if (i <= 0)
3817 i += 32;
3819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3820 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3822 loc = i;
3823 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3824 /* We can use addw/subw for the last 12 bits. */
3825 result = remainder;
3826 else
3828 /* Use an 8-bit shifted/rotated immediate. */
3829 end = i - 8;
3830 if (end < 0)
3831 end += 32;
3832 result = remainder & ((0x0ff << end)
3833 | ((i < end) ? (0xff >> (32 - end))
3834 : 0));
3835 i -= 8;
3838 else
3840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3841 arbitrary shifts. */
3842 i -= TARGET_ARM ? 2 : 1;
3843 continue;
3846 /* Next, see if we can do a better job with a thumb2 replicated
3847 constant.
3849 We do it this way around to catch the cases like 0x01F001E0 where
3850 two 8-bit immediates would work, but a replicated constant would
3851 make it worse.
3853 TODO: 16-bit constants that don't clear all the bits, but still win.
3854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3855 if (TARGET_THUMB2)
3857 b1 = (remainder & 0xff000000) >> 24;
3858 b2 = (remainder & 0x00ff0000) >> 16;
3859 b3 = (remainder & 0x0000ff00) >> 8;
3860 b4 = remainder & 0xff;
3862 if (loc > 24)
3864 /* The 8-bit immediate already found clears b1 (and maybe b2),
3865 but must leave b3 and b4 alone. */
3867 /* First try to find a 32-bit replicated constant that clears
3868 almost everything. We can assume that we can't do it in one,
3869 or else we wouldn't be here. */
3870 unsigned int tmp = b1 & b2 & b3 & b4;
3871 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3872 + (tmp << 24);
3873 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3874 + (tmp == b3) + (tmp == b4);
3875 if (tmp
3876 && (matching_bytes >= 3
3877 || (matching_bytes == 2
3878 && const_ok_for_op (remainder & ~tmp2, code))))
3880 /* At least 3 of the bytes match, and the fourth has at
3881 least as many bits set, or two of the bytes match
3882 and it will only require one more insn to finish. */
3883 result = tmp2;
3884 i = tmp != b1 ? 32
3885 : tmp != b2 ? 24
3886 : tmp != b3 ? 16
3887 : 8;
3890 /* Second, try to find a 16-bit replicated constant that can
3891 leave three of the bytes clear. If b2 or b4 is already
3892 zero, then we can. If the 8-bit from above would not
3893 clear b2 anyway, then we still win. */
3894 else if (b1 == b3 && (!b2 || !b4
3895 || (remainder & 0x00ff0000 & ~result)))
3897 result = remainder & 0xff00ff00;
3898 i = 24;
3901 else if (loc > 16)
3903 /* The 8-bit immediate already found clears b2 (and maybe b3)
3904 and we don't get here unless b1 is already clear, but it will
3905 leave b4 unchanged. */
3907 /* If we can clear b2 and b4 at once, then we win, since the
3908 8-bits couldn't possibly reach that far. */
3909 if (b2 == b4)
3911 result = remainder & 0x00ff00ff;
3912 i = 16;
3917 return_sequence->i[insns++] = result;
3918 remainder &= ~result;
3920 if (code == SET || code == MINUS)
3921 code = PLUS;
3923 while (remainder);
3925 return insns;
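/* For a "worst case" ARM-mode constant such as 0x12345678 the loop above
   typically emits four 8-bit pieces (a MOV followed by three ORRs when
   CODE is SET); the assertion above guarantees the sequence never
   exceeds four insns.  */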
3928 /* Emit an instruction with the indicated PATTERN. If COND is
3929 non-NULL, conditionalize the execution of the instruction on COND
3930 being true. */
3932 static void
3933 emit_constant_insn (rtx cond, rtx pattern)
3935 if (cond)
3936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3937 emit_insn (pattern);
3940 /* As above, but extra parameter GENERATE which, if clear, suppresses
3941 RTL generation. */
3943 static int
3944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3945 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3946 int generate)
3948 int can_invert = 0;
3949 int can_negate = 0;
3950 int final_invert = 0;
3951 int i;
3952 int set_sign_bit_copies = 0;
3953 int clear_sign_bit_copies = 0;
3954 int clear_zero_bit_copies = 0;
3955 int set_zero_bit_copies = 0;
3956 int insns = 0, neg_insns, inv_insns;
3957 unsigned HOST_WIDE_INT temp1, temp2;
3958 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3959 struct four_ints *immediates;
3960 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3962 /* Find out which operations are safe for a given CODE. Also do a quick
3963 check for degenerate cases; these can occur when DImode operations
3964 are split. */
3965 switch (code)
3967 case SET:
3968 can_invert = 1;
3969 break;
3971 case PLUS:
3972 can_negate = 1;
3973 break;
3975 case IOR:
3976 if (remainder == 0xffffffff)
3978 if (generate)
3979 emit_constant_insn (cond,
3980 gen_rtx_SET (VOIDmode, target,
3981 GEN_INT (ARM_SIGN_EXTEND (val))));
3982 return 1;
3985 if (remainder == 0)
3987 if (reload_completed && rtx_equal_p (target, source))
3988 return 0;
3990 if (generate)
3991 emit_constant_insn (cond,
3992 gen_rtx_SET (VOIDmode, target, source));
3993 return 1;
3995 break;
3997 case AND:
3998 if (remainder == 0)
4000 if (generate)
4001 emit_constant_insn (cond,
4002 gen_rtx_SET (VOIDmode, target, const0_rtx));
4003 return 1;
4005 if (remainder == 0xffffffff)
4007 if (reload_completed && rtx_equal_p (target, source))
4008 return 0;
4009 if (generate)
4010 emit_constant_insn (cond,
4011 gen_rtx_SET (VOIDmode, target, source));
4012 return 1;
4014 can_invert = 1;
4015 break;
4017 case XOR:
4018 if (remainder == 0)
4020 if (reload_completed && rtx_equal_p (target, source))
4021 return 0;
4022 if (generate)
4023 emit_constant_insn (cond,
4024 gen_rtx_SET (VOIDmode, target, source));
4025 return 1;
4028 if (remainder == 0xffffffff)
4030 if (generate)
4031 emit_constant_insn (cond,
4032 gen_rtx_SET (VOIDmode, target,
4033 gen_rtx_NOT (mode, source)));
4034 return 1;
4036 final_invert = 1;
4037 break;
4039 case MINUS:
4040 /* We treat MINUS as (val - source), since (source - val) is always
4041 passed as (source + (-val)). */
4042 if (remainder == 0)
4044 if (generate)
4045 emit_constant_insn (cond,
4046 gen_rtx_SET (VOIDmode, target,
4047 gen_rtx_NEG (mode, source)));
4048 return 1;
4050 if (const_ok_for_arm (val))
4052 if (generate)
4053 emit_constant_insn (cond,
4054 gen_rtx_SET (VOIDmode, target,
4055 gen_rtx_MINUS (mode, GEN_INT (val),
4056 source)));
4057 return 1;
4060 break;
4062 default:
4063 gcc_unreachable ();
4066 /* If we can do it in one insn get out quickly. */
4067 if (const_ok_for_op (val, code))
4069 if (generate)
4070 emit_constant_insn (cond,
4071 gen_rtx_SET (VOIDmode, target,
4072 (source
4073 ? gen_rtx_fmt_ee (code, mode, source,
4074 GEN_INT (val))
4075 : GEN_INT (val))));
4076 return 1;
4079 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4080 insn. */
4081 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4082 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4084 if (generate)
4086 if (mode == SImode && i == 16)
4087 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4088 smaller insn. */
4089 emit_constant_insn (cond,
4090 gen_zero_extendhisi2
4091 (target, gen_lowpart (HImode, source)));
4092 else
4093 /* Extz only supports SImode, but we can coerce the operands
4094 into that mode. */
4095 emit_constant_insn (cond,
4096 gen_extzv_t2 (gen_lowpart (SImode, target),
4097 gen_lowpart (SImode, source),
4098 GEN_INT (i), const0_rtx));
4101 return 1;
4104 /* Calculate a few attributes that may be useful for specific
4105 optimizations. */
4106 /* Count number of leading zeros. */
4107 for (i = 31; i >= 0; i--)
4109 if ((remainder & (1 << i)) == 0)
4110 clear_sign_bit_copies++;
4111 else
4112 break;
4115 /* Count number of leading 1's. */
4116 for (i = 31; i >= 0; i--)
4118 if ((remainder & (1 << i)) != 0)
4119 set_sign_bit_copies++;
4120 else
4121 break;
4124 /* Count number of trailing zeros. */
4125 for (i = 0; i <= 31; i++)
4127 if ((remainder & (1 << i)) == 0)
4128 clear_zero_bit_copies++;
4129 else
4130 break;
4133 /* Count number of trailing 1's. */
4134 for (i = 0; i <= 31; i++)
4136 if ((remainder & (1 << i)) != 0)
4137 set_zero_bit_copies++;
4138 else
4139 break;
4142 switch (code)
4144 case SET:
4145 /* See if we can do this by sign_extending a constant that is known
4146 to be negative. This is a good way of doing it, since the shift
4147 may well merge into a subsequent insn. */
4148 if (set_sign_bit_copies > 1)
4150 if (const_ok_for_arm
4151 (temp1 = ARM_SIGN_EXTEND (remainder
4152 << (set_sign_bit_copies - 1))))
4154 if (generate)
4156 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4157 emit_constant_insn (cond,
4158 gen_rtx_SET (VOIDmode, new_src,
4159 GEN_INT (temp1)));
4160 emit_constant_insn (cond,
4161 gen_ashrsi3 (target, new_src,
4162 GEN_INT (set_sign_bit_copies - 1)));
4164 return 2;
4166 /* For an inverted constant, we will need to set the low bits;
4167 these will be shifted out of harm's way. */
4168 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4169 if (const_ok_for_arm (~temp1))
4171 if (generate)
4173 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4174 emit_constant_insn (cond,
4175 gen_rtx_SET (VOIDmode, new_src,
4176 GEN_INT (temp1)));
4177 emit_constant_insn (cond,
4178 gen_ashrsi3 (target, new_src,
4179 GEN_INT (set_sign_bit_copies - 1)));
4181 return 2;
4185 /* See if we can calculate the value as the difference between two
4186 valid immediates. */
4187 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4189 int topshift = clear_sign_bit_copies & ~1;
4191 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4192 & (0xff000000 >> topshift));
4194 /* If temp1 is zero, then that means the 9 most significant
4195 bits of remainder were 1 and we've caused it to overflow.
4196 When topshift is 0 we don't need to do anything since we
4197 can borrow from 'bit 32'. */
4198 if (temp1 == 0 && topshift != 0)
4199 temp1 = 0x80000000 >> (topshift - 1);
4201 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4203 if (const_ok_for_arm (temp2))
4205 if (generate)
4207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (VOIDmode, new_src,
4210 GEN_INT (temp1)));
4211 emit_constant_insn (cond,
4212 gen_addsi3 (target, new_src,
4213 GEN_INT (-temp2)));
4216 return 2;
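/* Hand-worked example of the difference trick above: a SET of 0x00fffff0
   (neither it nor its complement is a valid immediate) gives
   temp1 == 0x01000000 and temp2 == 0x10, i.e. roughly

	mov	rT, #0x01000000
	sub	rD, rT, #16

   two instructions rather than the three needed to build it bytewise.  */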
4220 /* See if we can generate this by setting the bottom (or the top)
4221 16 bits, and then shifting these into the other half of the
4222 word. We only look for the simplest cases; to do more would cost
4223 too much. Be careful, however, not to generate this when the
4224 alternative would take fewer insns. */
4225 if (val & 0xffff0000)
4227 temp1 = remainder & 0xffff0000;
4228 temp2 = remainder & 0x0000ffff;
4230 /* Overlaps outside this range are best done using other methods. */
4231 for (i = 9; i < 24; i++)
4233 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4234 && !const_ok_for_arm (temp2))
4236 rtx new_src = (subtargets
4237 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4238 : target);
4239 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4240 source, subtargets, generate);
4241 source = new_src;
4242 if (generate)
4243 emit_constant_insn
4244 (cond,
4245 gen_rtx_SET
4246 (VOIDmode, target,
4247 gen_rtx_IOR (mode,
4248 gen_rtx_ASHIFT (mode, source,
4249 GEN_INT (i)),
4250 source)));
4251 return insns + 1;
4255 /* Don't duplicate cases already considered. */
4256 for (i = 17; i < 24; i++)
4258 if (((temp1 | (temp1 >> i)) == remainder)
4259 && !const_ok_for_arm (temp1))
4261 rtx new_src = (subtargets
4262 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4263 : target);
4264 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4265 source, subtargets, generate);
4266 source = new_src;
4267 if (generate)
4268 emit_constant_insn
4269 (cond,
4270 gen_rtx_SET (VOIDmode, target,
4271 gen_rtx_IOR
4272 (mode,
4273 gen_rtx_LSHIFTRT (mode, source,
4274 GEN_INT (i)),
4275 source)));
4276 return insns + 1;
4280 break;
4282 case IOR:
4283 case XOR:
4284 /* If we have IOR or XOR, and the constant can be loaded in a
4285 single instruction, and we can find a temporary to put it in,
4286 then this can be done in two instructions instead of 3-4. */
4287 if (subtargets
4288 /* TARGET can't be NULL if SUBTARGETS is 0 */
4289 || (reload_completed && !reg_mentioned_p (target, source)))
4291 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4293 if (generate)
4295 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4297 emit_constant_insn (cond,
4298 gen_rtx_SET (VOIDmode, sub,
4299 GEN_INT (val)));
4300 emit_constant_insn (cond,
4301 gen_rtx_SET (VOIDmode, target,
4302 gen_rtx_fmt_ee (code, mode,
4303 source, sub)));
4305 return 2;
4309 if (code == XOR)
4310 break;
4312 /* Convert
4313 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4314 followed by 0s, e.g. 0xfff00000) into
4315 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4317 This can be done in 2 instructions by using shifts with mov or mvn.
4318 e.g. for
4319 x = x | 0xfff00000;
4320 we generate.
4321 mvn r0, r0, asl #12
4322 mvn r0, r0, lsr #12 */
4323 if (set_sign_bit_copies > 8
4324 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4326 if (generate)
4328 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4329 rtx shift = GEN_INT (set_sign_bit_copies);
4331 emit_constant_insn
4332 (cond,
4333 gen_rtx_SET (VOIDmode, sub,
4334 gen_rtx_NOT (mode,
4335 gen_rtx_ASHIFT (mode,
4336 source,
4337 shift))));
4338 emit_constant_insn
4339 (cond,
4340 gen_rtx_SET (VOIDmode, target,
4341 gen_rtx_NOT (mode,
4342 gen_rtx_LSHIFTRT (mode, sub,
4343 shift))));
4345 return 2;
4348 /* Convert
4349 x = y | constant (which has set_zero_bit_copies trailing ones)
4351 into x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4353 E.g. for r0 = r0 | 0xfff we generate
4354 mvn r0, r0, lsr #12
4355 mvn r0, r0, asl #12
4358 if (set_zero_bit_copies > 8
4359 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4361 if (generate)
4363 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4364 rtx shift = GEN_INT (set_zero_bit_copies);
4366 emit_constant_insn
4367 (cond,
4368 gen_rtx_SET (VOIDmode, sub,
4369 gen_rtx_NOT (mode,
4370 gen_rtx_LSHIFTRT (mode,
4371 source,
4372 shift))));
4373 emit_constant_insn
4374 (cond,
4375 gen_rtx_SET (VOIDmode, target,
4376 gen_rtx_NOT (mode,
4377 gen_rtx_ASHIFT (mode, sub,
4378 shift))));
4380 return 2;
4383 /* This will never be reached for Thumb2 because orn is a valid
4384 instruction. This is for Thumb1 and the ARM 32 bit cases.
4386 x = y | constant (such that ~constant is a valid constant)
4387 Transform this to
4388 x = ~(~y & ~constant).
4390 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4392 if (generate)
4394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4395 emit_constant_insn (cond,
4396 gen_rtx_SET (VOIDmode, sub,
4397 gen_rtx_NOT (mode, source)));
4398 source = sub;
4399 if (subtargets)
4400 sub = gen_reg_rtx (mode);
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, sub,
4403 gen_rtx_AND (mode, source,
4404 GEN_INT (temp1))));
4405 emit_constant_insn (cond,
4406 gen_rtx_SET (VOIDmode, target,
4407 gen_rtx_NOT (mode, sub)));
4409 return 3;
4411 break;
4413 case AND:
4414 /* See if two shifts will do 2 or more insns' worth of work. */
4415 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4417 HOST_WIDE_INT shift_mask = ((0xffffffff
4418 << (32 - clear_sign_bit_copies))
4419 & 0xffffffff);
4421 if ((remainder | shift_mask) != 0xffffffff)
4423 if (generate)
4425 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4426 insns = arm_gen_constant (AND, mode, cond,
4427 remainder | shift_mask,
4428 new_src, source, subtargets, 1);
4429 source = new_src;
4431 else
4433 rtx targ = subtargets ? NULL_RTX : target;
4434 insns = arm_gen_constant (AND, mode, cond,
4435 remainder | shift_mask,
4436 targ, source, subtargets, 0);
4440 if (generate)
4442 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4443 rtx shift = GEN_INT (clear_sign_bit_copies);
4445 emit_insn (gen_ashlsi3 (new_src, source, shift));
4446 emit_insn (gen_lshrsi3 (target, new_src, shift));
4449 return insns + 2;
4452 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4454 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4456 if ((remainder | shift_mask) != 0xffffffff)
4458 if (generate)
4460 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4462 insns = arm_gen_constant (AND, mode, cond,
4463 remainder | shift_mask,
4464 new_src, source, subtargets, 1);
4465 source = new_src;
4467 else
4469 rtx targ = subtargets ? NULL_RTX : target;
4471 insns = arm_gen_constant (AND, mode, cond,
4472 remainder | shift_mask,
4473 targ, source, subtargets, 0);
4477 if (generate)
4479 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (clear_zero_bit_copies);
4482 emit_insn (gen_lshrsi3 (new_src, source, shift));
4483 emit_insn (gen_ashlsi3 (target, new_src, shift));
4486 return insns + 2;
4489 break;
4491 default:
4492 break;
4495 /* Calculate what the instruction sequences would be if we generated it
4496 normally, negated, or inverted. */
4497 if (code == AND)
4498 /* AND cannot be split into multiple insns, so invert and use BIC. */
4499 insns = 99;
4500 else
4501 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4503 if (can_negate)
4504 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4505 &neg_immediates);
4506 else
4507 neg_insns = 99;
4509 if (can_invert || final_invert)
4510 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4511 &inv_immediates);
4512 else
4513 inv_insns = 99;
4515 immediates = &pos_immediates;
4517 /* Is the negated immediate sequence more efficient? */
4518 if (neg_insns < insns && neg_insns <= inv_insns)
4520 insns = neg_insns;
4521 immediates = &neg_immediates;
4523 else
4524 can_negate = 0;
4526 /* Is the inverted immediate sequence more efficient?
4527 We must allow for an extra NOT instruction for XOR operations, although
4528 there is some chance that the final 'mvn' will get optimized later. */
4529 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4531 insns = inv_insns;
4532 immediates = &inv_immediates;
4534 else
4536 can_invert = 0;
4537 final_invert = 0;
4540 /* Now output the chosen sequence as instructions. */
4541 if (generate)
4543 for (i = 0; i < insns; i++)
4545 rtx new_src, temp1_rtx;
4547 temp1 = immediates->i[i];
4549 if (code == SET || code == MINUS)
4550 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4551 else if ((final_invert || i < (insns - 1)) && subtargets)
4552 new_src = gen_reg_rtx (mode);
4553 else
4554 new_src = target;
4556 if (can_invert)
4557 temp1 = ~temp1;
4558 else if (can_negate)
4559 temp1 = -temp1;
4561 temp1 = trunc_int_for_mode (temp1, mode);
4562 temp1_rtx = GEN_INT (temp1);
4564 if (code == SET)
4566 else if (code == MINUS)
4567 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4568 else
4569 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4571 emit_constant_insn (cond,
4572 gen_rtx_SET (VOIDmode, new_src,
4573 temp1_rtx));
4574 source = new_src;
4576 if (code == SET)
4578 can_negate = can_invert;
4579 can_invert = 0;
4580 code = PLUS;
4582 else if (code == MINUS)
4583 code = PLUS;
4587 if (final_invert)
4589 if (generate)
4590 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4591 gen_rtx_NOT (mode, source)));
4592 insns++;
4595 return insns;
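/* End-to-end illustration (ARM mode, worked by hand): an IOR with
   0x00ff00ff has no single-insn form and no cheaper negated or inverted
   sequence, so the positive sequence from optimal_immediate_sequence is
   emitted, roughly

	orr	rD, rS, #0x00ff0000
	orr	rD, rD, #0x000000ff

   i.e. two instructions.  */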
4598 /* Canonicalize a comparison so that we are more likely to recognize it.
4599 This can be done for a few constant compares, where we can make the
4600 immediate value easier to load. */
4602 static void
4603 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4604 bool op0_preserve_value)
4606 machine_mode mode;
4607 unsigned HOST_WIDE_INT i, maxval;
4609 mode = GET_MODE (*op0);
4610 if (mode == VOIDmode)
4611 mode = GET_MODE (*op1);
4613 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4615 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4616 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4617 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4618 for GTU/LEU in Thumb mode. */
4619 if (mode == DImode)
4622 if (*code == GT || *code == LE
4623 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4625 /* Missing comparison. First try to use an available
4626 comparison. */
4627 if (CONST_INT_P (*op1))
4629 i = INTVAL (*op1);
4630 switch (*code)
4632 case GT:
4633 case LE:
4634 if (i != maxval
4635 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4637 *op1 = GEN_INT (i + 1);
4638 *code = *code == GT ? GE : LT;
4639 return;
4641 break;
4642 case GTU:
4643 case LEU:
4644 if (i != ~((unsigned HOST_WIDE_INT) 0)
4645 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4647 *op1 = GEN_INT (i + 1);
4648 *code = *code == GTU ? GEU : LTU;
4649 return;
4651 break;
4652 default:
4653 gcc_unreachable ();
4657 /* If that did not work, reverse the condition. */
4658 if (!op0_preserve_value)
4660 std::swap (*op0, *op1);
4661 *code = (int)swap_condition ((enum rtx_code)*code);
4664 return;
4667 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4668 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4669 to facilitate possible combining with a cmp into 'ands'. */
4670 if (mode == SImode
4671 && GET_CODE (*op0) == ZERO_EXTEND
4672 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4673 && GET_MODE (XEXP (*op0, 0)) == QImode
4674 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4675 && subreg_lowpart_p (XEXP (*op0, 0))
4676 && *op1 == const0_rtx)
4677 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4678 GEN_INT (255));
4680 /* Comparisons smaller than DImode. Only adjust comparisons against
4681 an out-of-range constant. */
4682 if (!CONST_INT_P (*op1)
4683 || const_ok_for_arm (INTVAL (*op1))
4684 || const_ok_for_arm (- INTVAL (*op1)))
4685 return;
4687 i = INTVAL (*op1);
4689 switch (*code)
4691 case EQ:
4692 case NE:
4693 return;
4695 case GT:
4696 case LE:
4697 if (i != maxval
4698 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4700 *op1 = GEN_INT (i + 1);
4701 *code = *code == GT ? GE : LT;
4702 return;
4704 break;
4706 case GE:
4707 case LT:
4708 if (i != ~maxval
4709 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4711 *op1 = GEN_INT (i - 1);
4712 *code = *code == GE ? GT : LE;
4713 return;
4715 break;
4717 case GTU:
4718 case LEU:
4719 if (i != ~((unsigned HOST_WIDE_INT) 0)
4720 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4722 *op1 = GEN_INT (i + 1);
4723 *code = *code == GTU ? GEU : LTU;
4724 return;
4726 break;
4728 case GEU:
4729 case LTU:
4730 if (i != 0
4731 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4733 *op1 = GEN_INT (i - 1);
4734 *code = *code == GEU ? GTU : LEU;
4735 return;
4737 break;
4739 default:
4740 gcc_unreachable ();
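/* Example of the adjustment above: "x > 511" is rewritten as "x >= 512",
   since 511 (0x1ff) is not a valid immediate for CMP/CMN while 512 is,
   so the comparison can then be emitted directly.  */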
4745 /* Define how to find the value returned by a function. */
4747 static rtx
4748 arm_function_value(const_tree type, const_tree func,
4749 bool outgoing ATTRIBUTE_UNUSED)
4751 machine_mode mode;
4752 int unsignedp ATTRIBUTE_UNUSED;
4753 rtx r ATTRIBUTE_UNUSED;
4755 mode = TYPE_MODE (type);
4757 if (TARGET_AAPCS_BASED)
4758 return aapcs_allocate_return_reg (mode, type, func);
4760 /* Promote integer types. */
4761 if (INTEGRAL_TYPE_P (type))
4762 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4764 /* Promote small structs returned in a register to full-word size
4765 for big-endian AAPCS. */
4766 if (arm_return_in_msb (type))
4768 HOST_WIDE_INT size = int_size_in_bytes (type);
4769 if (size % UNITS_PER_WORD != 0)
4771 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4772 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4776 return arm_libcall_value_1 (mode);
4779 /* libcall hashtable helpers. */
4781 struct libcall_hasher : typed_noop_remove <rtx_def>
4783 typedef rtx_def value_type;
4784 typedef rtx_def compare_type;
4785 static inline hashval_t hash (const value_type *);
4786 static inline bool equal (const value_type *, const compare_type *);
4787 static inline void remove (value_type *);
4790 inline bool
4791 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4793 return rtx_equal_p (p1, p2);
4796 inline hashval_t
4797 libcall_hasher::hash (const value_type *p1)
4799 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4802 typedef hash_table<libcall_hasher> libcall_table_type;
4804 static void
4805 add_libcall (libcall_table_type *htab, rtx libcall)
4807 *htab->find_slot (libcall, INSERT) = libcall;
4810 static bool
4811 arm_libcall_uses_aapcs_base (const_rtx libcall)
4813 static bool init_done = false;
4814 static libcall_table_type *libcall_htab = NULL;
4816 if (!init_done)
4818 init_done = true;
4820 libcall_htab = new libcall_table_type (31);
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4827 add_libcall (libcall_htab,
4828 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4836 add_libcall (libcall_htab,
4837 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4839 add_libcall (libcall_htab,
4840 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4851 add_libcall (libcall_htab,
4852 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4853 add_libcall (libcall_htab,
4854 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4856 /* Values from double-precision helper functions are returned in core
4857 registers if the selected core only supports single-precision
4858 arithmetic, even if we are using the hard-float ABI. The same is
4859 true for single-precision helpers, but we will never be using the
4860 hard-float ABI on a CPU which doesn't support single-precision
4861 operations in hardware. */
4862 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4872 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4873 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4874 SFmode));
4875 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4876 DFmode));
4879 return libcall && libcall_htab->find (libcall) != NULL;
4882 static rtx
4883 arm_libcall_value_1 (machine_mode mode)
4885 if (TARGET_AAPCS_BASED)
4886 return aapcs_libcall_value (mode);
4887 else if (TARGET_IWMMXT_ABI
4888 && arm_vector_mode_supported_p (mode))
4889 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4890 else
4891 return gen_rtx_REG (mode, ARG_REGISTER (1));
4894 /* Define how to find the value returned by a library function
4895 assuming the value has mode MODE. */
4897 static rtx
4898 arm_libcall_value (machine_mode mode, const_rtx libcall)
4900 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4901 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4903 /* The following libcalls return their result in integer registers,
4904 even though they return a floating point value. */
4905 if (arm_libcall_uses_aapcs_base (libcall))
4906 return gen_rtx_REG (mode, ARG_REGISTER(1));
4910 return arm_libcall_value_1 (mode);
4913 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4915 static bool
4916 arm_function_value_regno_p (const unsigned int regno)
4918 if (regno == ARG_REGISTER (1)
4919 || (TARGET_32BIT
4920 && TARGET_AAPCS_BASED
4921 && TARGET_VFP
4922 && TARGET_HARD_FLOAT
4923 && regno == FIRST_VFP_REGNUM)
4924 || (TARGET_IWMMXT_ABI
4925 && regno == FIRST_IWMMXT_REGNUM))
4926 return true;
4928 return false;
4931 /* Determine the amount of memory needed to store the possible return
4932 registers of an untyped call. */
4934 arm_apply_result_size (void)
4936 int size = 16;
4938 if (TARGET_32BIT)
4940 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4941 size += 32;
4942 if (TARGET_IWMMXT_ABI)
4943 size += 8;
4946 return size;
4949 /* Decide whether TYPE should be returned in memory (true)
4950 or in a register (false). FNTYPE is the type of the function making
4951 the call. */
4952 static bool
4953 arm_return_in_memory (const_tree type, const_tree fntype)
4955 HOST_WIDE_INT size;
4957 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4959 if (TARGET_AAPCS_BASED)
4961 /* Simple, non-aggregate types (i.e. not including vectors and
4962 complex) are always returned in a register (or registers).
4963 We don't care about which register here, so we can short-cut
4964 some of the detail. */
4965 if (!AGGREGATE_TYPE_P (type)
4966 && TREE_CODE (type) != VECTOR_TYPE
4967 && TREE_CODE (type) != COMPLEX_TYPE)
4968 return false;
4970 /* Any return value that is no larger than one word can be
4971 returned in r0. */
4972 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4973 return false;
4975 /* Check any available co-processors to see if they accept the
4976 type as a register candidate (VFP, for example, can return
4977 some aggregates in consecutive registers). These aren't
4978 available if the call is variadic. */
4979 if (aapcs_select_return_coproc (type, fntype) >= 0)
4980 return false;
4982 /* Vector values should be returned using ARM registers, not
4983 memory (unless they're over 16 bytes, which will break since
4984 we only have four call-clobbered registers to play with). */
4985 if (TREE_CODE (type) == VECTOR_TYPE)
4986 return (size < 0 || size > (4 * UNITS_PER_WORD));
4988 /* The rest go in memory. */
4989 return true;
4992 if (TREE_CODE (type) == VECTOR_TYPE)
4993 return (size < 0 || size > (4 * UNITS_PER_WORD));
4995 if (!AGGREGATE_TYPE_P (type) &&
4996 (TREE_CODE (type) != VECTOR_TYPE))
4997 /* All simple types are returned in registers. */
4998 return false;
5000 if (arm_abi != ARM_ABI_APCS)
5002 /* ATPCS and later return aggregate types in memory only if they are
5003 larger than a word (or are variable size). */
5004 return (size < 0 || size > UNITS_PER_WORD);
5007 /* For the arm-wince targets we choose to be compatible with Microsoft's
5008 ARM and Thumb compilers, which always return aggregates in memory. */
5009 #ifndef ARM_WINCE
5010 /* All structures/unions bigger than one word are returned in memory.
5011 Also catch the case where int_size_in_bytes returns -1. In this case
5012 the aggregate is either huge or of variable size, and in either case
5013 we will want to return it via memory and not in a register. */
5014 if (size < 0 || size > UNITS_PER_WORD)
5015 return true;
5017 if (TREE_CODE (type) == RECORD_TYPE)
5019 tree field;
5021 /* For a struct the APCS says that we only return in a register
5022 if the type is 'integer like' and every addressable element
5023 has an offset of zero. For practical purposes this means
5024 that the structure can have at most one non bit-field element
5025 and that this element must be the first one in the structure. */
5027 /* Find the first field, ignoring non FIELD_DECL things which will
5028 have been created by C++. */
5029 for (field = TYPE_FIELDS (type);
5030 field && TREE_CODE (field) != FIELD_DECL;
5031 field = DECL_CHAIN (field))
5032 continue;
5034 if (field == NULL)
5035 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5037 /* Check that the first field is valid for returning in a register. */
5039 /* ... Floats are not allowed */
5040 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5041 return true;
5043 /* ... Aggregates that are not themselves valid for returning in
5044 a register are not allowed. */
5045 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5046 return true;
5048 /* Now check the remaining fields, if any. Only bitfields are allowed,
5049 since they are not addressable. */
5050 for (field = DECL_CHAIN (field);
5051 field;
5052 field = DECL_CHAIN (field))
5054 if (TREE_CODE (field) != FIELD_DECL)
5055 continue;
5057 if (!DECL_BIT_FIELD_TYPE (field))
5058 return true;
5061 return false;
5064 if (TREE_CODE (type) == UNION_TYPE)
5066 tree field;
5068 /* Unions can be returned in registers if every element is
5069 integral, or can be returned in an integer register. */
5070 for (field = TYPE_FIELDS (type);
5071 field;
5072 field = DECL_CHAIN (field))
5074 if (TREE_CODE (field) != FIELD_DECL)
5075 continue;
5077 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5078 return true;
5080 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5081 return true;
5084 return false;
5086 #endif /* not ARM_WINCE */
5088 /* Return all other types in memory. */
5089 return true;
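/* Illustration of the old APCS rules above: "struct { int x; }" is
   integer-like and is returned in r0, while "struct { float f; }" or
   anything larger than a word (e.g. "struct { int a, b; }") is returned
   in memory through the hidden result pointer.  */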
5092 const struct pcs_attribute_arg
5094 const char *arg;
5095 enum arm_pcs value;
5096 } pcs_attribute_args[] =
5098 {"aapcs", ARM_PCS_AAPCS},
5099 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5100 #if 0
5101 /* We could recognize these, but changes would be needed elsewhere
5102 * to implement them. */
5103 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5104 {"atpcs", ARM_PCS_ATPCS},
5105 {"apcs", ARM_PCS_APCS},
5106 #endif
5107 {NULL, ARM_PCS_UNKNOWN}
5110 static enum arm_pcs
5111 arm_pcs_from_attribute (tree attr)
5113 const struct pcs_attribute_arg *ptr;
5114 const char *arg;
5116 /* Get the value of the argument. */
5117 if (TREE_VALUE (attr) == NULL_TREE
5118 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5119 return ARM_PCS_UNKNOWN;
5121 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5123 /* Check it against the list of known arguments. */
5124 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5125 if (streq (arg, ptr->arg))
5126 return ptr->value;
5128 /* An unrecognized PCS variant. */
5129 return ARM_PCS_UNKNOWN;
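/* The attribute parsed here is the documented ARM "pcs" function
   attribute, e.g.:

     double f (double) __attribute__((pcs("aapcs-vfp")));

   which requests the VFP variant of the AAPCS for that function.  */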
5132 /* Get the PCS variant to use for this call. TYPE is the function's type
5133 specification, DECL is the specific declaration. DECL may be null if
5134 the call could be indirect or if this is a library call. */
5135 static enum arm_pcs
5136 arm_get_pcs_model (const_tree type, const_tree decl)
5138 bool user_convention = false;
5139 enum arm_pcs user_pcs = arm_pcs_default;
5140 tree attr;
5142 gcc_assert (type);
5144 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5145 if (attr)
5147 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5148 user_convention = true;
5151 if (TARGET_AAPCS_BASED)
5153 /* Detect varargs functions. These always use the base rules
5154 (no argument is ever a candidate for a co-processor
5155 register). */
5156 bool base_rules = stdarg_p (type);
5158 if (user_convention)
5160 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5161 sorry ("non-AAPCS derived PCS variant");
5162 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5163 error ("variadic functions must use the base AAPCS variant");
5166 if (base_rules)
5167 return ARM_PCS_AAPCS;
5168 else if (user_convention)
5169 return user_pcs;
5170 else if (decl && flag_unit_at_a_time)
5172 /* Local functions never leak outside this compilation unit,
5173 so we are free to use whatever conventions are
5174 appropriate. */
5175 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5176 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5177 if (i && i->local)
5178 return ARM_PCS_AAPCS_LOCAL;
5181 else if (user_convention && user_pcs != arm_pcs_default)
5182 sorry ("PCS variant");
5184 /* For everything else we use the target's default. */
5185 return arm_pcs_default;
5189 static void
5190 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5191 const_tree fntype ATTRIBUTE_UNUSED,
5192 rtx libcall ATTRIBUTE_UNUSED,
5193 const_tree fndecl ATTRIBUTE_UNUSED)
5195 /* Record the unallocated VFP registers. */
5196 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5197 pcum->aapcs_vfp_reg_alloc = 0;
5200 /* Walk down the type tree of TYPE counting consecutive base elements.
5201 If *MODEP is VOIDmode, then set it to the first valid floating point
5202 type. If a non-floating point type is found, or if a floating point
5203 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5204 otherwise return the count in the sub-tree. */
5205 static int
5206 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5208 machine_mode mode;
5209 HOST_WIDE_INT size;
5211 switch (TREE_CODE (type))
5213 case REAL_TYPE:
5214 mode = TYPE_MODE (type);
5215 if (mode != DFmode && mode != SFmode)
5216 return -1;
5218 if (*modep == VOIDmode)
5219 *modep = mode;
5221 if (*modep == mode)
5222 return 1;
5224 break;
5226 case COMPLEX_TYPE:
5227 mode = TYPE_MODE (TREE_TYPE (type));
5228 if (mode != DFmode && mode != SFmode)
5229 return -1;
5231 if (*modep == VOIDmode)
5232 *modep = mode;
5234 if (*modep == mode)
5235 return 2;
5237 break;
5239 case VECTOR_TYPE:
5240 /* Use V2SImode and V4SImode as representatives of all 64-bit
5241 and 128-bit vector types, whether or not those modes are
5242 supported with the present options. */
5243 size = int_size_in_bytes (type);
5244 switch (size)
5246 case 8:
5247 mode = V2SImode;
5248 break;
5249 case 16:
5250 mode = V4SImode;
5251 break;
5252 default:
5253 return -1;
5256 if (*modep == VOIDmode)
5257 *modep = mode;
5259 /* Vector modes are considered to be opaque: two vectors are
5260 equivalent for the purposes of being homogeneous aggregates
5261 if they are the same size. */
5262 if (*modep == mode)
5263 return 1;
5265 break;
5267 case ARRAY_TYPE:
5269 int count;
5270 tree index = TYPE_DOMAIN (type);
5272 /* Can't handle incomplete types nor sizes that are not
5273 fixed. */
5274 if (!COMPLETE_TYPE_P (type)
5275 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5276 return -1;
5278 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5279 if (count == -1
5280 || !index
5281 || !TYPE_MAX_VALUE (index)
5282 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5283 || !TYPE_MIN_VALUE (index)
5284 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5285 || count < 0)
5286 return -1;
5288 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5289 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5291 /* There must be no padding. */
5292 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5293 return -1;
5295 return count;
5298 case RECORD_TYPE:
5300 int count = 0;
5301 int sub_count;
5302 tree field;
5304 /* Can't handle incomplete types nor sizes that are not
5305 fixed. */
5306 if (!COMPLETE_TYPE_P (type)
5307 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5308 return -1;
5310 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5312 if (TREE_CODE (field) != FIELD_DECL)
5313 continue;
5315 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5316 if (sub_count < 0)
5317 return -1;
5318 count += sub_count;
5321 /* There must be no padding. */
5322 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5323 return -1;
5325 return count;
5328 case UNION_TYPE:
5329 case QUAL_UNION_TYPE:
5331 /* These aren't very interesting except in a degenerate case. */
5332 int count = 0;
5333 int sub_count;
5334 tree field;
5336 /* Can't handle incomplete types nor sizes that are not
5337 fixed. */
5338 if (!COMPLETE_TYPE_P (type)
5339 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5340 return -1;
5342 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5344 if (TREE_CODE (field) != FIELD_DECL)
5345 continue;
5347 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5348 if (sub_count < 0)
5349 return -1;
5350 count = count > sub_count ? count : sub_count;
5353 /* There must be no padding. */
5354 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5355 return -1;
5357 return count;
5360 default:
5361 break;
5364 return -1;
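/* Examples of the homogeneous-aggregate walk above:
     struct { double x, y, z; }    -> count 3, *modep == DFmode
     _Complex double               -> count 2, *modep == DFmode
     struct { float a; double b; } -> -1 (mixed base types)
   Only the successful cases are candidates for VFP argument and return
   registers under the AAPCS VFP variant.  */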
5367 /* Return true if PCS_VARIANT should use VFP registers. */
5368 static bool
5369 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5371 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5373 static bool seen_thumb1_vfp = false;
5375 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5377 sorry ("Thumb-1 hard-float VFP ABI");
5378 /* sorry() is not immediately fatal, so only display this once. */
5379 seen_thumb1_vfp = true;
5382 return true;
5385 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5386 return false;
5388 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5389 (TARGET_VFP_DOUBLE || !is_double));
5392 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5393 suitable for passing or returning in VFP registers for the PCS
5394 variant selected. If it is, then *BASE_MODE is updated to contain
5395 a machine mode describing each element of the argument's type and
5396 *COUNT to hold the number of such elements. */
5397 static bool
5398 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5399 machine_mode mode, const_tree type,
5400 machine_mode *base_mode, int *count)
5402 machine_mode new_mode = VOIDmode;
5404 /* If we have the type information, prefer that to working things
5405 out from the mode. */
5406 if (type)
5408 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5410 if (ag_count > 0 && ag_count <= 4)
5411 *count = ag_count;
5412 else
5413 return false;
5415 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5416 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5417 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5419 *count = 1;
5420 new_mode = mode;
5422 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5424 *count = 2;
5425 new_mode = (mode == DCmode ? DFmode : SFmode);
5427 else
5428 return false;
5431 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5432 return false;
5434 *base_mode = new_mode;
5435 return true;
5438 static bool
5439 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5440 machine_mode mode, const_tree type)
5442 int count ATTRIBUTE_UNUSED;
5443 machine_mode ag_mode ATTRIBUTE_UNUSED;
5445 if (!use_vfp_abi (pcs_variant, false))
5446 return false;
5447 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5448 &ag_mode, &count);
5451 static bool
5452 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5453 const_tree type)
5455 if (!use_vfp_abi (pcum->pcs_variant, false))
5456 return false;
5458 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5459 &pcum->aapcs_vfp_rmode,
5460 &pcum->aapcs_vfp_rcount);
5463 static bool
5464 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5465 const_tree type ATTRIBUTE_UNUSED)
5467 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5468 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5469 int regno;
5471 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5472 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5474 pcum->aapcs_vfp_reg_alloc = mask << regno;
5475 if (mode == BLKmode
5476 || (mode == TImode && ! TARGET_NEON)
5477 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5479 int i;
5480 int rcount = pcum->aapcs_vfp_rcount;
5481 int rshift = shift;
5482 machine_mode rmode = pcum->aapcs_vfp_rmode;
5483 rtx par;
5484 if (!TARGET_NEON)
5486 /* Avoid using unsupported vector modes. */
5487 if (rmode == V2SImode)
5488 rmode = DImode;
5489 else if (rmode == V4SImode)
5491 rmode = DImode;
5492 rcount *= 2;
5493 rshift /= 2;
5496 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5497 for (i = 0; i < rcount; i++)
5499 rtx tmp = gen_rtx_REG (rmode,
5500 FIRST_VFP_REGNUM + regno + i * rshift);
5501 tmp = gen_rtx_EXPR_LIST
5502 (VOIDmode, tmp,
5503 GEN_INT (i * GET_MODE_SIZE (rmode)));
5504 XVECEXP (par, 0, i) = tmp;
5507 pcum->aapcs_reg = par;
5509 else
5510 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5511 return true;
5513 return false;
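/* Rough sketch of the allocation above for a candidate of three doubles:
   aapcs_vfp_rmode == DFmode gives shift == 2 and a six-bit mask, so we
   scan for six consecutive free single-precision registers starting at
   an even regno, e.g. s0-s5 (d0-d2), and claim them.  */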
5516 static rtx
5517 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5518 machine_mode mode,
5519 const_tree type ATTRIBUTE_UNUSED)
5521 if (!use_vfp_abi (pcs_variant, false))
5522 return NULL;
5524 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5526 int count;
5527 machine_mode ag_mode;
5528 int i;
5529 rtx par;
5530 int shift;
5532 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5533 &ag_mode, &count);
5535 if (!TARGET_NEON)
5537 if (ag_mode == V2SImode)
5538 ag_mode = DImode;
5539 else if (ag_mode == V4SImode)
5541 ag_mode = DImode;
5542 count *= 2;
5545 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5546 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5547 for (i = 0; i < count; i++)
5549 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5550 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5551 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5552 XVECEXP (par, 0, i) = tmp;
5555 return par;
5558 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5561 static void
5562 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5563 machine_mode mode ATTRIBUTE_UNUSED,
5564 const_tree type ATTRIBUTE_UNUSED)
5566 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5567 pcum->aapcs_vfp_reg_alloc = 0;
5568 return;
5571 #define AAPCS_CP(X) \
5573 aapcs_ ## X ## _cum_init, \
5574 aapcs_ ## X ## _is_call_candidate, \
5575 aapcs_ ## X ## _allocate, \
5576 aapcs_ ## X ## _is_return_candidate, \
5577 aapcs_ ## X ## _allocate_return_reg, \
5578 aapcs_ ## X ## _advance \
5581 /* Table of co-processors that can be used to pass arguments in
5582 registers. Ideally no argument should be a candidate for more than
5583 one co-processor table entry, but the table is processed in order
5584 and stops after the first match. If that entry then fails to put
5585 the argument into a co-processor register, the argument will go on
5586 the stack. */
5587 static struct
5589 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5590 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5592 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5593 BLKmode) is a candidate for this co-processor's registers; this
5594 function should ignore any position-dependent state in
5595 CUMULATIVE_ARGS and only use call-type dependent information. */
5596 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5598 /* Return true if the argument does get a co-processor register; it
5599 should set aapcs_reg to an RTX of the register allocated as is
5600 required for a return from FUNCTION_ARG. */
5601 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5603 /* Return true if a result of mode MODE (or type TYPE if MODE is
5604 BLKmode) can be returned in this co-processor's registers. */
5605 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5607 /* Allocate and return an RTX element to hold the return type of a
5608 call; this routine must not fail and will only be called if
5609 is_return_candidate returned true with the same parameters. */
5610 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5612 /* Finish processing this argument and prepare to start processing
5613 the next one. */
5614 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5615 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5617 AAPCS_CP(vfp)
5620 #undef AAPCS_CP
5622 static int
5623 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5624 const_tree type)
5626 int i;
5628 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5629 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5630 return i;
5632 return -1;
5635 static int
5636 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5638 /* We aren't passed a decl, so we can't check that a call is local.
5639 However, it isn't clear that that would be a win anyway, since it
5640 might limit some tail-calling opportunities. */
5641 enum arm_pcs pcs_variant;
5643 if (fntype)
5645 const_tree fndecl = NULL_TREE;
5647 if (TREE_CODE (fntype) == FUNCTION_DECL)
5649 fndecl = fntype;
5650 fntype = TREE_TYPE (fntype);
5653 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5655 else
5656 pcs_variant = arm_pcs_default;
5658 if (pcs_variant != ARM_PCS_AAPCS)
5660 int i;
5662 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5663 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5664 TYPE_MODE (type),
5665 type))
5666 return i;
5668 return -1;
5671 static rtx
5672 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5673 const_tree fntype)
5675 /* We aren't passed a decl, so we can't check that a call is local.
5676 However, it isn't clear that that would be a win anyway, since it
5677 might limit some tail-calling opportunities. */
5678 enum arm_pcs pcs_variant;
5679 int unsignedp ATTRIBUTE_UNUSED;
5681 if (fntype)
5683 const_tree fndecl = NULL_TREE;
5685 if (TREE_CODE (fntype) == FUNCTION_DECL)
5687 fndecl = fntype;
5688 fntype = TREE_TYPE (fntype);
5691 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5693 else
5694 pcs_variant = arm_pcs_default;
5696 /* Promote integer types. */
5697 if (type && INTEGRAL_TYPE_P (type))
5698 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5700 if (pcs_variant != ARM_PCS_AAPCS)
5702 int i;
5704 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5705 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5706 type))
5707 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5708 mode, type);
5711 /* Promotes small structs returned in a register to full-word size
5712 for big-endian AAPCS. */
5713 if (type && arm_return_in_msb (type))
5715 HOST_WIDE_INT size = int_size_in_bytes (type);
5716 if (size % UNITS_PER_WORD != 0)
5718 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5719 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5723 return gen_rtx_REG (mode, R0_REGNUM);
5726 static rtx
5727 aapcs_libcall_value (machine_mode mode)
5729 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5730 && GET_MODE_SIZE (mode) <= 4)
5731 mode = SImode;
5733 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5736 /* Lay out a function argument using the AAPCS rules. The rule
5737 numbers referred to here are those in the AAPCS. */
5738 static void
5739 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5740 const_tree type, bool named)
5742 int nregs, nregs2;
5743 int ncrn;
5745 /* We only need to do this once per argument. */
5746 if (pcum->aapcs_arg_processed)
5747 return;
5749 pcum->aapcs_arg_processed = true;
5751 /* Special case: if named is false then we are handling an incoming
5752 anonymous argument which is on the stack. */
5753 if (!named)
5754 return;
5756 /* Is this a potential co-processor register candidate? */
5757 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5759 int slot = aapcs_select_call_coproc (pcum, mode, type);
5760 pcum->aapcs_cprc_slot = slot;
5762 /* We don't have to apply any of the rules from part B of the
5763 preparation phase, these are handled elsewhere in the
5764 compiler. */
5766 if (slot >= 0)
5768 /* A Co-processor register candidate goes either in its own
5769 class of registers or on the stack. */
5770 if (!pcum->aapcs_cprc_failed[slot])
5772 /* C1.cp - Try to allocate the argument to co-processor
5773 registers. */
5774 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5775 return;
5777 /* C2.cp - Put the argument on the stack and note that we
5778 can't assign any more candidates in this slot. We also
5779 need to note that we have allocated stack space, so that
5780 we won't later try to split a non-cprc candidate between
5781 core registers and the stack. */
5782 pcum->aapcs_cprc_failed[slot] = true;
5783 pcum->can_split = false;
5786 /* We didn't get a register, so this argument goes on the
5787 stack. */
5788 gcc_assert (pcum->can_split == false);
5789 return;
5793 /* C3 - For double-word aligned arguments, round the NCRN up to the
5794 next even number. */
5795 ncrn = pcum->aapcs_ncrn;
5796 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5797 ncrn++;
5799 nregs = ARM_NUM_REGS2(mode, type);
5801 /* Sigh, this test should really assert that nregs > 0, but a GCC
5802 extension allows empty structs and then gives them empty size; it
5803 then allows such a structure to be passed by value. For some of
5804 the code below we have to pretend that such an argument has
5805 non-zero size so that we 'locate' it correctly either in
5806 registers or on the stack. */
5807 gcc_assert (nregs >= 0);
5809 nregs2 = nregs ? nregs : 1;
5811 /* C4 - Argument fits entirely in core registers. */
5812 if (ncrn + nregs2 <= NUM_ARG_REGS)
5814 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5815 pcum->aapcs_next_ncrn = ncrn + nregs;
5816 return;
5819 /* C5 - Some core registers left and there are no arguments already
5820 on the stack: split this argument between the remaining core
5821 registers and the stack. */
5822 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5824 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5825 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5826 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5827 return;
5830 /* C6 - NCRN is set to 4. */
5831 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5833 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5834 return;
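/* A worked example of the rules above for the base AAPCS (no co-processor
   candidate): for a call such as f (int a, double b), A is allocated to r0
   and NCRN becomes 1; B requires doubleword alignment, so C3 rounds NCRN up
   to 2 and C4 places B in r2-r3.  Any further argument then finds NCRN == 4
   and goes on the stack (C6-C8).  */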
5837 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5838 for a call to a function whose data type is FNTYPE.
5839 For a library call, FNTYPE is NULL. */
5840 void
5841 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5842 rtx libname,
5843 tree fndecl ATTRIBUTE_UNUSED)
5845 /* Long call handling. */
5846 if (fntype)
5847 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5848 else
5849 pcum->pcs_variant = arm_pcs_default;
5851 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5853 if (arm_libcall_uses_aapcs_base (libname))
5854 pcum->pcs_variant = ARM_PCS_AAPCS;
5856 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5857 pcum->aapcs_reg = NULL_RTX;
5858 pcum->aapcs_partial = 0;
5859 pcum->aapcs_arg_processed = false;
5860 pcum->aapcs_cprc_slot = -1;
5861 pcum->can_split = true;
5863 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5865 int i;
5867 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5869 pcum->aapcs_cprc_failed[i] = false;
5870 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5873 return;
5876 /* Legacy ABIs */
5878 /* On the ARM, the offset starts at 0. */
5879 pcum->nregs = 0;
5880 pcum->iwmmxt_nregs = 0;
5881 pcum->can_split = true;
5883 /* Varargs vectors are treated the same as long long.
5884 named_count avoids having to change the way arm handles 'named'. */
5885 pcum->named_count = 0;
5886 pcum->nargs = 0;
5888 if (TARGET_REALLY_IWMMXT && fntype)
5890 tree fn_arg;
5892 for (fn_arg = TYPE_ARG_TYPES (fntype);
5893 fn_arg;
5894 fn_arg = TREE_CHAIN (fn_arg))
5895 pcum->named_count += 1;
5897 if (! pcum->named_count)
5898 pcum->named_count = INT_MAX;
5902 /* Return true if we use LRA instead of reload pass. */
5903 static bool
5904 arm_lra_p (void)
5906 return arm_lra_flag;
5909 /* Return true if mode/type need doubleword alignment. */
5910 static bool
5911 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5913 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5914 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
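/* For example, on AAPCS targets PARM_BOUNDARY is 32, so DImode and DFmode
   values (and any type containing a 64-bit aligned member) report greater
   alignment here and are then placed in an even-numbered register pair or
   at an 8-byte aligned stack slot by the callers of this predicate.  */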
5918 /* Determine where to put an argument to a function.
5919 Value is zero to push the argument on the stack,
5920 or a hard register in which to store the argument.
5922 MODE is the argument's machine mode.
5923 TYPE is the data type of the argument (as a tree).
5924 This is null for libcalls where that information may
5925 not be available.
5926 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5927 the preceding args and about the function being called.
5928 NAMED is nonzero if this argument is a named parameter
5929 (otherwise it is an extra parameter matching an ellipsis).
5931 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5932 other arguments are passed on the stack. If (NAMED == 0) (which happens
5933 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5934 defined), say it is passed on the stack (function_prologue will
5935 indeed make it be passed on the stack if necessary). */
5937 static rtx
5938 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5939 const_tree type, bool named)
5941 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5942 int nregs;
5944 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5945 a call insn (op3 of a call_value insn). */
5946 if (mode == VOIDmode)
5947 return const0_rtx;
5949 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5951 aapcs_layout_arg (pcum, mode, type, named);
5952 return pcum->aapcs_reg;
5955 /* Varargs vectors are treated the same as long long.
5956 named_count avoids having to change the way arm handles 'named'. */
5957 if (TARGET_IWMMXT_ABI
5958 && arm_vector_mode_supported_p (mode)
5959 && pcum->named_count > pcum->nargs + 1)
5961 if (pcum->iwmmxt_nregs <= 9)
5962 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5963 else
5965 pcum->can_split = false;
5966 return NULL_RTX;
5970 /* Put doubleword aligned quantities in even register pairs. */
5971 if (pcum->nregs & 1
5972 && ARM_DOUBLEWORD_ALIGN
5973 && arm_needs_doubleword_align (mode, type))
5974 pcum->nregs++;
5976 /* Only allow splitting an arg between regs and memory if all preceding
5977 args were allocated to regs. For args passed by reference we only count
5978 the reference pointer. */
5979 if (pcum->can_split)
5980 nregs = 1;
5981 else
5982 nregs = ARM_NUM_REGS2 (mode, type);
5984 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5985 return NULL_RTX;
5987 return gen_rtx_REG (mode, pcum->nregs);
5990 static unsigned int
5991 arm_function_arg_boundary (machine_mode mode, const_tree type)
5993 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5994 ? DOUBLEWORD_ALIGNMENT
5995 : PARM_BOUNDARY);
5998 static int
5999 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6000 tree type, bool named)
6002 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6003 int nregs = pcum->nregs;
6005 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6007 aapcs_layout_arg (pcum, mode, type, named);
6008 return pcum->aapcs_partial;
6011 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6012 return 0;
6014 if (NUM_ARG_REGS > nregs
6015 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6016 && pcum->can_split)
6017 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6019 return 0;
6022 /* Update the data in PCUM to advance over an argument
6023 of mode MODE and data type TYPE.
6024 (TYPE is null for libcalls where that information may not be available.) */
6026 static void
6027 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6028 const_tree type, bool named)
6030 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6032 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6034 aapcs_layout_arg (pcum, mode, type, named);
6036 if (pcum->aapcs_cprc_slot >= 0)
6038 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6039 type);
6040 pcum->aapcs_cprc_slot = -1;
6043 /* Generic stuff. */
6044 pcum->aapcs_arg_processed = false;
6045 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6046 pcum->aapcs_reg = NULL_RTX;
6047 pcum->aapcs_partial = 0;
6049 else
6051 pcum->nargs += 1;
6052 if (arm_vector_mode_supported_p (mode)
6053 && pcum->named_count > pcum->nargs
6054 && TARGET_IWMMXT_ABI)
6055 pcum->iwmmxt_nregs += 1;
6056 else
6057 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6061 /* Variable sized types are passed by reference. This is a GCC
6062 extension to the ARM ABI. */
6064 static bool
6065 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6066 machine_mode mode ATTRIBUTE_UNUSED,
6067 const_tree type, bool named ATTRIBUTE_UNUSED)
6069 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
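/* TYPE_SIZE is not an INTEGER_CST for variable-sized types, for example a
   C99 variable-length array type (or, as a GNU extension, a structure
   containing one); objects of such types are passed by invisible reference
   rather than copied onto the stack.  */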
6072 /* Encode the current state of the #pragma [no_]long_calls. */
6073 typedef enum
6075 OFF, /* No #pragma [no_]long_calls is in effect. */
6076 LONG, /* #pragma long_calls is in effect. */
6077 SHORT /* #pragma no_long_calls is in effect. */
6078 } arm_pragma_enum;
6080 static arm_pragma_enum arm_pragma_long_calls = OFF;
6082 void
6083 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6085 arm_pragma_long_calls = LONG;
6088 void
6089 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6091 arm_pragma_long_calls = SHORT;
6094 void
6095 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6097 arm_pragma_long_calls = OFF;
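/* These handlers implement the ARM-specific pragmas.  Typical usage:

       #pragma long_calls
       void far_away (void);
       #pragma long_calls_off

   Declarations between the pragmas implicitly receive the long_call
   attribute, attached by arm_set_default_type_attributes below.  */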
6100 /* Handle an attribute requiring a FUNCTION_DECL;
6101 arguments as in struct attribute_spec.handler. */
6102 static tree
6103 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6104 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6106 if (TREE_CODE (*node) != FUNCTION_DECL)
6108 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6109 name);
6110 *no_add_attrs = true;
6113 return NULL_TREE;
6116 /* Handle an "interrupt" or "isr" attribute;
6117 arguments as in struct attribute_spec.handler. */
6118 static tree
6119 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6120 bool *no_add_attrs)
6122 if (DECL_P (*node))
6124 if (TREE_CODE (*node) != FUNCTION_DECL)
6126 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6127 name);
6128 *no_add_attrs = true;
6130 /* FIXME: the argument if any is checked for type attributes;
6131 should it be checked for decl ones? */
6133 else
6135 if (TREE_CODE (*node) == FUNCTION_TYPE
6136 || TREE_CODE (*node) == METHOD_TYPE)
6138 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6140 warning (OPT_Wattributes, "%qE attribute ignored",
6141 name);
6142 *no_add_attrs = true;
6145 else if (TREE_CODE (*node) == POINTER_TYPE
6146 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6147 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6148 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6150 *node = build_variant_type_copy (*node);
6151 TREE_TYPE (*node) = build_type_attribute_variant
6152 (TREE_TYPE (*node),
6153 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6154 *no_add_attrs = true;
6156 else
6158 /* Possibly pass this attribute on from the type to a decl. */
6159 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6160 | (int) ATTR_FLAG_FUNCTION_NEXT
6161 | (int) ATTR_FLAG_ARRAY_NEXT))
6163 *no_add_attrs = true;
6164 return tree_cons (name, args, NULL_TREE);
6166 else
6168 warning (OPT_Wattributes, "%qE attribute ignored",
6169 name);
6174 return NULL_TREE;
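/* The "interrupt"/"isr" attribute handled above is typically written as

       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   where the optional string argument names the exception kind and is
   validated by arm_isr_value.  */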
6177 /* Handle a "pcs" attribute; arguments as in struct
6178 attribute_spec.handler. */
6179 static tree
6180 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6181 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6183 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6185 warning (OPT_Wattributes, "%qE attribute ignored", name);
6186 *no_add_attrs = true;
6188 return NULL_TREE;
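/* The "pcs" attribute selects the procedure call variant for a single
   function, for example

       double dot (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));

   Strings not recognized by arm_pcs_from_attribute are warned about and
   the attribute is dropped.  */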
6191 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6192 /* Handle the "notshared" attribute. This attribute is another way of
6193 requesting hidden visibility. ARM's compiler supports
6194 "__declspec(notshared)"; we support the same thing via an
6195 attribute. */
6197 static tree
6198 arm_handle_notshared_attribute (tree *node,
6199 tree name ATTRIBUTE_UNUSED,
6200 tree args ATTRIBUTE_UNUSED,
6201 int flags ATTRIBUTE_UNUSED,
6202 bool *no_add_attrs)
6204 tree decl = TYPE_NAME (*node);
6206 if (decl)
6208 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6209 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6210 *no_add_attrs = false;
6212 return NULL_TREE;
6214 #endif
6216 /* Return 0 if the attributes for two types are incompatible, 1 if they
6217 are compatible, and 2 if they are nearly compatible (which causes a
6218 warning to be generated). */
6219 static int
6220 arm_comp_type_attributes (const_tree type1, const_tree type2)
6222 int l1, l2, s1, s2;
6224 /* Check for mismatch of non-default calling convention. */
6225 if (TREE_CODE (type1) != FUNCTION_TYPE)
6226 return 1;
6228 /* Check for mismatched call attributes. */
6229 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6230 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6231 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6232 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6234 /* Only bother to check if an attribute is defined. */
6235 if (l1 | l2 | s1 | s2)
6237 /* If one type has an attribute, the other must have the same attribute. */
6238 if ((l1 != l2) || (s1 != s2))
6239 return 0;
6241 /* Disallow mixed attributes. */
6242 if ((l1 & s2) || (l2 & s1))
6243 return 0;
6246 /* Check for mismatched ISR attribute. */
6247 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6248 if (! l1)
6249 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6250 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6251 if (! l2)
6252 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6253 if (l1 != l2)
6254 return 0;
6256 return 1;
6259 /* Assign default attributes to a newly defined type. This is used to
6260 set short_call/long_call attributes for function types of
6261 functions defined inside corresponding #pragma scopes. */
6262 static void
6263 arm_set_default_type_attributes (tree type)
6265 /* Add __attribute__ ((long_call)) to all functions, when
6266 inside #pragma long_calls or __attribute__ ((short_call)),
6267 when inside #pragma no_long_calls. */
6268 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6270 tree type_attr_list, attr_name;
6271 type_attr_list = TYPE_ATTRIBUTES (type);
6273 if (arm_pragma_long_calls == LONG)
6274 attr_name = get_identifier ("long_call");
6275 else if (arm_pragma_long_calls == SHORT)
6276 attr_name = get_identifier ("short_call");
6277 else
6278 return;
6280 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6281 TYPE_ATTRIBUTES (type) = type_attr_list;
6285 /* Return true if DECL is known to be linked into section SECTION. */
6287 static bool
6288 arm_function_in_section_p (tree decl, section *section)
6290 /* We can only be certain about functions defined in the same
6291 compilation unit. */
6292 if (!TREE_STATIC (decl))
6293 return false;
6295 /* Make sure that SYMBOL always binds to the definition in this
6296 compilation unit. */
6297 if (!targetm.binds_local_p (decl))
6298 return false;
6300 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6301 if (!DECL_SECTION_NAME (decl))
6303 /* Make sure that we will not create a unique section for DECL. */
6304 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6305 return false;
6308 return function_section (decl) == section;
6311 /* Return nonzero if a 32-bit "long_call" should be generated for
6312 a call from the current function to DECL. We generate a long_call
6313 if the function:
6315 a. has an __attribute__ ((long_call))
6316 or b. is within the scope of a #pragma long_calls
6317 or c. the -mlong-calls command line switch has been specified
6319 However we do not generate a long call if the function:
6321 d. has an __attribute__ ((short_call))
6322 or e. is inside the scope of a #pragma no_long_calls
6323 or f. is defined in the same section as the current function. */
6325 bool
6326 arm_is_long_call_p (tree decl)
6328 tree attrs;
6330 if (!decl)
6331 return TARGET_LONG_CALLS;
6333 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6334 if (lookup_attribute ("short_call", attrs))
6335 return false;
6337 /* For "f", be conservative, and only cater for cases in which the
6338 whole of the current function is placed in the same section. */
6339 if (!flag_reorder_blocks_and_partition
6340 && TREE_CODE (decl) == FUNCTION_DECL
6341 && arm_function_in_section_p (decl, current_function_section ()))
6342 return false;
6344 if (lookup_attribute ("long_call", attrs))
6345 return true;
6347 return TARGET_LONG_CALLS;
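/* Case (a) above corresponds to a declaration such as

       void far_away (void) __attribute__ ((long_call));

   and case (c) to compiling with -mlong-calls; a short_call attribute or
   an enclosing #pragma no_long_calls takes precedence over both.  */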
6350 /* Return nonzero if it is ok to make a tail-call to DECL. */
6351 static bool
6352 arm_function_ok_for_sibcall (tree decl, tree exp)
6354 unsigned long func_type;
6356 if (cfun->machine->sibcall_blocked)
6357 return false;
6359 /* Never tailcall something if we are generating code for Thumb-1. */
6360 if (TARGET_THUMB1)
6361 return false;
6363 /* The PIC register is live on entry to VxWorks PLT entries, so we
6364 must make the call before restoring the PIC register. */
6365 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6366 return false;
6368 /* If we are interworking and the function is not declared static
6369 then we can't tail-call it unless we know that it exists in this
6370 compilation unit (since it might be a Thumb routine). */
6371 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6372 && !TREE_ASM_WRITTEN (decl))
6373 return false;
6375 func_type = arm_current_func_type ();
6376 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6377 if (IS_INTERRUPT (func_type))
6378 return false;
6380 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6382 /* Check that the return value locations are the same. For
6383 example that we aren't returning a value from the sibling in
6384 a VFP register but then need to transfer it to a core
6385 register. */
6386 rtx a, b;
6388 a = arm_function_value (TREE_TYPE (exp), decl, false);
6389 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6390 cfun->decl, false);
6391 if (!rtx_equal_p (a, b))
6392 return false;
6395 /* Never tailcall if function may be called with a misaligned SP. */
6396 if (IS_STACKALIGN (func_type))
6397 return false;
6399 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6400 references should become a NOP. Don't convert such calls into
6401 sibling calls. */
6402 if (TARGET_AAPCS_BASED
6403 && arm_abi == ARM_ABI_AAPCS
6404 && decl
6405 && DECL_WEAK (decl))
6406 return false;
6408 /* Everything else is ok. */
6409 return true;
6413 /* Addressing mode support functions. */
6415 /* Return nonzero if X is a legitimate immediate operand when compiling
6416 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6418 legitimate_pic_operand_p (rtx x)
6420 if (GET_CODE (x) == SYMBOL_REF
6421 || (GET_CODE (x) == CONST
6422 && GET_CODE (XEXP (x, 0)) == PLUS
6423 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6424 return 0;
6426 return 1;
6429 /* Record that the current function needs a PIC register. Initialize
6430 cfun->machine->pic_reg if we have not already done so. */
6432 static void
6433 require_pic_register (void)
6435 /* A lot of the logic here is made obscure by the fact that this
6436 routine gets called as part of the rtx cost estimation process.
6437 We don't want those calls to affect any assumptions about the real
6438 function; and further, we can't call entry_of_function() until we
6439 start the real expansion process. */
6440 if (!crtl->uses_pic_offset_table)
6442 gcc_assert (can_create_pseudo_p ());
6443 if (arm_pic_register != INVALID_REGNUM
6444 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6446 if (!cfun->machine->pic_reg)
6447 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6449 /* Play games to avoid marking the function as needing pic
6450 if we are being called as part of the cost-estimation
6451 process. */
6452 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6453 crtl->uses_pic_offset_table = 1;
6455 else
6457 rtx_insn *seq, *insn;
6459 if (!cfun->machine->pic_reg)
6460 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6462 /* Play games to avoid marking the function as needing pic
6463 if we are being called as part of the cost-estimation
6464 process. */
6465 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6467 crtl->uses_pic_offset_table = 1;
6468 start_sequence ();
6470 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6471 && arm_pic_register > LAST_LO_REGNUM)
6472 emit_move_insn (cfun->machine->pic_reg,
6473 gen_rtx_REG (Pmode, arm_pic_register));
6474 else
6475 arm_load_pic_register (0UL);
6477 seq = get_insns ();
6478 end_sequence ();
6480 for (insn = seq; insn; insn = NEXT_INSN (insn))
6481 if (INSN_P (insn))
6482 INSN_LOCATION (insn) = prologue_location;
6484 /* We can be called during expansion of PHI nodes, where
6485 we can't yet emit instructions directly in the final
6486 insn stream. Queue the insns on the entry edge, they will
6487 be committed after everything else is expanded. */
6488 insert_insn_on_edge (seq,
6489 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6496 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6498 if (GET_CODE (orig) == SYMBOL_REF
6499 || GET_CODE (orig) == LABEL_REF)
6501 rtx insn;
6503 if (reg == 0)
6505 gcc_assert (can_create_pseudo_p ());
6506 reg = gen_reg_rtx (Pmode);
6509 /* VxWorks does not impose a fixed gap between segments; the run-time
6510 gap can be different from the object-file gap. We therefore can't
6511 use GOTOFF unless we are absolutely sure that the symbol is in the
6512 same segment as the GOT. Unfortunately, the flexibility of linker
6513 scripts means that we can't be sure of that in general, so assume
6514 that GOTOFF is never valid on VxWorks. */
6515 if ((GET_CODE (orig) == LABEL_REF
6516 || (GET_CODE (orig) == SYMBOL_REF &&
6517 SYMBOL_REF_LOCAL_P (orig)))
6518 && NEED_GOT_RELOC
6519 && arm_pic_data_is_text_relative)
6520 insn = arm_pic_static_addr (orig, reg);
6521 else
6523 rtx pat;
6524 rtx mem;
6526 /* If this function doesn't have a pic register, create one now. */
6527 require_pic_register ();
6529 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6531 /* Make the MEM as close to a constant as possible. */
6532 mem = SET_SRC (pat);
6533 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6534 MEM_READONLY_P (mem) = 1;
6535 MEM_NOTRAP_P (mem) = 1;
6537 insn = emit_insn (pat);
6540 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6541 by loop. */
6542 set_unique_reg_note (insn, REG_EQUAL, orig);
6544 return reg;
6546 else if (GET_CODE (orig) == CONST)
6548 rtx base, offset;
6550 if (GET_CODE (XEXP (orig, 0)) == PLUS
6551 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6552 return orig;
6554 /* Handle the case where we have: const (UNSPEC_TLS). */
6555 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6556 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6557 return orig;
6559 /* Handle the case where we have:
6560 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6561 CONST_INT. */
6562 if (GET_CODE (XEXP (orig, 0)) == PLUS
6563 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6564 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6566 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6567 return orig;
6570 if (reg == 0)
6572 gcc_assert (can_create_pseudo_p ());
6573 reg = gen_reg_rtx (Pmode);
6576 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6578 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6579 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6580 base == reg ? 0 : reg);
6582 if (CONST_INT_P (offset))
6584 /* The base register doesn't really matter, we only want to
6585 test the index for the appropriate mode. */
6586 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6588 gcc_assert (can_create_pseudo_p ());
6589 offset = force_reg (Pmode, offset);
6592 if (CONST_INT_P (offset))
6593 return plus_constant (Pmode, base, INTVAL (offset));
6596 if (GET_MODE_SIZE (mode) > 4
6597 && (GET_MODE_CLASS (mode) == MODE_INT
6598 || TARGET_SOFT_FLOAT))
6600 emit_insn (gen_addsi3 (reg, base, offset));
6601 return reg;
6604 return gen_rtx_PLUS (Pmode, base, offset);
6607 return orig;
6611 /* Find a spare register to use during the prolog of a function. */
6613 static int
6614 thumb_find_work_register (unsigned long pushed_regs_mask)
6616 int reg;
6618 /* Check the argument registers first as these are call-used. The
6619 register allocation order means that sometimes r3 might be used
6620 but earlier argument registers might not, so check them all. */
6621 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6622 if (!df_regs_ever_live_p (reg))
6623 return reg;
6625 /* Before going on to check the call-saved registers we can try a couple
6626 more ways of deducing that r3 is available. The first is when we are
6627 pushing anonymous arguments onto the stack and we have less than 4
6628 registers worth of fixed arguments(*). In this case r3 will be part of
6629 the variable argument list and so we can be sure that it will be
6630 pushed right at the start of the function. Hence it will be available
6631 for the rest of the prologue.
6632 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6633 if (cfun->machine->uses_anonymous_args
6634 && crtl->args.pretend_args_size > 0)
6635 return LAST_ARG_REGNUM;
6637 /* The other case is when we have fixed arguments but less than 4 registers
6638 worth. In this case r3 might be used in the body of the function, but
6639 it is not being used to convey an argument into the function. In theory
6640 we could just check crtl->args.size to see how many bytes are
6641 being passed in argument registers, but it seems that it is unreliable.
6642 Sometimes it will have the value 0 when in fact arguments are being
6643 passed. (See testcase execute/20021111-1.c for an example). So we also
6644 check the args_info.nregs field as well. The problem with this field is
6645 that it makes no allowances for arguments that are passed to the
6646 function but which are not used. Hence we could miss an opportunity
6647 when a function has an unused argument in r3. But it is better to be
6648 safe than to be sorry. */
6649 if (! cfun->machine->uses_anonymous_args
6650 && crtl->args.size >= 0
6651 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6652 && (TARGET_AAPCS_BASED
6653 ? crtl->args.info.aapcs_ncrn < 4
6654 : crtl->args.info.nregs < 4))
6655 return LAST_ARG_REGNUM;
6657 /* Otherwise look for a call-saved register that is going to be pushed. */
6658 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6659 if (pushed_regs_mask & (1 << reg))
6660 return reg;
6662 if (TARGET_THUMB2)
6664 /* Thumb-2 can use high regs. */
6665 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6666 if (pushed_regs_mask & (1 << reg))
6667 return reg;
6669 /* Something went wrong - thumb_compute_save_reg_mask()
6670 should have arranged for a suitable register to be pushed. */
6671 gcc_unreachable ();
6674 static GTY(()) int pic_labelno;
6676 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6677 low register. */
6679 void
6680 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6682 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6684 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6685 return;
6687 gcc_assert (flag_pic);
6689 pic_reg = cfun->machine->pic_reg;
6690 if (TARGET_VXWORKS_RTP)
6692 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6693 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6694 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6696 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6698 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6699 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6701 else
6703 /* We use an UNSPEC rather than a LABEL_REF because this label
6704 never appears in the code stream. */
6706 labelno = GEN_INT (pic_labelno++);
6707 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6708 l1 = gen_rtx_CONST (VOIDmode, l1);
6710 /* On the ARM the PC register contains 'dot + 8' at the time of the
6711 addition, on the Thumb it is 'dot + 4'. */
6712 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6713 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6714 UNSPEC_GOTSYM_OFF);
6715 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6717 if (TARGET_32BIT)
6719 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6721 else /* TARGET_THUMB1 */
6723 if (arm_pic_register != INVALID_REGNUM
6724 && REGNO (pic_reg) > LAST_LO_REGNUM)
6726 /* We will have pushed the pic register, so we should always be
6727 able to find a work register. */
6728 pic_tmp = gen_rtx_REG (SImode,
6729 thumb_find_work_register (saved_regs));
6730 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6731 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6732 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6734 else if (arm_pic_register != INVALID_REGNUM
6735 && arm_pic_register > LAST_LO_REGNUM
6736 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6738 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6739 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6740 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6742 else
6743 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6747 /* Need to emit this whether or not we obey regdecls,
6748 since setjmp/longjmp can cause life info to screw up. */
6749 emit_use (pic_reg);
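/* For the common ELF (non-VxWorks) case the code above typically expands
   to a sequence along the lines of

       ldr     rPIC, .Lpic_offset      @ _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
   .LPIC0:
       add     rPIC, pc, rPIC

   which is why the offset folded into the UNSPEC is 'dot + 8' in ARM state
   and 'dot + 4' in Thumb state.  */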
6752 /* Generate code to load the address of a static var when flag_pic is set. */
6753 static rtx
6754 arm_pic_static_addr (rtx orig, rtx reg)
6756 rtx l1, labelno, offset_rtx, insn;
6758 gcc_assert (flag_pic);
6760 /* We use an UNSPEC rather than a LABEL_REF because this label
6761 never appears in the code stream. */
6762 labelno = GEN_INT (pic_labelno++);
6763 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6764 l1 = gen_rtx_CONST (VOIDmode, l1);
6766 /* On the ARM the PC register contains 'dot + 8' at the time of the
6767 addition, on the Thumb it is 'dot + 4'. */
6768 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6769 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6770 UNSPEC_SYMBOL_OFFSET);
6771 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6773 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6774 return insn;
6777 /* Return nonzero if X is valid as an ARM state addressing register. */
6778 static int
6779 arm_address_register_rtx_p (rtx x, int strict_p)
6781 int regno;
6783 if (!REG_P (x))
6784 return 0;
6786 regno = REGNO (x);
6788 if (strict_p)
6789 return ARM_REGNO_OK_FOR_BASE_P (regno);
6791 return (regno <= LAST_ARM_REGNUM
6792 || regno >= FIRST_PSEUDO_REGISTER
6793 || regno == FRAME_POINTER_REGNUM
6794 || regno == ARG_POINTER_REGNUM);
6797 /* Return TRUE if this rtx is the difference of a symbol and a label,
6798 and will reduce to a PC-relative relocation in the object file.
6799 Expressions like this can be left alone when generating PIC, rather
6800 than forced through the GOT. */
6801 static int
6802 pcrel_constant_p (rtx x)
6804 if (GET_CODE (x) == MINUS)
6805 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6807 return FALSE;
6810 /* Return true if X will surely end up in an index register after the
6811 next splitting pass. */
6812 static bool
6813 will_be_in_index_register (const_rtx x)
6815 /* arm.md: calculate_pic_address will split this into a register. */
6816 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6819 /* Return nonzero if X is a valid ARM state address operand. */
6821 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6822 int strict_p)
6824 bool use_ldrd;
6825 enum rtx_code code = GET_CODE (x);
6827 if (arm_address_register_rtx_p (x, strict_p))
6828 return 1;
6830 use_ldrd = (TARGET_LDRD
6831 && (mode == DImode
6832 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6834 if (code == POST_INC || code == PRE_DEC
6835 || ((code == PRE_INC || code == POST_DEC)
6836 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6837 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6839 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6840 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6841 && GET_CODE (XEXP (x, 1)) == PLUS
6842 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6844 rtx addend = XEXP (XEXP (x, 1), 1);
6846 /* Don't allow ldrd post-increment by register because it's hard
6847 to fix up invalid register choices. */
6848 if (use_ldrd
6849 && GET_CODE (x) == POST_MODIFY
6850 && REG_P (addend))
6851 return 0;
6853 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6854 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6857 /* After reload constants split into minipools will have addresses
6858 from a LABEL_REF. */
6859 else if (reload_completed
6860 && (code == LABEL_REF
6861 || (code == CONST
6862 && GET_CODE (XEXP (x, 0)) == PLUS
6863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6864 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6865 return 1;
6867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6868 return 0;
6870 else if (code == PLUS)
6872 rtx xop0 = XEXP (x, 0);
6873 rtx xop1 = XEXP (x, 1);
6875 return ((arm_address_register_rtx_p (xop0, strict_p)
6876 && ((CONST_INT_P (xop1)
6877 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6878 || (!strict_p && will_be_in_index_register (xop1))))
6879 || (arm_address_register_rtx_p (xop1, strict_p)
6880 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6883 #if 0
6884 /* Reload currently can't handle MINUS, so disable this for now */
6885 else if (GET_CODE (x) == MINUS)
6887 rtx xop0 = XEXP (x, 0);
6888 rtx xop1 = XEXP (x, 1);
6890 return (arm_address_register_rtx_p (xop0, strict_p)
6891 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6893 #endif
6895 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6896 && code == SYMBOL_REF
6897 && CONSTANT_POOL_ADDRESS_P (x)
6898 && ! (flag_pic
6899 && symbol_mentioned_p (get_pool_constant (x))
6900 && ! pcrel_constant_p (get_pool_constant (x))))
6901 return 1;
6903 return 0;
6906 /* Return nonzero if X is a valid Thumb-2 address operand. */
6907 static int
6908 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6910 bool use_ldrd;
6911 enum rtx_code code = GET_CODE (x);
6913 if (arm_address_register_rtx_p (x, strict_p))
6914 return 1;
6916 use_ldrd = (TARGET_LDRD
6917 && (mode == DImode
6918 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6920 if (code == POST_INC || code == PRE_DEC
6921 || ((code == PRE_INC || code == POST_DEC)
6922 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6923 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6925 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6926 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6927 && GET_CODE (XEXP (x, 1)) == PLUS
6928 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6930 /* Thumb-2 only has autoincrement by constant. */
6931 rtx addend = XEXP (XEXP (x, 1), 1);
6932 HOST_WIDE_INT offset;
6934 if (!CONST_INT_P (addend))
6935 return 0;
6937 offset = INTVAL(addend);
6938 if (GET_MODE_SIZE (mode) <= 4)
6939 return (offset > -256 && offset < 256);
6941 return (use_ldrd && offset > -1024 && offset < 1024
6942 && (offset & 3) == 0);
6945 /* After reload constants split into minipools will have addresses
6946 from a LABEL_REF. */
6947 else if (reload_completed
6948 && (code == LABEL_REF
6949 || (code == CONST
6950 && GET_CODE (XEXP (x, 0)) == PLUS
6951 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6952 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6953 return 1;
6955 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6956 return 0;
6958 else if (code == PLUS)
6960 rtx xop0 = XEXP (x, 0);
6961 rtx xop1 = XEXP (x, 1);
6963 return ((arm_address_register_rtx_p (xop0, strict_p)
6964 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6965 || (!strict_p && will_be_in_index_register (xop1))))
6966 || (arm_address_register_rtx_p (xop1, strict_p)
6967 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6970 /* Normally we can assign constant values to target registers without
6971 the help of the constant pool. But there are cases where we have to
6972 use the constant pool, for example:
6973 1) assigning a label to a register;
6974 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
6976 A constant pool access of the form:
6977 (set (reg r0) (mem (symbol_ref (".LC0"))))
6978 will cause a literal pool to be used (later, in arm_reorg).
6979 So here we mark such an address as invalid; the compiler will then
6980 adjust it into:
6981 (set (reg r0) (symbol_ref (".LC0")))
6982 (set (reg r0) (mem (reg r0))).
6983 No extra register is required, and (mem (reg r0)) won't cause the use
6984 of literal pools. */
6985 else if (arm_disable_literal_pool && code == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x))
6987 return 0;
6989 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6990 && code == SYMBOL_REF
6991 && CONSTANT_POOL_ADDRESS_P (x)
6992 && ! (flag_pic
6993 && symbol_mentioned_p (get_pool_constant (x))
6994 && ! pcrel_constant_p (get_pool_constant (x))))
6995 return 1;
6997 return 0;
7000 /* Return nonzero if INDEX is valid for an address index operand in
7001 ARM state. */
7002 static int
7003 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7004 int strict_p)
7006 HOST_WIDE_INT range;
7007 enum rtx_code code = GET_CODE (index);
7009 /* Standard coprocessor addressing modes. */
7010 if (TARGET_HARD_FLOAT
7011 && TARGET_VFP
7012 && (mode == SFmode || mode == DFmode))
7013 return (code == CONST_INT && INTVAL (index) < 1024
7014 && INTVAL (index) > -1024
7015 && (INTVAL (index) & 3) == 0);
7017 /* For quad modes, we restrict the constant offset to be slightly less
7018 than what the instruction format permits. We do this because for
7019 quad mode moves, we will actually decompose them into two separate
7020 double-mode reads or writes. INDEX must therefore be a valid
7021 (double-mode) offset and so should INDEX+8. */
7022 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7023 return (code == CONST_INT
7024 && INTVAL (index) < 1016
7025 && INTVAL (index) > -1024
7026 && (INTVAL (index) & 3) == 0);
7028 /* We have no such constraint on double mode offsets, so we permit the
7029 full range of the instruction format. */
7030 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7031 return (code == CONST_INT
7032 && INTVAL (index) < 1024
7033 && INTVAL (index) > -1024
7034 && (INTVAL (index) & 3) == 0);
7036 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7037 return (code == CONST_INT
7038 && INTVAL (index) < 1024
7039 && INTVAL (index) > -1024
7040 && (INTVAL (index) & 3) == 0);
7042 if (arm_address_register_rtx_p (index, strict_p)
7043 && (GET_MODE_SIZE (mode) <= 4))
7044 return 1;
7046 if (mode == DImode || mode == DFmode)
7048 if (code == CONST_INT)
7050 HOST_WIDE_INT val = INTVAL (index);
7052 if (TARGET_LDRD)
7053 return val > -256 && val < 256;
7054 else
7055 return val > -4096 && val < 4092;
7058 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7061 if (GET_MODE_SIZE (mode) <= 4
7062 && ! (arm_arch4
7063 && (mode == HImode
7064 || mode == HFmode
7065 || (mode == QImode && outer == SIGN_EXTEND))))
7067 if (code == MULT)
7069 rtx xiop0 = XEXP (index, 0);
7070 rtx xiop1 = XEXP (index, 1);
7072 return ((arm_address_register_rtx_p (xiop0, strict_p)
7073 && power_of_two_operand (xiop1, SImode))
7074 || (arm_address_register_rtx_p (xiop1, strict_p)
7075 && power_of_two_operand (xiop0, SImode)));
7077 else if (code == LSHIFTRT || code == ASHIFTRT
7078 || code == ASHIFT || code == ROTATERT)
7080 rtx op = XEXP (index, 1);
7082 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7083 && CONST_INT_P (op)
7084 && INTVAL (op) > 0
7085 && INTVAL (op) <= 31);
7089 /* For ARM v4 we may be doing a sign-extend operation during the
7090 load. */
7091 if (arm_arch4)
7093 if (mode == HImode
7094 || mode == HFmode
7095 || (outer == SIGN_EXTEND && mode == QImode))
7096 range = 256;
7097 else
7098 range = 4096;
7100 else
7101 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7103 return (code == CONST_INT
7104 && INTVAL (index) < range
7105 && INTVAL (index) > -range);
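/* The ranges computed above mirror the ARM-state load/store offset ranges:
   for example an SImode access accepts immediates in (-4096, 4096), as in
   "ldr r0, [r1, #4092]", while ARMv4 halfword and signed-byte accesses
   (ldrh/ldrsh/ldrsb) are limited to (-256, 256).  */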
7108 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7109 index operand. i.e. 1, 2, 4 or 8. */
7110 static bool
7111 thumb2_index_mul_operand (rtx op)
7113 HOST_WIDE_INT val;
7115 if (!CONST_INT_P (op))
7116 return false;
7118 val = INTVAL(op);
7119 return (val == 1 || val == 2 || val == 4 || val == 8);
7122 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7123 static int
7124 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7126 enum rtx_code code = GET_CODE (index);
7128 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7129 /* Standard coprocessor addressing modes. */
7130 if (TARGET_HARD_FLOAT
7131 && TARGET_VFP
7132 && (mode == SFmode || mode == DFmode))
7133 return (code == CONST_INT && INTVAL (index) < 1024
7134 /* Thumb-2 allows only > -256 index range for its core register
7135 load/stores. Since we allow SF/DF in core registers, we have
7136 to use the intersection between -256~4096 (core) and -1024~1024
7137 (coprocessor). */
7138 && INTVAL (index) > -256
7139 && (INTVAL (index) & 3) == 0);
7141 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7143 /* For DImode assume values will usually live in core regs
7144 and only allow LDRD addressing modes. */
7145 if (!TARGET_LDRD || mode != DImode)
7146 return (code == CONST_INT
7147 && INTVAL (index) < 1024
7148 && INTVAL (index) > -1024
7149 && (INTVAL (index) & 3) == 0);
7152 /* For quad modes, we restrict the constant offset to be slightly less
7153 than what the instruction format permits. We do this because for
7154 quad mode moves, we will actually decompose them into two separate
7155 double-mode reads or writes. INDEX must therefore be a valid
7156 (double-mode) offset and so should INDEX+8. */
7157 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7158 return (code == CONST_INT
7159 && INTVAL (index) < 1016
7160 && INTVAL (index) > -1024
7161 && (INTVAL (index) & 3) == 0);
7163 /* We have no such constraint on double mode offsets, so we permit the
7164 full range of the instruction format. */
7165 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7166 return (code == CONST_INT
7167 && INTVAL (index) < 1024
7168 && INTVAL (index) > -1024
7169 && (INTVAL (index) & 3) == 0);
7171 if (arm_address_register_rtx_p (index, strict_p)
7172 && (GET_MODE_SIZE (mode) <= 4))
7173 return 1;
7175 if (mode == DImode || mode == DFmode)
7177 if (code == CONST_INT)
7179 HOST_WIDE_INT val = INTVAL (index);
7180 /* ??? Can we assume ldrd for thumb2? */
7181 /* Thumb-2 ldrd only has reg+const addressing modes. */
7182 /* ldrd supports offsets of +-1020.
7183 However the ldr fallback does not. */
7184 return val > -256 && val < 256 && (val & 3) == 0;
7186 else
7187 return 0;
7190 if (code == MULT)
7192 rtx xiop0 = XEXP (index, 0);
7193 rtx xiop1 = XEXP (index, 1);
7195 return ((arm_address_register_rtx_p (xiop0, strict_p)
7196 && thumb2_index_mul_operand (xiop1))
7197 || (arm_address_register_rtx_p (xiop1, strict_p)
7198 && thumb2_index_mul_operand (xiop0)));
7200 else if (code == ASHIFT)
7202 rtx op = XEXP (index, 1);
7204 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7205 && CONST_INT_P (op)
7206 && INTVAL (op) > 0
7207 && INTVAL (op) <= 3);
7210 return (code == CONST_INT
7211 && INTVAL (index) < 4096
7212 && INTVAL (index) > -256);
7215 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7216 static int
7217 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7219 int regno;
7221 if (!REG_P (x))
7222 return 0;
7224 regno = REGNO (x);
7226 if (strict_p)
7227 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7229 return (regno <= LAST_LO_REGNUM
7230 || regno > LAST_VIRTUAL_REGISTER
7231 || regno == FRAME_POINTER_REGNUM
7232 || (GET_MODE_SIZE (mode) >= 4
7233 && (regno == STACK_POINTER_REGNUM
7234 || regno >= FIRST_PSEUDO_REGISTER
7235 || x == hard_frame_pointer_rtx
7236 || x == arg_pointer_rtx)));
7239 /* Return nonzero if x is a legitimate index register. This is the case
7240 for any base register that can access a QImode object. */
7241 inline static int
7242 thumb1_index_register_rtx_p (rtx x, int strict_p)
7244 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7247 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7249 The AP may be eliminated to either the SP or the FP, so we use the
7250 least common denominator, e.g. SImode, and offsets from 0 to 64.
7252 ??? Verify whether the above is the right approach.
7254 ??? Also, the FP may be eliminated to the SP, so perhaps that
7255 needs special handling also.
7257 ??? Look at how the mips16 port solves this problem. It probably uses
7258 better ways to solve some of these problems.
7260 Although it is not incorrect, we don't accept QImode and HImode
7261 addresses based on the frame pointer or arg pointer until the
7262 reload pass starts. This is so that eliminating such addresses
7263 into stack based ones won't produce impossible code. */
7265 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7267 /* ??? Not clear if this is right. Experiment. */
7268 if (GET_MODE_SIZE (mode) < 4
7269 && !(reload_in_progress || reload_completed)
7270 && (reg_mentioned_p (frame_pointer_rtx, x)
7271 || reg_mentioned_p (arg_pointer_rtx, x)
7272 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7273 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7274 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7275 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7276 return 0;
7278 /* Accept any base register. SP only in SImode or larger. */
7279 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7280 return 1;
7282 /* This is PC relative data before arm_reorg runs. */
7283 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7284 && GET_CODE (x) == SYMBOL_REF
7285 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7286 return 1;
7288 /* This is PC relative data after arm_reorg runs. */
7289 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7290 && reload_completed
7291 && (GET_CODE (x) == LABEL_REF
7292 || (GET_CODE (x) == CONST
7293 && GET_CODE (XEXP (x, 0)) == PLUS
7294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7295 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7296 return 1;
7298 /* Post-inc indexing only supported for SImode and larger. */
7299 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7300 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7301 return 1;
7303 else if (GET_CODE (x) == PLUS)
7305 /* REG+REG address can be any two index registers. */
7306 /* We disallow FRAME+REG addressing since we know that FRAME
7307 will be replaced with STACK, and SP relative addressing only
7308 permits SP+OFFSET. */
7309 if (GET_MODE_SIZE (mode) <= 4
7310 && XEXP (x, 0) != frame_pointer_rtx
7311 && XEXP (x, 1) != frame_pointer_rtx
7312 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7313 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7314 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7315 return 1;
7317 /* REG+const has 5-7 bit offset for non-SP registers. */
7318 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7319 || XEXP (x, 0) == arg_pointer_rtx)
7320 && CONST_INT_P (XEXP (x, 1))
7321 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7322 return 1;
7324 /* REG+const has 10-bit offset for SP, but only SImode and
7325 larger are supported. */
7326 /* ??? Should probably check for DI/DFmode overflow here
7327 just like GO_IF_LEGITIMATE_OFFSET does. */
7328 else if (REG_P (XEXP (x, 0))
7329 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7330 && GET_MODE_SIZE (mode) >= 4
7331 && CONST_INT_P (XEXP (x, 1))
7332 && INTVAL (XEXP (x, 1)) >= 0
7333 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7334 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7335 return 1;
7337 else if (REG_P (XEXP (x, 0))
7338 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7339 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7340 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7341 && REGNO (XEXP (x, 0))
7342 <= LAST_VIRTUAL_POINTER_REGISTER))
7343 && GET_MODE_SIZE (mode) >= 4
7344 && CONST_INT_P (XEXP (x, 1))
7345 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7346 return 1;
7349 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7350 && GET_MODE_SIZE (mode) == 4
7351 && GET_CODE (x) == SYMBOL_REF
7352 && CONSTANT_POOL_ADDRESS_P (x)
7353 && ! (flag_pic
7354 && symbol_mentioned_p (get_pool_constant (x))
7355 && ! pcrel_constant_p (get_pool_constant (x))))
7356 return 1;
7358 return 0;
7361 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7362 instruction of mode MODE. */
7364 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7366 switch (GET_MODE_SIZE (mode))
7368 case 1:
7369 return val >= 0 && val < 32;
7371 case 2:
7372 return val >= 0 && val < 64 && (val & 1) == 0;
7374 default:
7375 return (val >= 0
7376 && (val + GET_MODE_SIZE (mode)) <= 128
7377 && (val & 3) == 0);
7381 bool
7382 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7384 if (TARGET_ARM)
7385 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7386 else if (TARGET_THUMB2)
7387 return thumb2_legitimate_address_p (mode, x, strict_p);
7388 else /* if (TARGET_THUMB1) */
7389 return thumb1_legitimate_address_p (mode, x, strict_p);
7392 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7394 Given an rtx X being reloaded into a reg required to be
7395 in class CLASS, return the class of reg to actually use.
7396 In general this is just CLASS, but for the Thumb core registers and
7397 immediate constants we prefer a LO_REGS class or a subset. */
7399 static reg_class_t
7400 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7402 if (TARGET_32BIT)
7403 return rclass;
7404 else
7406 if (rclass == GENERAL_REGS)
7407 return LO_REGS;
7408 else
7409 return rclass;
7413 /* Build the SYMBOL_REF for __tls_get_addr. */
7415 static GTY(()) rtx tls_get_addr_libfunc;
7417 static rtx
7418 get_tls_get_addr (void)
7420 if (!tls_get_addr_libfunc)
7421 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7422 return tls_get_addr_libfunc;
7426 arm_load_tp (rtx target)
7428 if (!target)
7429 target = gen_reg_rtx (SImode);
7431 if (TARGET_HARD_TP)
7433 /* Can return in any reg. */
7434 emit_insn (gen_load_tp_hard (target));
7436 else
7438 /* Always returned in r0. Immediately copy the result into a pseudo;
7439 otherwise other uses of r0 (e.g. setting up function arguments) may
7440 clobber the value. */
7442 rtx tmp;
7444 emit_insn (gen_load_tp_soft ());
7446 tmp = gen_rtx_REG (SImode, 0);
7447 emit_move_insn (target, tmp);
7449 return target;
7452 static rtx
7453 load_tls_operand (rtx x, rtx reg)
7455 rtx tmp;
7457 if (reg == NULL_RTX)
7458 reg = gen_reg_rtx (SImode);
7460 tmp = gen_rtx_CONST (SImode, x);
7462 emit_move_insn (reg, tmp);
7464 return reg;
7467 static rtx
7468 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7470 rtx insns, label, labelno, sum;
7472 gcc_assert (reloc != TLS_DESCSEQ);
7473 start_sequence ();
7475 labelno = GEN_INT (pic_labelno++);
7476 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7477 label = gen_rtx_CONST (VOIDmode, label);
7479 sum = gen_rtx_UNSPEC (Pmode,
7480 gen_rtvec (4, x, GEN_INT (reloc), label,
7481 GEN_INT (TARGET_ARM ? 8 : 4)),
7482 UNSPEC_TLS);
7483 reg = load_tls_operand (sum, reg);
7485 if (TARGET_ARM)
7486 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7487 else
7488 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7490 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7491 LCT_PURE, /* LCT_CONST? */
7492 Pmode, 1, reg, Pmode);
7494 insns = get_insns ();
7495 end_sequence ();
7497 return insns;
7500 static rtx
7501 arm_tls_descseq_addr (rtx x, rtx reg)
7503 rtx labelno = GEN_INT (pic_labelno++);
7504 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7505 rtx sum = gen_rtx_UNSPEC (Pmode,
7506 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7507 gen_rtx_CONST (VOIDmode, label),
7508 GEN_INT (!TARGET_ARM)),
7509 UNSPEC_TLS);
7510 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7512 emit_insn (gen_tlscall (x, labelno));
7513 if (!reg)
7514 reg = gen_reg_rtx (SImode);
7515 else
7516 gcc_assert (REGNO (reg) != 0);
7518 emit_move_insn (reg, reg0);
7520 return reg;
7524 legitimize_tls_address (rtx x, rtx reg)
7526 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7527 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7529 switch (model)
7531 case TLS_MODEL_GLOBAL_DYNAMIC:
7532 if (TARGET_GNU2_TLS)
7534 reg = arm_tls_descseq_addr (x, reg);
7536 tp = arm_load_tp (NULL_RTX);
7538 dest = gen_rtx_PLUS (Pmode, tp, reg);
7540 else
7542 /* Original scheme */
7543 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7544 dest = gen_reg_rtx (Pmode);
7545 emit_libcall_block (insns, dest, ret, x);
7547 return dest;
7549 case TLS_MODEL_LOCAL_DYNAMIC:
7550 if (TARGET_GNU2_TLS)
7552 reg = arm_tls_descseq_addr (x, reg);
7554 tp = arm_load_tp (NULL_RTX);
7556 dest = gen_rtx_PLUS (Pmode, tp, reg);
7558 else
7560 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7562 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7563 share the LDM result with other LD model accesses. */
7564 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7565 UNSPEC_TLS);
7566 dest = gen_reg_rtx (Pmode);
7567 emit_libcall_block (insns, dest, ret, eqv);
7569 /* Load the addend. */
7570 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7571 GEN_INT (TLS_LDO32)),
7572 UNSPEC_TLS);
7573 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7574 dest = gen_rtx_PLUS (Pmode, dest, addend);
7576 return dest;
7578 case TLS_MODEL_INITIAL_EXEC:
7579 labelno = GEN_INT (pic_labelno++);
7580 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7581 label = gen_rtx_CONST (VOIDmode, label);
7582 sum = gen_rtx_UNSPEC (Pmode,
7583 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7584 GEN_INT (TARGET_ARM ? 8 : 4)),
7585 UNSPEC_TLS);
7586 reg = load_tls_operand (sum, reg);
7588 if (TARGET_ARM)
7589 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7590 else if (TARGET_THUMB2)
7591 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7592 else
7594 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7595 emit_move_insn (reg, gen_const_mem (SImode, reg));
7598 tp = arm_load_tp (NULL_RTX);
7600 return gen_rtx_PLUS (Pmode, tp, reg);
7602 case TLS_MODEL_LOCAL_EXEC:
7603 tp = arm_load_tp (NULL_RTX);
7605 reg = gen_rtx_UNSPEC (Pmode,
7606 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7607 UNSPEC_TLS);
7608 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7610 return gen_rtx_PLUS (Pmode, tp, reg);
7612 default:
7613 abort ();
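/* Annotation (not part of the original source): a rough summary of the
   address forms built above, derived only from the code in this function.
   Global and local dynamic: with TARGET_GNU2_TLS the result is
   tp + <descriptor-sequence value>; otherwise it is the value returned by
   a __tls_get_addr libcall (local dynamic additionally adds a TLS_LDO32
   addend).  Initial exec: tp + a PC-relative load of the TLS_IE32 entry.
   Local exec: tp + an UNSPEC_TLS (x, TLS_LE32) constant.  */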
7617 /* Try machine-dependent ways of modifying an illegitimate address
7618 to be legitimate. If we find one, return the new, valid address. */
7620 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7622 if (arm_tls_referenced_p (x))
7624 rtx addend = NULL;
7626 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7628 addend = XEXP (XEXP (x, 0), 1);
7629 x = XEXP (XEXP (x, 0), 0);
7632 if (GET_CODE (x) != SYMBOL_REF)
7633 return x;
7635 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7637 x = legitimize_tls_address (x, NULL_RTX);
7639 if (addend)
7641 x = gen_rtx_PLUS (SImode, x, addend);
7642 orig_x = x;
7644 else
7645 return x;
7648 if (!TARGET_ARM)
7650 /* TODO: legitimize_address for Thumb2. */
7651 if (TARGET_THUMB2)
7652 return x;
7653 return thumb_legitimize_address (x, orig_x, mode);
7656 if (GET_CODE (x) == PLUS)
7658 rtx xop0 = XEXP (x, 0);
7659 rtx xop1 = XEXP (x, 1);
7661 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7662 xop0 = force_reg (SImode, xop0);
7664 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7665 && !symbol_mentioned_p (xop1))
7666 xop1 = force_reg (SImode, xop1);
7668 if (ARM_BASE_REGISTER_RTX_P (xop0)
7669 && CONST_INT_P (xop1))
7671 HOST_WIDE_INT n, low_n;
7672 rtx base_reg, val;
7673 n = INTVAL (xop1);
7675 /* VFP addressing modes actually allow greater offsets, but for
7676 now we just stick with the lowest common denominator. */
7677 if (mode == DImode
7678 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7680 low_n = n & 0x0f;
7681 n &= ~0x0f;
7682 if (low_n > 4)
7684 n += 16;
7685 low_n -= 16;
7688 else
7690 low_n = ((mode) == TImode ? 0
7691 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7692 n -= low_n;
7695 base_reg = gen_reg_rtx (SImode);
7696 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7697 emit_move_insn (base_reg, val);
7698 x = plus_constant (Pmode, base_reg, low_n);
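/* Annotation (not part of the original source): two worked examples of
   the split above, using hypothetical offsets.  SImode with n = 0x1234:
   low_n = 0x234 and n becomes 0x1000, so the address is rewritten as
   (xop0 + 0x1000) + 0x234, keeping the low part within the 12-bit
   ldr/str offset field.  DImode with n = 0x10b: low_n = 0xb exceeds 4,
   so n is bumped to 0x110 and low_n becomes -5, i.e. (xop0 + 0x110) - 5,
   keeping the residual offset small for the ldrd/VFP-style forms.  */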
7700 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7701 x = gen_rtx_PLUS (SImode, xop0, xop1);
7704 /* XXX We don't allow MINUS any more -- see comment in
7705 arm_legitimate_address_outer_p (). */
7706 else if (GET_CODE (x) == MINUS)
7708 rtx xop0 = XEXP (x, 0);
7709 rtx xop1 = XEXP (x, 1);
7711 if (CONSTANT_P (xop0))
7712 xop0 = force_reg (SImode, xop0);
7714 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7715 xop1 = force_reg (SImode, xop1);
7717 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7718 x = gen_rtx_MINUS (SImode, xop0, xop1);
7721 /* Make sure to take full advantage of the pre-indexed addressing mode
7722 with absolute addresses which often allows for the base register to
7723 be factorized for multiple adjacent memory references, and it might
7724 even allow for the minipool to be avoided entirely. */
7725 else if (CONST_INT_P (x) && optimize > 0)
7727 unsigned int bits;
7728 HOST_WIDE_INT mask, base, index;
7729 rtx base_reg;
7731 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7732 use an 8-bit index. So let's use a 12-bit index for SImode only and
7733 hope that arm_gen_constant will enable ldrb to use more bits. */
7734 bits = (mode == SImode) ? 12 : 8;
7735 mask = (1 << bits) - 1;
7736 base = INTVAL (x) & ~mask;
7737 index = INTVAL (x) & mask;
7738 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7740 /* It'll most probably be more efficient to generate the base
7741 with more bits set and use a negative index instead. */
7742 base |= mask;
7743 index -= mask;
7745 base_reg = force_reg (SImode, GEN_INT (base));
7746 x = plus_constant (Pmode, base_reg, index);
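/* Annotation (not part of the original source): a worked example of the
   heuristic above, using a hypothetical address.  For x = 0x00fff7ff in
   SImode, bits = 12 gives base = 0x00fff000 and index = 0x7ff; the base
   has 12 bits set, more than (32 - 12)/2 = 10, so it becomes
   base = 0x00ffffff and index = -0x800.  The new base is ~0xff000000 and
   can usually be built with a single MVN, whereas 0x00fff000 needs two
   instructions, and -0x800 still fits the signed 12-bit ldr/str offset.  */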
7749 if (flag_pic)
7751 /* We need to find and carefully transform any SYMBOL and LABEL
7752 references; so go back to the original address expression. */
7753 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7755 if (new_x != orig_x)
7756 x = new_x;
7759 return x;
7763 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7764 to be legitimate. If we find one, return the new, valid address. */
7766 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7768 if (GET_CODE (x) == PLUS
7769 && CONST_INT_P (XEXP (x, 1))
7770 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7771 || INTVAL (XEXP (x, 1)) < 0))
7773 rtx xop0 = XEXP (x, 0);
7774 rtx xop1 = XEXP (x, 1);
7775 HOST_WIDE_INT offset = INTVAL (xop1);
7777 /* Try and fold the offset into a biasing of the base register and
7778 then offsetting that. Don't do this when optimizing for space
7779 since it can cause too many CSEs. */
7780 if (optimize_size && offset >= 0
7781 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7783 HOST_WIDE_INT delta;
7785 if (offset >= 256)
7786 delta = offset - (256 - GET_MODE_SIZE (mode));
7787 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7788 delta = 31 * GET_MODE_SIZE (mode);
7789 else
7790 delta = offset & (~31 * GET_MODE_SIZE (mode));
7792 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7793 NULL_RTX);
7794 x = plus_constant (Pmode, xop0, delta);
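/* Annotation (not part of the original source): a worked example of the
   folding above, using a hypothetical offset.  For an SImode access at
   base + 260, when the optimize_size branch above is taken, offset >= 256
   so delta = 260 - (256 - 4) = 8; the base register is biased by
   260 - 8 = 252 (which fits an 8-bit Thumb add immediate) and the access
   becomes [biased_base, #8], an offset thumb_legitimate_offset_p accepts.  */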
7796 else if (offset < 0 && offset > -256)
7797 /* Small negative offsets are best done with a subtract before the
7798 dereference; forcing these into a register normally takes two
7799 instructions. */
7800 x = force_operand (x, NULL_RTX);
7801 else
7803 /* For the remaining cases, force the constant into a register. */
7804 xop1 = force_reg (SImode, xop1);
7805 x = gen_rtx_PLUS (SImode, xop0, xop1);
7808 else if (GET_CODE (x) == PLUS
7809 && s_register_operand (XEXP (x, 1), SImode)
7810 && !s_register_operand (XEXP (x, 0), SImode))
7812 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7814 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7817 if (flag_pic)
7819 /* We need to find and carefully transform any SYMBOL and LABEL
7820 references; so go back to the original address expression. */
7821 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7823 if (new_x != orig_x)
7824 x = new_x;
7827 return x;
7830 bool
7831 arm_legitimize_reload_address (rtx *p,
7832 machine_mode mode,
7833 int opnum, int type,
7834 int ind_levels ATTRIBUTE_UNUSED)
7836 /* We must recognize output that we have already generated ourselves. */
7837 if (GET_CODE (*p) == PLUS
7838 && GET_CODE (XEXP (*p, 0)) == PLUS
7839 && REG_P (XEXP (XEXP (*p, 0), 0))
7840 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7841 && CONST_INT_P (XEXP (*p, 1)))
7843 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7844 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7845 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7846 return true;
7849 if (GET_CODE (*p) == PLUS
7850 && REG_P (XEXP (*p, 0))
7851 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7852 /* If the base register is equivalent to a constant, let the generic
7853 code handle it. Otherwise we will run into problems if a future
7854 reload pass decides to rematerialize the constant. */
7855 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7856 && CONST_INT_P (XEXP (*p, 1)))
7858 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7859 HOST_WIDE_INT low, high;
7861 /* Detect coprocessor load/stores. */
7862 bool coproc_p = ((TARGET_HARD_FLOAT
7863 && TARGET_VFP
7864 && (mode == SFmode || mode == DFmode))
7865 || (TARGET_REALLY_IWMMXT
7866 && VALID_IWMMXT_REG_MODE (mode))
7867 || (TARGET_NEON
7868 && (VALID_NEON_DREG_MODE (mode)
7869 || VALID_NEON_QREG_MODE (mode))));
7871 /* In some cases, bail out when the low two bits of the offset are nonzero (unaligned). */
7872 if ((val & 0x3) != 0
7873 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7874 && (coproc_p
7875 /* For DI, and DF under soft-float: */
7876 || ((mode == DImode || mode == DFmode)
7877 /* Without ldrd, we use stm/ldm, which does not
7878 fare well with unaligned bits. */
7879 && (! TARGET_LDRD
7880 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7881 || TARGET_THUMB2))))
7882 return false;
7884 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7885 where the (reg+high) part gets turned into a reload add insn,
7886 we try to decompose the index into high/low values that can often
7887 also lead to better reload CSE.
7888 For example:
7889 ldr r0, [r2, #4100] // Offset too large
7890 ldr r1, [r2, #4104] // Offset too large
7892 is best reloaded as:
7893 add t1, r2, #4096
7894 ldr r0, [t1, #4]
7895 add t2, r2, #4096
7896 ldr r1, [t2, #8]
7898 which post-reload CSE can simplify in most cases to eliminate the
7899 second add instruction:
7900 add t1, r2, #4096
7901 ldr r0, [t1, #4]
7902 ldr r1, [t1, #8]
7904 The idea here is that we want to split out the bits of the constant
7905 as a mask, rather than by subtracting the maximum offset that the
7906 respective type of load/store instruction can handle.
7908 A negative low part can still be used even when the overall
7909 offset is positive; sometimes this leads to an immediate
7910 that can be constructed with fewer instructions.
7911 For example:
7912 ldr r0, [r2, #0x3FFFFC]
7914 This is best reloaded as:
7915 add t1, r2, #0x400000
7916 ldr r0, [t1, #-4]
7918 The trick for spotting this for a load insn with N bits of offset
7919 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7920 negative offset that is going to make bit N and all the bits below
7921 it become zero in the remainder part.
7923 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7924 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7925 used in most cases of ARM load/store instructions. */
7927 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7928 (((VAL) & ((1 << (N)) - 1)) \
7929 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7930 : 0)
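/* Annotation (not part of the original source): a worked instance of the
   macro above, matching the 0x3ffffc example in the comment.
   SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12): bits 11:0 are 0xffc (nonzero),
   so the result is ((0x3ffffc & 0x1fff) ^ 0x1000) - 0x1000
   = (0x1ffc ^ 0x1000) - 0x1000 = 0xffc - 0x1000 = -4.  The high part is
   then 0x3ffffc - (-4) = 0x400000, giving the add #0x400000 / ldr #-4
   sequence described above.  */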
7932 if (coproc_p)
7934 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7936 /* NEON quad-word load/stores are made of two double-word accesses,
7937 so the valid index range is reduced by 8. Treat as 9-bit range if
7938 we go over it. */
7939 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7940 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7942 else if (GET_MODE_SIZE (mode) == 8)
7944 if (TARGET_LDRD)
7945 low = (TARGET_THUMB2
7946 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7947 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7948 else
7949 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7950 to access doublewords. The supported load/store offsets are
7951 -8, -4, and 4, which we try to produce here. */
7952 low = ((val & 0xf) ^ 0x8) - 0x8;
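/* Annotation (not part of the original source): the expression above
   sign-extends the low nibble of val into [-8, 7].  For example a low
   nibble of 0x4 gives (0x4 ^ 0x8) - 0x8 = 4, and 0xc gives
   (0xc ^ 0x8) - 0x8 = -4, matching the +4/-4/-8 offsets mentioned in the
   comment.  */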
7954 else if (GET_MODE_SIZE (mode) < 8)
7956 /* NEON element load/stores do not have an offset. */
7957 if (TARGET_NEON_FP16 && mode == HFmode)
7958 return false;
7960 if (TARGET_THUMB2)
7962 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7963 Try the wider 12-bit range first, and re-try if the result
7964 is out of range. */
7965 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7966 if (low < -255)
7967 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7969 else
7971 if (mode == HImode || mode == HFmode)
7973 if (arm_arch4)
7974 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7975 else
7977 /* The storehi/movhi_bytes fallbacks can use only
7978 [-4094,+4094] of the full ldrb/strb index range. */
7979 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7980 if (low == 4095 || low == -4095)
7981 return false;
7984 else
7985 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7988 else
7989 return false;
7991 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7992 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7993 - (unsigned HOST_WIDE_INT) 0x80000000);
7994 /* Check for overflow or zero */
7995 if (low == 0 || high == 0 || (high + low != val))
7996 return false;
7998 /* Reload the high part into a base reg; leave the low part
7999 in the mem.
8000 Note that replacing this gen_rtx_PLUS with plus_constant is
8001 wrong in this case because we rely on the
8002 (plus (plus reg c1) c2) structure being preserved so that
8003 XEXP (*p, 0) in push_reload below uses the correct term. */
8004 *p = gen_rtx_PLUS (GET_MODE (*p),
8005 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8006 GEN_INT (high)),
8007 GEN_INT (low));
8008 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8009 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8010 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8011 return true;
8014 return false;
8018 thumb_legitimize_reload_address (rtx *x_p,
8019 machine_mode mode,
8020 int opnum, int type,
8021 int ind_levels ATTRIBUTE_UNUSED)
8023 rtx x = *x_p;
8025 if (GET_CODE (x) == PLUS
8026 && GET_MODE_SIZE (mode) < 4
8027 && REG_P (XEXP (x, 0))
8028 && XEXP (x, 0) == stack_pointer_rtx
8029 && CONST_INT_P (XEXP (x, 1))
8030 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8032 rtx orig_x = x;
8034 x = copy_rtx (x);
8035 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8036 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8037 return x;
8040 /* If both registers are hi-regs, then it's better to reload the
8041 entire expression rather than each register individually. That
8042 only requires one reload register rather than two. */
8043 if (GET_CODE (x) == PLUS
8044 && REG_P (XEXP (x, 0))
8045 && REG_P (XEXP (x, 1))
8046 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8047 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8049 rtx orig_x = x;
8051 x = copy_rtx (x);
8052 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8053 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8054 return x;
8057 return NULL;
8060 /* Return TRUE if X contains any TLS symbol references. */
8062 bool
8063 arm_tls_referenced_p (rtx x)
8065 if (! TARGET_HAVE_TLS)
8066 return false;
8068 subrtx_iterator::array_type array;
8069 FOR_EACH_SUBRTX (iter, array, x, ALL)
8071 const_rtx x = *iter;
8072 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8073 return true;
8075 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8076 TLS offsets, not real symbol references. */
8077 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8078 iter.skip_subrtxes ();
8080 return false;
8083 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8085 On the ARM, allow any integer (invalid ones are removed later by insn
8086 patterns), nice doubles and symbol_refs which refer to the function's
8087 constant pool XXX.
8089 When generating pic allow anything. */
8091 static bool
8092 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8094 /* At present, we have no support for Neon structure constants, so forbid
8095 them here. It might be possible to handle simple cases like 0 and -1
8096 in future. */
8097 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8098 return false;
8100 return flag_pic || !label_mentioned_p (x);
8103 static bool
8104 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8106 return (CONST_INT_P (x)
8107 || CONST_DOUBLE_P (x)
8108 || CONSTANT_ADDRESS_P (x)
8109 || flag_pic);
8112 static bool
8113 arm_legitimate_constant_p (machine_mode mode, rtx x)
8115 return (!arm_cannot_force_const_mem (mode, x)
8116 && (TARGET_32BIT
8117 ? arm_legitimate_constant_p_1 (mode, x)
8118 : thumb_legitimate_constant_p (mode, x)));
8121 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8123 static bool
8124 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8126 rtx base, offset;
8128 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8130 split_const (x, &base, &offset);
8131 if (GET_CODE (base) == SYMBOL_REF
8132 && !offset_within_block_p (base, INTVAL (offset)))
8133 return true;
8135 return arm_tls_referenced_p (x);
8138 #define REG_OR_SUBREG_REG(X) \
8139 (REG_P (X) \
8140 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8142 #define REG_OR_SUBREG_RTX(X) \
8143 (REG_P (X) ? (X) : SUBREG_REG (X))
8145 static inline int
8146 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8148 machine_mode mode = GET_MODE (x);
8149 int total, words;
8151 switch (code)
8153 case ASHIFT:
8154 case ASHIFTRT:
8155 case LSHIFTRT:
8156 case ROTATERT:
8157 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8159 case PLUS:
8160 case MINUS:
8161 case COMPARE:
8162 case NEG:
8163 case NOT:
8164 return COSTS_N_INSNS (1);
8166 case MULT:
8167 if (CONST_INT_P (XEXP (x, 1)))
8169 int cycles = 0;
8170 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8172 while (i)
8174 i >>= 2;
8175 cycles++;
8177 return COSTS_N_INSNS (2) + cycles;
8179 return COSTS_N_INSNS (1) + 16;
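/* Annotation (not part of the original source): the loop above charges
   one extra "cycle" per two bits of the constant multiplier.  For
   example a multiplier of 0x55 takes four iterations
   (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), so the estimate is
   COSTS_N_INSNS (2) + 4; wider constants are costed proportionally
   higher.  */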
8181 case SET:
8182 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8183 the mode. */
8184 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8185 return (COSTS_N_INSNS (words)
8186 + 4 * ((MEM_P (SET_SRC (x)))
8187 + MEM_P (SET_DEST (x))));
8189 case CONST_INT:
8190 if (outer == SET)
8192 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8193 return 0;
8194 if (thumb_shiftable_const (INTVAL (x)))
8195 return COSTS_N_INSNS (2);
8196 return COSTS_N_INSNS (3);
8198 else if ((outer == PLUS || outer == COMPARE)
8199 && INTVAL (x) < 256 && INTVAL (x) > -256)
8200 return 0;
8201 else if ((outer == IOR || outer == XOR || outer == AND)
8202 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8203 return COSTS_N_INSNS (1);
8204 else if (outer == AND)
8206 int i;
8207 /* This duplicates the tests in the andsi3 expander. */
8208 for (i = 9; i <= 31; i++)
8209 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8210 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8211 return COSTS_N_INSNS (2);
8213 else if (outer == ASHIFT || outer == ASHIFTRT
8214 || outer == LSHIFTRT)
8215 return 0;
8216 return COSTS_N_INSNS (2);
8218 case CONST:
8219 case CONST_DOUBLE:
8220 case LABEL_REF:
8221 case SYMBOL_REF:
8222 return COSTS_N_INSNS (3);
8224 case UDIV:
8225 case UMOD:
8226 case DIV:
8227 case MOD:
8228 return 100;
8230 case TRUNCATE:
8231 return 99;
8233 case AND:
8234 case XOR:
8235 case IOR:
8236 /* XXX guess. */
8237 return 8;
8239 case MEM:
8240 /* XXX another guess. */
8241 /* Memory costs quite a lot for the first word, but subsequent words
8242 load at the equivalent of a single insn each. */
8243 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8244 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8245 ? 4 : 0));
8247 case IF_THEN_ELSE:
8248 /* XXX a guess. */
8249 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8250 return 14;
8251 return 2;
8253 case SIGN_EXTEND:
8254 case ZERO_EXTEND:
8255 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8256 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8258 if (mode == SImode)
8259 return total;
8261 if (arm_arch6)
8262 return total + COSTS_N_INSNS (1);
8264 /* Assume a two-shift sequence. Increase the cost slightly so
8265 we prefer actual shifts over an extend operation. */
8266 return total + 1 + COSTS_N_INSNS (2);
8268 default:
8269 return 99;
8273 static inline bool
8274 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8276 machine_mode mode = GET_MODE (x);
8277 enum rtx_code subcode;
8278 rtx operand;
8279 enum rtx_code code = GET_CODE (x);
8280 *total = 0;
8282 switch (code)
8284 case MEM:
8285 /* Memory costs quite a lot for the first word, but subsequent words
8286 load at the equivalent of a single insn each. */
8287 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8288 return true;
8290 case DIV:
8291 case MOD:
8292 case UDIV:
8293 case UMOD:
8294 if (TARGET_HARD_FLOAT && mode == SFmode)
8295 *total = COSTS_N_INSNS (2);
8296 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8297 *total = COSTS_N_INSNS (4);
8298 else
8299 *total = COSTS_N_INSNS (20);
8300 return false;
8302 case ROTATE:
8303 if (REG_P (XEXP (x, 1)))
8304 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8305 else if (!CONST_INT_P (XEXP (x, 1)))
8306 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8308 /* Fall through */
8309 case ROTATERT:
8310 if (mode != SImode)
8312 *total += COSTS_N_INSNS (4);
8313 return true;
8316 /* Fall through */
8317 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8318 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8319 if (mode == DImode)
8321 *total += COSTS_N_INSNS (3);
8322 return true;
8325 *total += COSTS_N_INSNS (1);
8326 /* Increase the cost of complex shifts because they aren't any faster,
8327 and reduce dual issue opportunities. */
8328 if (arm_tune_cortex_a9
8329 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8330 ++*total;
8332 return true;
8334 case MINUS:
8335 if (mode == DImode)
8337 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8338 if (CONST_INT_P (XEXP (x, 0))
8339 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8341 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8342 return true;
8345 if (CONST_INT_P (XEXP (x, 1))
8346 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8348 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8349 return true;
8352 return false;
8355 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8357 if (TARGET_HARD_FLOAT
8358 && (mode == SFmode
8359 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8361 *total = COSTS_N_INSNS (1);
8362 if (CONST_DOUBLE_P (XEXP (x, 0))
8363 && arm_const_double_rtx (XEXP (x, 0)))
8365 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8366 return true;
8369 if (CONST_DOUBLE_P (XEXP (x, 1))
8370 && arm_const_double_rtx (XEXP (x, 1)))
8372 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8373 return true;
8376 return false;
8378 *total = COSTS_N_INSNS (20);
8379 return false;
8382 *total = COSTS_N_INSNS (1);
8383 if (CONST_INT_P (XEXP (x, 0))
8384 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8387 return true;
8390 subcode = GET_CODE (XEXP (x, 1));
8391 if (subcode == ASHIFT || subcode == ASHIFTRT
8392 || subcode == LSHIFTRT
8393 || subcode == ROTATE || subcode == ROTATERT)
8395 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8396 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8397 return true;
8400 /* A shift as a part of RSB costs no more than RSB itself. */
8401 if (GET_CODE (XEXP (x, 0)) == MULT
8402 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8404 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8405 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8406 return true;
8409 if (subcode == MULT
8410 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8412 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8413 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8414 return true;
8417 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8418 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8420 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8421 if (REG_P (XEXP (XEXP (x, 1), 0))
8422 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8423 *total += COSTS_N_INSNS (1);
8425 return true;
8428 /* Fall through */
8430 case PLUS:
8431 if (code == PLUS && arm_arch6 && mode == SImode
8432 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8433 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8435 *total = COSTS_N_INSNS (1);
8436 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8437 0, speed);
8438 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8439 return true;
8442 /* MLA: All arguments must be registers. We filter out
8443 multiplication by a power of two, so that we fall down into
8444 the code below. */
8445 if (GET_CODE (XEXP (x, 0)) == MULT
8446 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8448 /* The cost comes from the cost of the multiply. */
8449 return false;
8452 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8454 if (TARGET_HARD_FLOAT
8455 && (mode == SFmode
8456 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8458 *total = COSTS_N_INSNS (1);
8459 if (CONST_DOUBLE_P (XEXP (x, 1))
8460 && arm_const_double_rtx (XEXP (x, 1)))
8462 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8463 return true;
8466 return false;
8469 *total = COSTS_N_INSNS (20);
8470 return false;
8473 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8476 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8477 if (REG_P (XEXP (XEXP (x, 0), 0))
8478 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8479 *total += COSTS_N_INSNS (1);
8480 return true;
8483 /* Fall through */
8485 case AND: case XOR: case IOR:
8487 /* Normally the frame registers will be split into reg+const during
8488 reload, so it is a bad idea to combine them with other instructions,
8489 since then they might not be moved outside of loops. As a compromise
8490 we allow integration with ops that have a constant as their second
8491 operand. */
8492 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8493 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8494 && !CONST_INT_P (XEXP (x, 1)))
8495 *total = COSTS_N_INSNS (1);
8497 if (mode == DImode)
8499 *total += COSTS_N_INSNS (2);
8500 if (CONST_INT_P (XEXP (x, 1))
8501 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8503 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8504 return true;
8507 return false;
8510 *total += COSTS_N_INSNS (1);
8511 if (CONST_INT_P (XEXP (x, 1))
8512 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8514 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8515 return true;
8517 subcode = GET_CODE (XEXP (x, 0));
8518 if (subcode == ASHIFT || subcode == ASHIFTRT
8519 || subcode == LSHIFTRT
8520 || subcode == ROTATE || subcode == ROTATERT)
8522 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8523 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8524 return true;
8527 if (subcode == MULT
8528 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8530 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8531 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8532 return true;
8535 if (subcode == UMIN || subcode == UMAX
8536 || subcode == SMIN || subcode == SMAX)
8538 *total = COSTS_N_INSNS (3);
8539 return true;
8542 return false;
8544 case MULT:
8545 /* This should have been handled by the CPU specific routines. */
8546 gcc_unreachable ();
8548 case TRUNCATE:
8549 if (arm_arch3m && mode == SImode
8550 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8552 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8553 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8554 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8555 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8557 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8558 return true;
8560 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8561 return false;
8563 case NEG:
8564 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8566 if (TARGET_HARD_FLOAT
8567 && (mode == SFmode
8568 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8570 *total = COSTS_N_INSNS (1);
8571 return false;
8573 *total = COSTS_N_INSNS (2);
8574 return false;
8577 /* Fall through */
8578 case NOT:
8579 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8580 if (mode == SImode && code == NOT)
8582 subcode = GET_CODE (XEXP (x, 0));
8583 if (subcode == ASHIFT || subcode == ASHIFTRT
8584 || subcode == LSHIFTRT
8585 || subcode == ROTATE || subcode == ROTATERT
8586 || (subcode == MULT
8587 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8589 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8590 /* Register shifts cost an extra cycle. */
8591 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8592 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8593 subcode, 1, speed);
8594 return true;
8598 return false;
8600 case IF_THEN_ELSE:
8601 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8603 *total = COSTS_N_INSNS (4);
8604 return true;
8607 operand = XEXP (x, 0);
8609 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8610 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8611 && REG_P (XEXP (operand, 0))
8612 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8613 *total += COSTS_N_INSNS (1);
8614 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8615 + rtx_cost (XEXP (x, 2), code, 2, speed));
8616 return true;
8618 case NE:
8619 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8621 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8622 return true;
8624 goto scc_insn;
8626 case GE:
8627 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8628 && mode == SImode && XEXP (x, 1) == const0_rtx)
8630 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8631 return true;
8633 goto scc_insn;
8635 case LT:
8636 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8637 && mode == SImode && XEXP (x, 1) == const0_rtx)
8639 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8640 return true;
8642 goto scc_insn;
8644 case EQ:
8645 case GT:
8646 case LE:
8647 case GEU:
8648 case LTU:
8649 case GTU:
8650 case LEU:
8651 case UNORDERED:
8652 case ORDERED:
8653 case UNEQ:
8654 case UNGE:
8655 case UNLT:
8656 case UNGT:
8657 case UNLE:
8658 scc_insn:
8659 /* SCC insns. In the case where the comparison has already been
8660 performed, then they cost 2 instructions. Otherwise they need
8661 an additional comparison before them. */
8662 *total = COSTS_N_INSNS (2);
8663 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8665 return true;
8668 /* Fall through */
8669 case COMPARE:
8670 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8672 *total = 0;
8673 return true;
8676 *total += COSTS_N_INSNS (1);
8677 if (CONST_INT_P (XEXP (x, 1))
8678 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8680 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8681 return true;
8684 subcode = GET_CODE (XEXP (x, 0));
8685 if (subcode == ASHIFT || subcode == ASHIFTRT
8686 || subcode == LSHIFTRT
8687 || subcode == ROTATE || subcode == ROTATERT)
8689 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8690 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8691 return true;
8694 if (subcode == MULT
8695 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8697 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8698 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8699 return true;
8702 return false;
8704 case UMIN:
8705 case UMAX:
8706 case SMIN:
8707 case SMAX:
8708 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8709 if (!CONST_INT_P (XEXP (x, 1))
8710 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8711 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8712 return true;
8714 case ABS:
8715 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8717 if (TARGET_HARD_FLOAT
8718 && (mode == SFmode
8719 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8721 *total = COSTS_N_INSNS (1);
8722 return false;
8724 *total = COSTS_N_INSNS (20);
8725 return false;
8727 *total = COSTS_N_INSNS (1);
8728 if (mode == DImode)
8729 *total += COSTS_N_INSNS (3);
8730 return false;
8732 case SIGN_EXTEND:
8733 case ZERO_EXTEND:
8734 *total = 0;
8735 if (GET_MODE_CLASS (mode) == MODE_INT)
8737 rtx op = XEXP (x, 0);
8738 machine_mode opmode = GET_MODE (op);
8740 if (mode == DImode)
8741 *total += COSTS_N_INSNS (1);
8743 if (opmode != SImode)
8745 if (MEM_P (op))
8747 /* If !arm_arch4, we use one of the extendhisi2_mem
8748 or movhi_bytes patterns for HImode. For a QImode
8749 sign extension, we first zero-extend from memory
8750 and then perform a shift sequence. */
8751 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8752 *total += COSTS_N_INSNS (2);
8754 else if (arm_arch6)
8755 *total += COSTS_N_INSNS (1);
8757 /* We don't have the necessary insn, so we need to perform some
8758 other operation. */
8759 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8760 /* An and with constant 255. */
8761 *total += COSTS_N_INSNS (1);
8762 else
8763 /* A shift sequence. Increase costs slightly to avoid
8764 combining two shifts into an extend operation. */
8765 *total += COSTS_N_INSNS (2) + 1;
8768 return false;
8771 switch (GET_MODE (XEXP (x, 0)))
8773 case V8QImode:
8774 case V4HImode:
8775 case V2SImode:
8776 case V4QImode:
8777 case V2HImode:
8778 *total = COSTS_N_INSNS (1);
8779 return false;
8781 default:
8782 gcc_unreachable ();
8784 gcc_unreachable ();
8786 case ZERO_EXTRACT:
8787 case SIGN_EXTRACT:
8788 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8789 return true;
8791 case CONST_INT:
8792 if (const_ok_for_arm (INTVAL (x))
8793 || const_ok_for_arm (~INTVAL (x)))
8794 *total = COSTS_N_INSNS (1);
8795 else
8796 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8797 INTVAL (x), NULL_RTX,
8798 NULL_RTX, 0, 0));
8799 return true;
8801 case CONST:
8802 case LABEL_REF:
8803 case SYMBOL_REF:
8804 *total = COSTS_N_INSNS (3);
8805 return true;
8807 case HIGH:
8808 *total = COSTS_N_INSNS (1);
8809 return true;
8811 case LO_SUM:
8812 *total = COSTS_N_INSNS (1);
8813 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8814 return true;
8816 case CONST_DOUBLE:
8817 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8818 && (mode == SFmode || !TARGET_VFP_SINGLE))
8819 *total = COSTS_N_INSNS (1);
8820 else
8821 *total = COSTS_N_INSNS (4);
8822 return true;
8824 case SET:
8825 /* The vec_extract patterns accept memory operands that require an
8826 address reload. Account for the cost of that reload to give the
8827 auto-inc-dec pass an incentive to try to replace them. */
8828 if (TARGET_NEON && MEM_P (SET_DEST (x))
8829 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8831 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8832 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8833 *total += COSTS_N_INSNS (1);
8834 return true;
8836 /* Likewise for the vec_set patterns. */
8837 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8838 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8839 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8841 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8842 *total = rtx_cost (mem, code, 0, speed);
8843 if (!neon_vector_mem_operand (mem, 2, true))
8844 *total += COSTS_N_INSNS (1);
8845 return true;
8847 return false;
8849 case UNSPEC:
8850 /* We cost this as high as our memory costs to allow this to
8851 be hoisted from loops. */
8852 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8854 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8856 return true;
8858 case CONST_VECTOR:
8859 if (TARGET_NEON
8860 && TARGET_HARD_FLOAT
8861 && outer == SET
8862 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8863 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8864 *total = COSTS_N_INSNS (1);
8865 else
8866 *total = COSTS_N_INSNS (4);
8867 return true;
8869 default:
8870 *total = COSTS_N_INSNS (4);
8871 return false;
8875 /* Estimates the size cost of thumb1 instructions.
8876 For now most of the code is copied from thumb1_rtx_costs. We need more
8877 fine-grained tuning when we have more related test cases. */
8878 static inline int
8879 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8881 machine_mode mode = GET_MODE (x);
8882 int words;
8884 switch (code)
8886 case ASHIFT:
8887 case ASHIFTRT:
8888 case LSHIFTRT:
8889 case ROTATERT:
8890 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8892 case PLUS:
8893 case MINUS:
8894 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8895 patterns generated by RTL expansion, especially when expanding a
8896 multiplication. */
8897 if ((GET_CODE (XEXP (x, 0)) == MULT
8898 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8899 || (GET_CODE (XEXP (x, 1)) == MULT
8900 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8901 return COSTS_N_INSNS (2);
8902 /* Deliberately fall through for normal RTXes. */
8903 case COMPARE:
8904 case NEG:
8905 case NOT:
8906 return COSTS_N_INSNS (1);
8908 case MULT:
8909 if (CONST_INT_P (XEXP (x, 1)))
8911 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
8912 into a register first. */
8913 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8914 /* For the targets which have a very small and high-latency multiply
8915 unit, we prefer to synthesize the mult with up to 5 instructions,
8916 giving a good balance between size and performance. */
8917 if (arm_arch6m && arm_m_profile_small_mul)
8918 return COSTS_N_INSNS (5);
8919 else
8920 return COSTS_N_INSNS (1) + const_size;
8922 return COSTS_N_INSNS (1);
8924 case SET:
8925 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8926 the mode. */
8927 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8928 return COSTS_N_INSNS (words)
8929 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8930 || satisfies_constraint_K (SET_SRC (x))
8931 /* thumb1_movdi_insn. */
8932 || ((words > 1) && MEM_P (SET_SRC (x))));
8934 case CONST_INT:
8935 if (outer == SET)
8937 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8938 return COSTS_N_INSNS (1);
8939 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8940 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8941 return COSTS_N_INSNS (2);
8942 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8943 if (thumb_shiftable_const (INTVAL (x)))
8944 return COSTS_N_INSNS (2);
8945 return COSTS_N_INSNS (3);
8947 else if ((outer == PLUS || outer == COMPARE)
8948 && INTVAL (x) < 256 && INTVAL (x) > -256)
8949 return 0;
8950 else if ((outer == IOR || outer == XOR || outer == AND)
8951 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8952 return COSTS_N_INSNS (1);
8953 else if (outer == AND)
8955 int i;
8956 /* This duplicates the tests in the andsi3 expander. */
8957 for (i = 9; i <= 31; i++)
8958 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8959 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8960 return COSTS_N_INSNS (2);
8962 else if (outer == ASHIFT || outer == ASHIFTRT
8963 || outer == LSHIFTRT)
8964 return 0;
8965 return COSTS_N_INSNS (2);
8967 case CONST:
8968 case CONST_DOUBLE:
8969 case LABEL_REF:
8970 case SYMBOL_REF:
8971 return COSTS_N_INSNS (3);
8973 case UDIV:
8974 case UMOD:
8975 case DIV:
8976 case MOD:
8977 return 100;
8979 case TRUNCATE:
8980 return 99;
8982 case AND:
8983 case XOR:
8984 case IOR:
8985 return COSTS_N_INSNS (1);
8987 case MEM:
8988 return (COSTS_N_INSNS (1)
8989 + COSTS_N_INSNS (1)
8990 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8991 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8992 ? COSTS_N_INSNS (1) : 0));
8994 case IF_THEN_ELSE:
8995 /* XXX a guess. */
8996 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8997 return 14;
8998 return 2;
9000 case ZERO_EXTEND:
9001 /* XXX still guessing. */
9002 switch (GET_MODE (XEXP (x, 0)))
9004 case QImode:
9005 return (1 + (mode == DImode ? 4 : 0)
9006 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9008 case HImode:
9009 return (4 + (mode == DImode ? 4 : 0)
9010 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9012 case SImode:
9013 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9015 default:
9016 return 99;
9019 default:
9020 return 99;
9024 /* RTX costs when optimizing for size. */
9025 static bool
9026 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9027 int *total)
9029 machine_mode mode = GET_MODE (x);
9030 if (TARGET_THUMB1)
9032 *total = thumb1_size_rtx_costs (x, code, outer_code);
9033 return true;
9036 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9037 switch (code)
9039 case MEM:
9040 /* A memory access costs 1 insn if the mode is small, or the address is
9041 a single register, otherwise it costs one insn per word. */
9042 if (REG_P (XEXP (x, 0)))
9043 *total = COSTS_N_INSNS (1);
9044 else if (flag_pic
9045 && GET_CODE (XEXP (x, 0)) == PLUS
9046 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9047 /* This will be split into two instructions.
9048 See arm.md:calculate_pic_address. */
9049 *total = COSTS_N_INSNS (2);
9050 else
9051 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9052 return true;
9054 case DIV:
9055 case MOD:
9056 case UDIV:
9057 case UMOD:
9058 /* Needs a libcall, so it costs about this. */
9059 *total = COSTS_N_INSNS (2);
9060 return false;
9062 case ROTATE:
9063 if (mode == SImode && REG_P (XEXP (x, 1)))
9065 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9066 return true;
9068 /* Fall through */
9069 case ROTATERT:
9070 case ASHIFT:
9071 case LSHIFTRT:
9072 case ASHIFTRT:
9073 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9075 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9076 return true;
9078 else if (mode == SImode)
9080 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9081 /* Slightly disparage register shifts, but not by much. */
9082 if (!CONST_INT_P (XEXP (x, 1)))
9083 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9084 return true;
9087 /* Needs a libcall. */
9088 *total = COSTS_N_INSNS (2);
9089 return false;
9091 case MINUS:
9092 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9093 && (mode == SFmode || !TARGET_VFP_SINGLE))
9095 *total = COSTS_N_INSNS (1);
9096 return false;
9099 if (mode == SImode)
9101 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9102 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9104 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9105 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9106 || subcode1 == ROTATE || subcode1 == ROTATERT
9107 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9108 || subcode1 == ASHIFTRT)
9110 /* It's just the cost of the two operands. */
9111 *total = 0;
9112 return false;
9115 *total = COSTS_N_INSNS (1);
9116 return false;
9119 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9120 return false;
9122 case PLUS:
9123 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9124 && (mode == SFmode || !TARGET_VFP_SINGLE))
9126 *total = COSTS_N_INSNS (1);
9127 return false;
9130 /* A shift as a part of ADD costs nothing. */
9131 if (GET_CODE (XEXP (x, 0)) == MULT
9132 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9134 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9135 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9136 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9137 return true;
9140 /* Fall through */
9141 case AND: case XOR: case IOR:
9142 if (mode == SImode)
9144 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9146 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9147 || subcode == LSHIFTRT || subcode == ASHIFTRT
9148 || (code == AND && subcode == NOT))
9150 /* It's just the cost of the two operands. */
9151 *total = 0;
9152 return false;
9156 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9157 return false;
9159 case MULT:
9160 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9161 return false;
9163 case NEG:
9164 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9165 && (mode == SFmode || !TARGET_VFP_SINGLE))
9167 *total = COSTS_N_INSNS (1);
9168 return false;
9171 /* Fall through */
9172 case NOT:
9173 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9175 return false;
9177 case IF_THEN_ELSE:
9178 *total = 0;
9179 return false;
9181 case COMPARE:
9182 if (cc_register (XEXP (x, 0), VOIDmode))
9183 *total = 0;
9184 else
9185 *total = COSTS_N_INSNS (1);
9186 return false;
9188 case ABS:
9189 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9190 && (mode == SFmode || !TARGET_VFP_SINGLE))
9191 *total = COSTS_N_INSNS (1);
9192 else
9193 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9194 return false;
9196 case SIGN_EXTEND:
9197 case ZERO_EXTEND:
9198 return arm_rtx_costs_1 (x, outer_code, total, 0);
9200 case CONST_INT:
9201 if (const_ok_for_arm (INTVAL (x)))
9202 /* A multiplication by a constant requires another instruction
9203 to load the constant to a register. */
9204 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9205 ? 1 : 0);
9206 else if (const_ok_for_arm (~INTVAL (x)))
9207 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9208 else if (const_ok_for_arm (-INTVAL (x)))
9210 if (outer_code == COMPARE || outer_code == PLUS
9211 || outer_code == MINUS)
9212 *total = 0;
9213 else
9214 *total = COSTS_N_INSNS (1);
9216 else
9217 *total = COSTS_N_INSNS (2);
9218 return true;
9220 case CONST:
9221 case LABEL_REF:
9222 case SYMBOL_REF:
9223 *total = COSTS_N_INSNS (2);
9224 return true;
9226 case CONST_DOUBLE:
9227 *total = COSTS_N_INSNS (4);
9228 return true;
9230 case CONST_VECTOR:
9231 if (TARGET_NEON
9232 && TARGET_HARD_FLOAT
9233 && outer_code == SET
9234 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9235 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9236 *total = COSTS_N_INSNS (1);
9237 else
9238 *total = COSTS_N_INSNS (4);
9239 return true;
9241 case HIGH:
9242 case LO_SUM:
9243 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9244 cost of these slightly. */
9245 *total = COSTS_N_INSNS (1) + 1;
9246 return true;
9248 case SET:
9249 return false;
9251 default:
9252 if (mode != VOIDmode)
9253 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9254 else
9255 *total = COSTS_N_INSNS (4); /* Who knows? */
9256 return false;
9260 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9261 operand, then return the operand that is being shifted. If the shift
9262 is not by a constant, then set SHIFT_REG to point to the operand.
9263 Return NULL if OP is not a shifter operand. */
9264 static rtx
9265 shifter_op_p (rtx op, rtx *shift_reg)
9267 enum rtx_code code = GET_CODE (op);
9269 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9270 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9271 return XEXP (op, 0);
9272 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9273 return XEXP (op, 0);
9274 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9275 || code == ASHIFTRT)
9277 if (!CONST_INT_P (XEXP (op, 1)))
9278 *shift_reg = XEXP (op, 1);
9279 return XEXP (op, 0);
9282 return NULL;
9285 static bool
9286 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9288 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9289 gcc_assert (GET_CODE (x) == UNSPEC);
9291 switch (XINT (x, 1))
9293 case UNSPEC_UNALIGNED_LOAD:
9294 /* We can only do unaligned loads into the integer unit, and we can't
9295 use LDM or LDRD. */
9296 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9297 if (speed_p)
9298 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9299 + extra_cost->ldst.load_unaligned);
9301 #ifdef NOT_YET
9302 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9303 ADDR_SPACE_GENERIC, speed_p);
9304 #endif
9305 return true;
9307 case UNSPEC_UNALIGNED_STORE:
9308 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9309 if (speed_p)
9310 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9311 + extra_cost->ldst.store_unaligned);
9313 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9314 #ifdef NOT_YET
9315 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9316 ADDR_SPACE_GENERIC, speed_p);
9317 #endif
9318 return true;
9320 case UNSPEC_VRINTZ:
9321 case UNSPEC_VRINTP:
9322 case UNSPEC_VRINTM:
9323 case UNSPEC_VRINTR:
9324 case UNSPEC_VRINTX:
9325 case UNSPEC_VRINTA:
9326 *cost = COSTS_N_INSNS (1);
9327 if (speed_p)
9328 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9330 return true;
9331 default:
9332 *cost = COSTS_N_INSNS (2);
9333 break;
9335 return false;
9338 /* Cost of a libcall. We assume one insn per argument, an amount for the
9339 call (one insn for -Os) and then one for processing the result. */
9340 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
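/* Annotation (not part of the original source): for example a
   two-operand libcall such as a division costs
   LIBCALL_COST (2) = COSTS_N_INSNS (20) when speed_p is true and
   COSTS_N_INSNS (4) when optimizing for size.  */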
9342 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9343 do \
9345 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9346 if (shift_op != NULL \
9347 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9349 if (shift_reg) \
9351 if (speed_p) \
9352 *cost += extra_cost->alu.arith_shift_reg; \
9353 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9355 else if (speed_p) \
9356 *cost += extra_cost->alu.arith_shift; \
9358 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9359 + rtx_cost (XEXP (x, 1 - IDX), \
9360 OP, 1, speed_p)); \
9361 return true; \
9364 while (0);
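/* Annotation (not part of the original source): a sketch of what the
   macro above does for a narrow-mode operation such as
   (plus:HI (ashift:HI r1 (const_int 2)) r2) with IDX = 0, assuming
   arm_rtx_shift_left_p accepts the ashift.  shifter_op_p returns r1 and
   leaves shift_reg NULL because the shift amount is constant, so the
   cost becomes the existing *cost plus extra_cost->alu.arith_shift (when
   speed_p) plus the costs of r1 and r2, modelling a single
   arithmetic-with-shift instruction.  */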
9366 /* RTX costs. Make an estimate of the cost of executing the operation
9367 X, which is contained with an operation with code OUTER_CODE.
9368 SPEED_P indicates whether the cost desired is the performance cost,
9369 or the size cost. The estimate is stored in COST and the return
9370 value is TRUE if the cost calculation is final, or FALSE if the
9371 caller should recurse through the operands of X to add additional
9372 costs.
9374 We currently make no attempt to model the size savings of Thumb-2
9375 16-bit instructions. At the normal points in compilation where
9376 this code is called we have no measure of whether the condition
9377 flags are live or not, and thus no realistic way to determine what
9378 the size will eventually be. */
9379 static bool
9380 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9381 const struct cpu_cost_table *extra_cost,
9382 int *cost, bool speed_p)
9384 machine_mode mode = GET_MODE (x);
9386 if (TARGET_THUMB1)
9388 if (speed_p)
9389 *cost = thumb1_rtx_costs (x, code, outer_code);
9390 else
9391 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9392 return true;
9395 switch (code)
9397 case SET:
9398 *cost = 0;
9399 /* SET RTXs don't have a mode so we get it from the destination. */
9400 mode = GET_MODE (SET_DEST (x));
9402 if (REG_P (SET_SRC (x))
9403 && REG_P (SET_DEST (x)))
9405 /* Assume that most copies can be done with a single insn,
9406 unless we don't have HW FP, in which case everything
9407 larger than word mode will require two insns. */
9408 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9409 && GET_MODE_SIZE (mode) > 4)
9410 || mode == DImode)
9411 ? 2 : 1);
9412 /* Conditional register moves can be encoded
9413 in 16 bits in Thumb mode. */
9414 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9415 *cost >>= 1;
9417 return true;
9420 if (CONST_INT_P (SET_SRC (x)))
9422 /* Handle CONST_INT here, since the value doesn't have a mode
9423 and we would otherwise be unable to work out the true cost. */
9424 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9425 outer_code = SET;
9426 /* Slightly lower the cost of setting a core reg to a constant.
9427 This helps break up chains and allows for better scheduling. */
9428 if (REG_P (SET_DEST (x))
9429 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9430 *cost -= 1;
9431 x = SET_SRC (x);
9432 /* Immediate moves with an immediate in the range [0, 255] can be
9433 encoded in 16 bits in Thumb mode. */
9434 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9435 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9436 *cost >>= 1;
9437 goto const_int_cost;
9440 return false;
9442 case MEM:
9443 /* A memory access costs 1 insn if the mode is small, or the address is
9444 a single register, otherwise it costs one insn per word. */
9445 if (REG_P (XEXP (x, 0)))
9446 *cost = COSTS_N_INSNS (1);
9447 else if (flag_pic
9448 && GET_CODE (XEXP (x, 0)) == PLUS
9449 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9450 /* This will be split into two instructions.
9451 See arm.md:calculate_pic_address. */
9452 *cost = COSTS_N_INSNS (2);
9453 else
9454 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9456 /* For speed optimizations, add the costs of the address and
9457 accessing memory. */
9458 if (speed_p)
9459 #ifdef NOT_YET
9460 *cost += (extra_cost->ldst.load
9461 + arm_address_cost (XEXP (x, 0), mode,
9462 ADDR_SPACE_GENERIC, speed_p));
9463 #else
9464 *cost += extra_cost->ldst.load;
9465 #endif
9466 return true;
9468 case PARALLEL:
9470 /* Calculations of LDM costs are complex. We assume an initial cost
9471 (ldm_1st) which will load the number of registers mentioned in
9472 ldm_regs_per_insn_1st registers; then each additional
9473 ldm_regs_per_insn_subsequent registers cost one more insn. The
9474 formula for N regs is thus:
9476 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9477 + ldm_regs_per_insn_subsequent - 1)
9478 / ldm_regs_per_insn_subsequent).
9480 Additional costs may also be added for addressing. A similar
9481 formula is used for STM. */
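/* Worked example of the formula above (the per-insn register counts here
   are hypothetical, not taken from any particular tuning table): with
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   loading 5 registers adds
     COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2) = COSTS_N_INSNS (2)
   on top of the initial ldm_1st cost.  */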
9483 bool is_ldm = load_multiple_operation (x, SImode);
9484 bool is_stm = store_multiple_operation (x, SImode);
9486 *cost = COSTS_N_INSNS (1);
9488 if (is_ldm || is_stm)
9490 if (speed_p)
9492 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9493 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9494 ? extra_cost->ldst.ldm_regs_per_insn_1st
9495 : extra_cost->ldst.stm_regs_per_insn_1st;
9496 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9497 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9498 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9500 *cost += regs_per_insn_1st
9501 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9502 + regs_per_insn_sub - 1)
9503 / regs_per_insn_sub);
9504 return true;
9508 return false;
9510 case DIV:
9511 case UDIV:
9512 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9513 && (mode == SFmode || !TARGET_VFP_SINGLE))
9514 *cost = COSTS_N_INSNS (speed_p
9515 ? extra_cost->fp[mode != SFmode].div : 1);
9516 else if (mode == SImode && TARGET_IDIV)
9517 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9518 else
9519 *cost = LIBCALL_COST (2);
9520 return false; /* All arguments must be in registers. */
9522 case MOD:
9523 case UMOD:
9524 *cost = LIBCALL_COST (2);
9525 return false; /* All arguments must be in registers. */
9527 case ROTATE:
9528 if (mode == SImode && REG_P (XEXP (x, 1)))
9530 *cost = (COSTS_N_INSNS (2)
9531 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9532 if (speed_p)
9533 *cost += extra_cost->alu.shift_reg;
9534 return true;
9536 /* Fall through */
9537 case ROTATERT:
9538 case ASHIFT:
9539 case LSHIFTRT:
9540 case ASHIFTRT:
9541 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9543 *cost = (COSTS_N_INSNS (3)
9544 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9545 if (speed_p)
9546 *cost += 2 * extra_cost->alu.shift;
9547 return true;
9549 else if (mode == SImode)
9551 *cost = (COSTS_N_INSNS (1)
9552 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9553 /* Slightly disparage register shifts at -Os, but not by much. */
9554 if (!CONST_INT_P (XEXP (x, 1)))
9555 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9556 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9557 return true;
9559 else if (GET_MODE_CLASS (mode) == MODE_INT
9560 && GET_MODE_SIZE (mode) < 4)
9562 if (code == ASHIFT)
9564 *cost = (COSTS_N_INSNS (1)
9565 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9566 /* Slightly disparage register shifts at -Os, but not by
9567 much. */
9568 if (!CONST_INT_P (XEXP (x, 1)))
9569 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9570 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9572 else if (code == LSHIFTRT || code == ASHIFTRT)
9574 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9576 /* Can use SBFX/UBFX. */
9577 *cost = COSTS_N_INSNS (1);
9578 if (speed_p)
9579 *cost += extra_cost->alu.bfx;
9580 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9582 else
9584 *cost = COSTS_N_INSNS (2);
9585 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9586 if (speed_p)
9588 if (CONST_INT_P (XEXP (x, 1)))
9589 *cost += 2 * extra_cost->alu.shift;
9590 else
9591 *cost += (extra_cost->alu.shift
9592 + extra_cost->alu.shift_reg);
9594 else
9595 /* Slightly disparage register shifts. */
9596 *cost += !CONST_INT_P (XEXP (x, 1));
9599 else /* Rotates. */
9601 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9602 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9603 if (speed_p)
9605 if (CONST_INT_P (XEXP (x, 1)))
9606 *cost += (2 * extra_cost->alu.shift
9607 + extra_cost->alu.log_shift);
9608 else
9609 *cost += (extra_cost->alu.shift
9610 + extra_cost->alu.shift_reg
9611 + extra_cost->alu.log_shift_reg);
9614 return true;
9617 *cost = LIBCALL_COST (2);
9618 return false;
9620 case BSWAP:
9621 if (arm_arch6)
9623 if (mode == SImode)
9625 *cost = COSTS_N_INSNS (1);
9626 if (speed_p)
9627 *cost += extra_cost->alu.rev;
9629 return false;
9632 else
9634 /* No rev instruction available. Look at arm_legacy_rev
9635 and thumb_legacy_rev for the form of RTL used then. */
9636 if (TARGET_THUMB)
9638 *cost = COSTS_N_INSNS (10);
9640 if (speed_p)
9642 *cost += 6 * extra_cost->alu.shift;
9643 *cost += 3 * extra_cost->alu.logical;
9646 else
9648 *cost = COSTS_N_INSNS (5);
9650 if (speed_p)
9652 *cost += 2 * extra_cost->alu.shift;
9653 *cost += extra_cost->alu.arith_shift;
9654 *cost += 2 * extra_cost->alu.logical;
9657 return true;
9659 return false;
9661 case MINUS:
9662 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9663 && (mode == SFmode || !TARGET_VFP_SINGLE))
9665 *cost = COSTS_N_INSNS (1);
9666 if (GET_CODE (XEXP (x, 0)) == MULT
9667 || GET_CODE (XEXP (x, 1)) == MULT)
9669 rtx mul_op0, mul_op1, sub_op;
9671 if (speed_p)
9672 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9674 if (GET_CODE (XEXP (x, 0)) == MULT)
9676 mul_op0 = XEXP (XEXP (x, 0), 0);
9677 mul_op1 = XEXP (XEXP (x, 0), 1);
9678 sub_op = XEXP (x, 1);
9680 else
9682 mul_op0 = XEXP (XEXP (x, 1), 0);
9683 mul_op1 = XEXP (XEXP (x, 1), 1);
9684 sub_op = XEXP (x, 0);
9687 /* The first operand of the multiply may be optionally
9688 negated. */
9689 if (GET_CODE (mul_op0) == NEG)
9690 mul_op0 = XEXP (mul_op0, 0);
9692 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9693 + rtx_cost (mul_op1, code, 0, speed_p)
9694 + rtx_cost (sub_op, code, 0, speed_p));
9696 return true;
9699 if (speed_p)
9700 *cost += extra_cost->fp[mode != SFmode].addsub;
9701 return false;
9704 if (mode == SImode)
9706 rtx shift_by_reg = NULL;
9707 rtx shift_op;
9708 rtx non_shift_op;
9710 *cost = COSTS_N_INSNS (1);
9712 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9713 if (shift_op == NULL)
9715 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9716 non_shift_op = XEXP (x, 0);
9718 else
9719 non_shift_op = XEXP (x, 1);
9721 if (shift_op != NULL)
9723 if (shift_by_reg != NULL)
9725 if (speed_p)
9726 *cost += extra_cost->alu.arith_shift_reg;
9727 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9729 else if (speed_p)
9730 *cost += extra_cost->alu.arith_shift;
9732 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9733 + rtx_cost (non_shift_op, code, 0, speed_p));
9734 return true;
9737 if (arm_arch_thumb2
9738 && GET_CODE (XEXP (x, 1)) == MULT)
9740 /* MLS. */
9741 if (speed_p)
9742 *cost += extra_cost->mult[0].add;
9743 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9744 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9745 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9746 return true;
9749 if (CONST_INT_P (XEXP (x, 0)))
9751 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9752 INTVAL (XEXP (x, 0)), NULL_RTX,
9753 NULL_RTX, 1, 0);
9754 *cost = COSTS_N_INSNS (insns);
9755 if (speed_p)
9756 *cost += insns * extra_cost->alu.arith;
9757 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9758 return true;
9760 else if (speed_p)
9761 *cost += extra_cost->alu.arith;
9763 return false;
9766 if (GET_MODE_CLASS (mode) == MODE_INT
9767 && GET_MODE_SIZE (mode) < 4)
9769 rtx shift_op, shift_reg;
9770 shift_reg = NULL;
9772 /* We check both sides of the MINUS for shifter operands since,
9773 unlike PLUS, it's not commutative. */
9775 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9776 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9778 /* Slightly disparage, as we might need to widen the result. */
9779 *cost = 1 + COSTS_N_INSNS (1);
9780 if (speed_p)
9781 *cost += extra_cost->alu.arith;
9783 if (CONST_INT_P (XEXP (x, 0)))
9785 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9786 return true;
9789 return false;
9792 if (mode == DImode)
9794 *cost = COSTS_N_INSNS (2);
9796 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9798 rtx op1 = XEXP (x, 1);
9800 if (speed_p)
9801 *cost += 2 * extra_cost->alu.arith;
9803 if (GET_CODE (op1) == ZERO_EXTEND)
9804 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9805 else
9806 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9807 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9808 0, speed_p);
9809 return true;
9811 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9813 if (speed_p)
9814 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9815 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9816 0, speed_p)
9817 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9818 return true;
9820 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9821 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9823 if (speed_p)
9824 *cost += (extra_cost->alu.arith
9825 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9826 ? extra_cost->alu.arith
9827 : extra_cost->alu.arith_shift));
9828 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (x, 1), 0),
9830 GET_CODE (XEXP (x, 1)), 0, speed_p));
9831 return true;
9834 if (speed_p)
9835 *cost += 2 * extra_cost->alu.arith;
9836 return false;
9839 /* Vector mode? */
9841 *cost = LIBCALL_COST (2);
9842 return false;
9844 case PLUS:
9845 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9846 && (mode == SFmode || !TARGET_VFP_SINGLE))
9848 *cost = COSTS_N_INSNS (1);
9849 if (GET_CODE (XEXP (x, 0)) == MULT)
9851 rtx mul_op0, mul_op1, add_op;
9853 if (speed_p)
9854 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9856 mul_op0 = XEXP (XEXP (x, 0), 0);
9857 mul_op1 = XEXP (XEXP (x, 0), 1);
9858 add_op = XEXP (x, 1);
9860 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9861 + rtx_cost (mul_op1, code, 0, speed_p)
9862 + rtx_cost (add_op, code, 0, speed_p));
9864 return true;
9867 if (speed_p)
9868 *cost += extra_cost->fp[mode != SFmode].addsub;
9869 return false;
9871 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9873 *cost = LIBCALL_COST (2);
9874 return false;
9877 /* Narrow modes can be synthesized in SImode, but the range
9878 of useful sub-operations is limited. Check for shift operations
9879 on one of the operands. Only left shifts can be used in the
9880 narrow modes. */
9881 if (GET_MODE_CLASS (mode) == MODE_INT
9882 && GET_MODE_SIZE (mode) < 4)
9884 rtx shift_op, shift_reg;
9885 shift_reg = NULL;
9887 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9889 if (CONST_INT_P (XEXP (x, 1)))
9891 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9892 INTVAL (XEXP (x, 1)), NULL_RTX,
9893 NULL_RTX, 1, 0);
9894 *cost = COSTS_N_INSNS (insns);
9895 if (speed_p)
9896 *cost += insns * extra_cost->alu.arith;
9897 /* Slightly penalize a narrow operation as the result may
9898 need widening. */
9899 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9900 return true;
9903 /* Slightly penalize a narrow operation as the result may
9904 need widening. */
9905 *cost = 1 + COSTS_N_INSNS (1);
9906 if (speed_p)
9907 *cost += extra_cost->alu.arith;
9909 return false;
9912 if (mode == SImode)
9914 rtx shift_op, shift_reg;
9916 *cost = COSTS_N_INSNS (1);
9917 if (TARGET_INT_SIMD
9918 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9919 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9921 /* UXTA[BH] or SXTA[BH]. */
9922 if (speed_p)
9923 *cost += extra_cost->alu.extend_arith;
9924 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9925 speed_p)
9926 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9927 return true;
9930 shift_reg = NULL;
9931 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9932 if (shift_op != NULL)
9934 if (shift_reg)
9936 if (speed_p)
9937 *cost += extra_cost->alu.arith_shift_reg;
9938 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9940 else if (speed_p)
9941 *cost += extra_cost->alu.arith_shift;
9943 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9944 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9945 return true;
9947 if (GET_CODE (XEXP (x, 0)) == MULT)
9949 rtx mul_op = XEXP (x, 0);
9951 *cost = COSTS_N_INSNS (1);
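/* The condition below matches multiplies whose operands are each either a
   sign_extend of a halfword or an arithmetic right shift by 16, i.e. the
   bottom or top halfword of a register.  For example (illustrative),
     (plus (mult (sign_extend X) (ashiftrt Y (const_int 16))) ACC)
   corresponds to a single SMLABT instruction.  */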
9953 if (TARGET_DSP_MULTIPLY
9954 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9955 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9956 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9957 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9958 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9959 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9960 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9961 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9962 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9963 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9965 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9966 == 16))))))
9968 /* SMLA[BT][BT]. */
9969 if (speed_p)
9970 *cost += extra_cost->mult[0].extend_add;
9971 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9972 SIGN_EXTEND, 0, speed_p)
9973 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9976 return true;
9979 if (speed_p)
9980 *cost += extra_cost->mult[0].add;
9981 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9982 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9983 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9984 return true;
9986 if (CONST_INT_P (XEXP (x, 1)))
9988 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9989 INTVAL (XEXP (x, 1)), NULL_RTX,
9990 NULL_RTX, 1, 0);
9991 *cost = COSTS_N_INSNS (insns);
9992 if (speed_p)
9993 *cost += insns * extra_cost->alu.arith;
9994 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9995 return true;
9997 else if (speed_p)
9998 *cost += extra_cost->alu.arith;
10000 return false;
10003 if (mode == DImode)
10005 if (arm_arch3m
10006 && GET_CODE (XEXP (x, 0)) == MULT
10007 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10008 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10009 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10012 *cost = COSTS_N_INSNS (1);
10013 if (speed_p)
10014 *cost += extra_cost->mult[1].extend_add;
10015 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10016 ZERO_EXTEND, 0, speed_p)
10017 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10020 return true;
10023 *cost = COSTS_N_INSNS (2);
10025 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10026 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10028 if (speed_p)
10029 *cost += (extra_cost->alu.arith
10030 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10031 ? extra_cost->alu.arith
10032 : extra_cost->alu.arith_shift));
10034 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10035 speed_p)
10036 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10037 return true;
10040 if (speed_p)
10041 *cost += 2 * extra_cost->alu.arith;
10042 return false;
10045 /* Vector mode? */
10046 *cost = LIBCALL_COST (2);
10047 return false;
10048 case IOR:
10049 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10051 *cost = COSTS_N_INSNS (1);
10052 if (speed_p)
10053 *cost += extra_cost->alu.rev;
10055 return true;
10057 /* Fall through. */
10058 case AND: case XOR:
10059 if (mode == SImode)
10061 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10062 rtx op0 = XEXP (x, 0);
10063 rtx shift_op, shift_reg;
10065 *cost = COSTS_N_INSNS (1);
10067 if (subcode == NOT
10068 && (code == AND
10069 || (code == IOR && TARGET_THUMB2)))
10070 op0 = XEXP (op0, 0);
10072 shift_reg = NULL;
10073 shift_op = shifter_op_p (op0, &shift_reg);
10074 if (shift_op != NULL)
10076 if (shift_reg)
10078 if (speed_p)
10079 *cost += extra_cost->alu.log_shift_reg;
10080 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10082 else if (speed_p)
10083 *cost += extra_cost->alu.log_shift;
10085 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10086 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10087 return true;
10090 if (CONST_INT_P (XEXP (x, 1)))
10092 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10093 INTVAL (XEXP (x, 1)), NULL_RTX,
10094 NULL_RTX, 1, 0);
10096 *cost = COSTS_N_INSNS (insns);
10097 if (speed_p)
10098 *cost += insns * extra_cost->alu.logical;
10099 *cost += rtx_cost (op0, code, 0, speed_p);
10100 return true;
10103 if (speed_p)
10104 *cost += extra_cost->alu.logical;
10105 *cost += (rtx_cost (op0, code, 0, speed_p)
10106 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10107 return true;
10110 if (mode == DImode)
10112 rtx op0 = XEXP (x, 0);
10113 enum rtx_code subcode = GET_CODE (op0);
10115 *cost = COSTS_N_INSNS (2);
10117 if (subcode == NOT
10118 && (code == AND
10119 || (code == IOR && TARGET_THUMB2)))
10120 op0 = XEXP (op0, 0);
10122 if (GET_CODE (op0) == ZERO_EXTEND)
10124 if (speed_p)
10125 *cost += 2 * extra_cost->alu.logical;
10127 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10128 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10129 return true;
10131 else if (GET_CODE (op0) == SIGN_EXTEND)
10133 if (speed_p)
10134 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10136 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10137 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10138 return true;
10141 if (speed_p)
10142 *cost += 2 * extra_cost->alu.logical;
10144 return true;
10146 /* Vector mode? */
10148 *cost = LIBCALL_COST (2);
10149 return false;
10151 case MULT:
10152 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10153 && (mode == SFmode || !TARGET_VFP_SINGLE))
10155 rtx op0 = XEXP (x, 0);
10157 *cost = COSTS_N_INSNS (1);
10159 if (GET_CODE (op0) == NEG)
10160 op0 = XEXP (op0, 0);
10162 if (speed_p)
10163 *cost += extra_cost->fp[mode != SFmode].mult;
10165 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10166 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10167 return true;
10169 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10171 *cost = LIBCALL_COST (2);
10172 return false;
10175 if (mode == SImode)
10177 *cost = COSTS_N_INSNS (1);
10178 if (TARGET_DSP_MULTIPLY
10179 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10180 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10181 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10183 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10184 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10186 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10187 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10188 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10189 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10190 && (INTVAL (XEXP (XEXP (x, 1), 1))
10191 == 16))))))
10193 /* SMUL[TB][TB]. */
10194 if (speed_p)
10195 *cost += extra_cost->mult[0].extend;
10196 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10197 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10198 return true;
10200 if (speed_p)
10201 *cost += extra_cost->mult[0].simple;
10202 return false;
10205 if (mode == DImode)
10207 if (arm_arch3m
10208 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10209 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10210 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10211 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10213 *cost = COSTS_N_INSNS (1);
10214 if (speed_p)
10215 *cost += extra_cost->mult[1].extend;
10216 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10217 ZERO_EXTEND, 0, speed_p)
10218 + rtx_cost (XEXP (XEXP (x, 1), 0),
10219 ZERO_EXTEND, 0, speed_p));
10220 return true;
10223 *cost = LIBCALL_COST (2);
10224 return false;
10227 /* Vector mode? */
10228 *cost = LIBCALL_COST (2);
10229 return false;
10231 case NEG:
10232 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10233 && (mode == SFmode || !TARGET_VFP_SINGLE))
10235 *cost = COSTS_N_INSNS (1);
10236 if (speed_p)
10237 *cost += extra_cost->fp[mode != SFmode].neg;
10239 return false;
10241 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10243 *cost = LIBCALL_COST (1);
10244 return false;
10247 if (mode == SImode)
10249 if (GET_CODE (XEXP (x, 0)) == ABS)
10251 *cost = COSTS_N_INSNS (2);
10252 /* Assume the non-flag-changing variant. */
10253 if (speed_p)
10254 *cost += (extra_cost->alu.log_shift
10255 + extra_cost->alu.arith_shift);
10256 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10257 return true;
10260 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10261 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10263 *cost = COSTS_N_INSNS (2);
10264 /* No extra cost for MOV imm and MVN imm. */
10265 /* If the comparison op is using the flags, there's no further
10266 cost, otherwise we need to add the cost of the comparison. */
10267 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10268 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10269 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10271 *cost += (COSTS_N_INSNS (1)
10272 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10273 speed_p)
10274 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10275 speed_p));
10276 if (speed_p)
10277 *cost += extra_cost->alu.arith;
10279 return true;
10281 *cost = COSTS_N_INSNS (1);
10282 if (speed_p)
10283 *cost += extra_cost->alu.arith;
10284 return false;
10287 if (GET_MODE_CLASS (mode) == MODE_INT
10288 && GET_MODE_SIZE (mode) < 4)
10290 /* Slightly disparage, as we might need an extend operation. */
10291 *cost = 1 + COSTS_N_INSNS (1);
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 return false;
10297 if (mode == DImode)
10299 *cost = COSTS_N_INSNS (2);
10300 if (speed_p)
10301 *cost += 2 * extra_cost->alu.arith;
10302 return false;
10305 /* Vector mode? */
10306 *cost = LIBCALL_COST (1);
10307 return false;
10309 case NOT:
10310 if (mode == SImode)
10312 rtx shift_op;
10313 rtx shift_reg = NULL;
10315 *cost = COSTS_N_INSNS (1);
10316 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10318 if (shift_op)
10320 if (shift_reg != NULL)
10322 if (speed_p)
10323 *cost += extra_cost->alu.log_shift_reg;
10324 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10326 else if (speed_p)
10327 *cost += extra_cost->alu.log_shift;
10328 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10329 return true;
10332 if (speed_p)
10333 *cost += extra_cost->alu.logical;
10334 return false;
10336 if (mode == DImode)
10338 *cost = COSTS_N_INSNS (2);
10339 return false;
10342 /* Vector mode? */
10344 *cost += LIBCALL_COST (1);
10345 return false;
10347 case IF_THEN_ELSE:
10349 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10351 *cost = COSTS_N_INSNS (4);
10352 return true;
10354 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10355 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10357 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10358 /* Assume that if one arm of the if_then_else is a register,
10359 that it will be tied with the result and eliminate the
10360 conditional insn. */
10361 if (REG_P (XEXP (x, 1)))
10362 *cost += op2cost;
10363 else if (REG_P (XEXP (x, 2)))
10364 *cost += op1cost;
10365 else
10367 if (speed_p)
10369 if (extra_cost->alu.non_exec_costs_exec)
10370 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10371 else
10372 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10374 else
10375 *cost += op1cost + op2cost;
10378 return true;
10380 case COMPARE:
10381 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10382 *cost = 0;
10383 else
10385 machine_mode op0mode;
10386 /* We'll mostly assume that the cost of a compare is the cost of the
10387 LHS. However, there are some notable exceptions. */
10389 /* Floating point compares are never done as side-effects. */
10390 op0mode = GET_MODE (XEXP (x, 0));
10391 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10392 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10394 *cost = COSTS_N_INSNS (1);
10395 if (speed_p)
10396 *cost += extra_cost->fp[op0mode != SFmode].compare;
10398 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10400 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10401 return true;
10404 return false;
10406 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10408 *cost = LIBCALL_COST (2);
10409 return false;
10412 /* DImode compares normally take two insns. */
10413 if (op0mode == DImode)
10415 *cost = COSTS_N_INSNS (2);
10416 if (speed_p)
10417 *cost += 2 * extra_cost->alu.arith;
10418 return false;
10421 if (op0mode == SImode)
10423 rtx shift_op;
10424 rtx shift_reg;
10426 if (XEXP (x, 1) == const0_rtx
10427 && !(REG_P (XEXP (x, 0))
10428 || (GET_CODE (XEXP (x, 0)) == SUBREG
10429 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10431 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10433 /* Multiply operations that set the flags are often
10434 significantly more expensive. */
10435 if (speed_p
10436 && GET_CODE (XEXP (x, 0)) == MULT
10437 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10438 *cost += extra_cost->mult[0].flag_setting;
10440 if (speed_p
10441 && GET_CODE (XEXP (x, 0)) == PLUS
10442 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10443 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10444 0), 1), mode))
10445 *cost += extra_cost->mult[0].flag_setting;
10446 return true;
10449 shift_reg = NULL;
10450 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10451 if (shift_op != NULL)
10453 *cost = COSTS_N_INSNS (1);
10454 if (shift_reg != NULL)
10456 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10457 if (speed_p)
10458 *cost += extra_cost->alu.arith_shift_reg;
10460 else if (speed_p)
10461 *cost += extra_cost->alu.arith_shift;
10462 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10463 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10464 return true;
10467 *cost = COSTS_N_INSNS (1);
10468 if (speed_p)
10469 *cost += extra_cost->alu.arith;
10470 if (CONST_INT_P (XEXP (x, 1))
10471 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10473 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10474 return true;
10476 return false;
10479 /* Vector mode? */
10481 *cost = LIBCALL_COST (2);
10482 return false;
10484 return true;
10486 case EQ:
10487 case NE:
10488 case LT:
10489 case LE:
10490 case GT:
10491 case GE:
10492 case LTU:
10493 case LEU:
10494 case GEU:
10495 case GTU:
10496 case ORDERED:
10497 case UNORDERED:
10498 case UNEQ:
10499 case UNLE:
10500 case UNLT:
10501 case UNGE:
10502 case UNGT:
10503 case LTGT:
10504 if (outer_code == SET)
10506 /* Is it a store-flag operation? */
10507 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10508 && XEXP (x, 1) == const0_rtx)
10510 /* Thumb also needs an IT insn. */
10511 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10512 return true;
10514 if (XEXP (x, 1) == const0_rtx)
10516 switch (code)
10518 case LT:
10519 /* LSR Rd, Rn, #31. */
10520 *cost = COSTS_N_INSNS (1);
10521 if (speed_p)
10522 *cost += extra_cost->alu.shift;
10523 break;
10525 case EQ:
10526 /* RSBS T1, Rn, #0
10527 ADC Rd, Rn, T1. */
10529 case NE:
10530 /* SUBS T1, Rn, #1
10531 SBC Rd, Rn, T1. */
10532 *cost = COSTS_N_INSNS (2);
10533 break;
10535 case LE:
10536 /* RSBS T1, Rn, Rn, LSR #31
10537 ADC Rd, Rn, T1. */
10538 *cost = COSTS_N_INSNS (2);
10539 if (speed_p)
10540 *cost += extra_cost->alu.arith_shift;
10541 break;
10543 case GT:
10544 /* RSB Rd, Rn, Rn, ASR #1
10545 LSR Rd, Rd, #31. */
10546 *cost = COSTS_N_INSNS (2);
10547 if (speed_p)
10548 *cost += (extra_cost->alu.arith_shift
10549 + extra_cost->alu.shift);
10550 break;
10552 case GE:
10553 /* ASR Rd, Rn, #31
10554 ADD Rd, Rn, #1. */
10555 *cost = COSTS_N_INSNS (2);
10556 if (speed_p)
10557 *cost += extra_cost->alu.shift;
10558 break;
10560 default:
10561 /* Remaining cases are either meaningless or would take
10562 three insns anyway. */
10563 *cost = COSTS_N_INSNS (3);
10564 break;
10566 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10567 return true;
10569 else
10571 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10572 if (CONST_INT_P (XEXP (x, 1))
10573 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10575 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10576 return true;
10579 return false;
10582 /* Not directly inside a set. If it involves the condition code
10583 register it must be the condition for a branch, cond_exec or
10584 I_T_E operation. Since the comparison is performed elsewhere
10585 this is just the control part which has no additional
10586 cost. */
10587 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10588 && XEXP (x, 1) == const0_rtx)
10590 *cost = 0;
10591 return true;
10593 return false;
10595 case ABS:
10596 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10597 && (mode == SFmode || !TARGET_VFP_SINGLE))
10599 *cost = COSTS_N_INSNS (1);
10600 if (speed_p)
10601 *cost += extra_cost->fp[mode != SFmode].neg;
10603 return false;
10605 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10607 *cost = LIBCALL_COST (1);
10608 return false;
10611 if (mode == SImode)
10613 *cost = COSTS_N_INSNS (1);
10614 if (speed_p)
10615 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10616 return false;
10618 /* Vector mode? */
10619 *cost = LIBCALL_COST (1);
10620 return false;
10622 case SIGN_EXTEND:
10623 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10624 && MEM_P (XEXP (x, 0)))
10626 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10628 if (mode == DImode)
10629 *cost += COSTS_N_INSNS (1);
10631 if (!speed_p)
10632 return true;
10634 if (GET_MODE (XEXP (x, 0)) == SImode)
10635 *cost += extra_cost->ldst.load;
10636 else
10637 *cost += extra_cost->ldst.load_sign_extend;
10639 if (mode == DImode)
10640 *cost += extra_cost->alu.shift;
10642 return true;
10645 /* Widening from less than 32-bits requires an extend operation. */
10646 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10648 /* We have SXTB/SXTH. */
10649 *cost = COSTS_N_INSNS (1);
10650 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10651 if (speed_p)
10652 *cost += extra_cost->alu.extend;
10654 else if (GET_MODE (XEXP (x, 0)) != SImode)
10656 /* Needs two shifts. */
10657 *cost = COSTS_N_INSNS (2);
10658 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10659 if (speed_p)
10660 *cost += 2 * extra_cost->alu.shift;
10663 /* Widening beyond 32-bits requires one more insn. */
10664 if (mode == DImode)
10666 *cost += COSTS_N_INSNS (1);
10667 if (speed_p)
10668 *cost += extra_cost->alu.shift;
10671 return true;
10673 case ZERO_EXTEND:
10674 if ((arm_arch4
10675 || GET_MODE (XEXP (x, 0)) == SImode
10676 || GET_MODE (XEXP (x, 0)) == QImode)
10677 && MEM_P (XEXP (x, 0)))
10679 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10681 if (mode == DImode)
10682 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10684 return true;
10687 /* Widening from less than 32-bits requires an extend operation. */
10688 if (GET_MODE (XEXP (x, 0)) == QImode)
10690 /* UXTB can be a shorter instruction in Thumb2, but it might
10691 be slower than the AND Rd, Rn, #255 alternative. When
10692 optimizing for speed it should never be slower to use
10693 AND, and we don't really model 16-bit vs 32-bit insns
10694 here. */
10695 *cost = COSTS_N_INSNS (1);
10696 if (speed_p)
10697 *cost += extra_cost->alu.logical;
10699 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10701 /* We have UXTB/UXTH. */
10702 *cost = COSTS_N_INSNS (1);
10703 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10704 if (speed_p)
10705 *cost += extra_cost->alu.extend;
10707 else if (GET_MODE (XEXP (x, 0)) != SImode)
10709 /* Needs two shifts. It's marginally preferable to use
10710 shifts rather than two BIC instructions as the second
10711 shift may merge with a subsequent insn as a shifter
10712 op. */
10713 *cost = COSTS_N_INSNS (2);
10714 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10715 if (speed_p)
10716 *cost += 2 * extra_cost->alu.shift;
10718 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10719 *cost = COSTS_N_INSNS (1);
10721 /* Widening beyond 32-bits requires one more insn. */
10722 if (mode == DImode)
10724 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10727 return true;
10729 case CONST_INT:
10730 *cost = 0;
10731 /* CONST_INT has no mode, so we cannot tell for sure how many
10732 insns are really going to be needed. The best we can do is
10733 look at the value passed. If it fits in SImode, then assume
10734 that's the mode it will be used for. Otherwise assume it
10735 will be used in DImode. */
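/* For example (illustrative): 0x7fffffff is unchanged by
   trunc_int_for_mode (..., SImode) and so is costed as an SImode constant,
   whereas (HOST_WIDE_INT) 0x1ffffffff is not and is costed as a DImode
   constant below.  */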
10736 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10737 mode = SImode;
10738 else
10739 mode = DImode;
10741 /* Avoid blowing up in arm_gen_constant (). */
10742 if (!(outer_code == PLUS
10743 || outer_code == AND
10744 || outer_code == IOR
10745 || outer_code == XOR
10746 || outer_code == MINUS))
10747 outer_code = SET;
10749 const_int_cost:
10750 if (mode == SImode)
10752 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10753 INTVAL (x), NULL, NULL,
10754 0, 0));
10755 /* Extra costs? */
10757 else
10759 *cost += COSTS_N_INSNS (arm_gen_constant
10760 (outer_code, SImode, NULL,
10761 trunc_int_for_mode (INTVAL (x), SImode),
10762 NULL, NULL, 0, 0)
10763 + arm_gen_constant (outer_code, SImode, NULL,
10764 INTVAL (x) >> 32, NULL,
10765 NULL, 0, 0));
10766 /* Extra costs? */
10769 return true;
10771 case CONST:
10772 case LABEL_REF:
10773 case SYMBOL_REF:
10774 if (speed_p)
10776 if (arm_arch_thumb2 && !flag_pic)
10777 *cost = COSTS_N_INSNS (2);
10778 else
10779 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10781 else
10782 *cost = COSTS_N_INSNS (2);
10784 if (flag_pic)
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->alu.arith;
10791 return true;
10793 case CONST_FIXED:
10794 *cost = COSTS_N_INSNS (4);
10795 /* Fixme. */
10796 return true;
10798 case CONST_DOUBLE:
10799 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10800 && (mode == SFmode || !TARGET_VFP_SINGLE))
10802 if (vfp3_const_double_rtx (x))
10804 *cost = COSTS_N_INSNS (1);
10805 if (speed_p)
10806 *cost += extra_cost->fp[mode == DFmode].fpconst;
10807 return true;
10810 if (speed_p)
10812 *cost = COSTS_N_INSNS (1);
10813 if (mode == DFmode)
10814 *cost += extra_cost->ldst.loadd;
10815 else
10816 *cost += extra_cost->ldst.loadf;
10818 else
10819 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10821 return true;
10823 *cost = COSTS_N_INSNS (4);
10824 return true;
10826 case CONST_VECTOR:
10827 /* Fixme. */
10828 if (TARGET_NEON
10829 && TARGET_HARD_FLOAT
10830 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10831 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10832 *cost = COSTS_N_INSNS (1);
10833 else
10834 *cost = COSTS_N_INSNS (4);
10835 return true;
10837 case HIGH:
10838 case LO_SUM:
10839 *cost = COSTS_N_INSNS (1);
10840 /* When optimizing for size, we prefer constant pool entries to
10841 MOVW/MOVT pairs, so bump the cost of these slightly. */
10842 if (!speed_p)
10843 *cost += 1;
10844 return true;
10846 case CLZ:
10847 *cost = COSTS_N_INSNS (1);
10848 if (speed_p)
10849 *cost += extra_cost->alu.clz;
10850 return false;
10852 case SMIN:
10853 if (XEXP (x, 1) == const0_rtx)
10855 *cost = COSTS_N_INSNS (1);
10856 if (speed_p)
10857 *cost += extra_cost->alu.log_shift;
10858 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10859 return true;
10861 /* Fall through. */
10862 case SMAX:
10863 case UMIN:
10864 case UMAX:
10865 *cost = COSTS_N_INSNS (2);
10866 return false;
10868 case TRUNCATE:
10869 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10870 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10871 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10873 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10874 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10875 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10876 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10877 == ZERO_EXTEND))))
10879 *cost = COSTS_N_INSNS (1);
10880 if (speed_p)
10881 *cost += extra_cost->mult[1].extend;
10882 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10883 speed_p)
10884 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10885 0, speed_p));
10886 return true;
10888 *cost = LIBCALL_COST (1);
10889 return false;
10891 case UNSPEC:
10892 return arm_unspec_cost (x, outer_code, speed_p, cost);
10894 case PC:
10895 /* Reading the PC is like reading any other register. Writing it
10896 is more expensive, but we take that into account elsewhere. */
10897 *cost = 0;
10898 return true;
10900 case ZERO_EXTRACT:
10901 /* TODO: Simple zero_extract of bottom bits using AND. */
10902 /* Fall through. */
10903 case SIGN_EXTRACT:
10904 if (arm_arch6
10905 && mode == SImode
10906 && CONST_INT_P (XEXP (x, 1))
10907 && CONST_INT_P (XEXP (x, 2)))
10909 *cost = COSTS_N_INSNS (1);
10910 if (speed_p)
10911 *cost += extra_cost->alu.bfx;
10912 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10913 return true;
10915 /* Without UBFX/SBFX, need to resort to shift operations. */
10916 *cost = COSTS_N_INSNS (2);
10917 if (speed_p)
10918 *cost += 2 * extra_cost->alu.shift;
10919 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10920 return true;
10922 case FLOAT_EXTEND:
10923 if (TARGET_HARD_FLOAT)
10925 *cost = COSTS_N_INSNS (1);
10926 if (speed_p)
10927 *cost += extra_cost->fp[mode == DFmode].widen;
10928 if (!TARGET_FPU_ARMV8
10929 && GET_MODE (XEXP (x, 0)) == HFmode)
10931 /* Pre v8, widening HF->DF is a two-step process, first
10932 widening to SFmode. */
10933 *cost += COSTS_N_INSNS (1);
10934 if (speed_p)
10935 *cost += extra_cost->fp[0].widen;
10937 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10938 return true;
10941 *cost = LIBCALL_COST (1);
10942 return false;
10944 case FLOAT_TRUNCATE:
10945 if (TARGET_HARD_FLOAT)
10947 *cost = COSTS_N_INSNS (1);
10948 if (speed_p)
10949 *cost += extra_cost->fp[mode == DFmode].narrow;
10950 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10951 return true;
10952 /* Vector modes? */
10954 *cost = LIBCALL_COST (1);
10955 return false;
10957 case FMA:
10958 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10960 rtx op0 = XEXP (x, 0);
10961 rtx op1 = XEXP (x, 1);
10962 rtx op2 = XEXP (x, 2);
10964 *cost = COSTS_N_INSNS (1);
10966 /* vfms or vfnma. */
10967 if (GET_CODE (op0) == NEG)
10968 op0 = XEXP (op0, 0);
10970 /* vfnms or vfnma. */
10971 if (GET_CODE (op2) == NEG)
10972 op2 = XEXP (op2, 0);
10974 *cost += rtx_cost (op0, FMA, 0, speed_p);
10975 *cost += rtx_cost (op1, FMA, 1, speed_p);
10976 *cost += rtx_cost (op2, FMA, 2, speed_p);
10978 if (speed_p)
10979 *cost += extra_cost->fp[mode == DFmode].fma;
10981 return true;
10984 *cost = LIBCALL_COST (3);
10985 return false;
10987 case FIX:
10988 case UNSIGNED_FIX:
10989 if (TARGET_HARD_FLOAT)
10991 if (GET_MODE_CLASS (mode) == MODE_INT)
10993 *cost = COSTS_N_INSNS (1);
10994 if (speed_p)
10995 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10996 /* Strip off the 'cost' of rounding towards zero. */
10997 if (GET_CODE (XEXP (x, 0)) == FIX)
10998 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10999 else
11000 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11001 /* ??? Increase the cost to deal with transferring from
11002 FP -> CORE registers? */
11003 return true;
11005 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11006 && TARGET_FPU_ARMV8)
11008 *cost = COSTS_N_INSNS (1);
11009 if (speed_p)
11010 *cost += extra_cost->fp[mode == DFmode].roundint;
11011 return false;
11013 /* Vector costs? */
11015 *cost = LIBCALL_COST (1);
11016 return false;
11018 case FLOAT:
11019 case UNSIGNED_FLOAT:
11020 if (TARGET_HARD_FLOAT)
11022 /* ??? Increase the cost to deal with transferring from CORE
11023 -> FP registers? */
11024 *cost = COSTS_N_INSNS (1);
11025 if (speed_p)
11026 *cost += extra_cost->fp[mode == DFmode].fromint;
11027 return false;
11029 *cost = LIBCALL_COST (1);
11030 return false;
11032 case CALL:
11033 *cost = COSTS_N_INSNS (1);
11034 return true;
11036 case ASM_OPERANDS:
11038 /* Just a guess: the number of instructions in the asm template
11039 plus one insn per input. Always a minimum of COSTS_N_INSNS (1),
11040 though (see PR60663). */
11041 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11042 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11044 *cost = COSTS_N_INSNS (asm_length + num_operands);
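/* E.g. (illustrative) an asm whose template contains three instructions
   and which has two input operands is costed as COSTS_N_INSNS (5).  */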
11045 return true;
11047 default:
11048 if (mode != VOIDmode)
11049 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11050 else
11051 *cost = COSTS_N_INSNS (4); /* Who knows? */
11052 return false;
11056 #undef HANDLE_NARROW_SHIFT_ARITH
11058 /* RTX costs. Dispatch to the per-core or generic cost tables, for both speed and size. */
11059 static bool
11060 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11061 int *total, bool speed)
11063 bool result;
11065 if (TARGET_OLD_RTX_COSTS
11066 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11068 /* Old way. (Deprecated.) */
11069 if (!speed)
11070 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11071 (enum rtx_code) outer_code, total);
11072 else
11073 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11074 (enum rtx_code) outer_code, total,
11075 speed);
11077 else
11079 /* New way. */
11080 if (current_tune->insn_extra_cost)
11081 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11082 (enum rtx_code) outer_code,
11083 current_tune->insn_extra_cost,
11084 total, speed);
11085 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11086 && current_tune->insn_extra_cost == NULL. */
11087 else
11088 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11089 (enum rtx_code) outer_code,
11090 &generic_extra_costs, total, speed);
11093 if (dump_file && (dump_flags & TDF_DETAILS))
11095 print_rtl_single (dump_file, x);
11096 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11097 *total, result ? "final" : "partial");
11099 return result;
11102 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11103 supported on any "slowmul" cores, so it can be ignored. */
11105 static bool
11106 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11107 int *total, bool speed)
11109 machine_mode mode = GET_MODE (x);
11111 if (TARGET_THUMB)
11113 *total = thumb1_rtx_costs (x, code, outer_code);
11114 return true;
11117 switch (code)
11119 case MULT:
11120 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11121 || mode == DImode)
11123 *total = COSTS_N_INSNS (20);
11124 return false;
11127 if (CONST_INT_P (XEXP (x, 1)))
11129 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11130 & (unsigned HOST_WIDE_INT) 0xffffffff);
11131 int cost, const_ok = const_ok_for_arm (i);
11132 int j, booth_unit_size;
11134 /* Tune as appropriate. */
11135 cost = const_ok ? 4 : 8;
11136 booth_unit_size = 2;
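/* The loop below adds one to the cost for each booth_unit_size-bit chunk
   of the constant that still contains a nonzero bit.  Worked example
   (illustrative): for i == 0x5 the loop body runs twice (i becomes 0x1,
   then 0), so 2 is added to the base cost.  */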
11137 for (j = 0; i && j < 32; j += booth_unit_size)
11139 i >>= booth_unit_size;
11140 cost++;
11143 *total = COSTS_N_INSNS (cost);
11144 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11145 return true;
11148 *total = COSTS_N_INSNS (20);
11149 return false;
11151 default:
11152 return arm_rtx_costs_1 (x, outer_code, total, speed);
11157 /* RTX cost for cores with a fast multiply unit (M variants). */
11159 static bool
11160 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11161 int *total, bool speed)
11163 machine_mode mode = GET_MODE (x);
11165 if (TARGET_THUMB1)
11167 *total = thumb1_rtx_costs (x, code, outer_code);
11168 return true;
11171 /* ??? should thumb2 use different costs? */
11172 switch (code)
11174 case MULT:
11175 /* There is no point basing this on the tuning, since it is always the
11176 fast variant if it exists at all. */
11177 if (mode == DImode
11178 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11179 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11180 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11182 *total = COSTS_N_INSNS (2);
11183 return false;
11187 if (mode == DImode)
11189 *total = COSTS_N_INSNS (5);
11190 return false;
11193 if (CONST_INT_P (XEXP (x, 1)))
11195 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11196 & (unsigned HOST_WIDE_INT) 0xffffffff);
11197 int cost, const_ok = const_ok_for_arm (i);
11198 int j, booth_unit_size;
11200 /* Tune as appropriate. */
11201 cost = const_ok ? 4 : 8;
11202 booth_unit_size = 8;
11203 for (j = 0; i && j < 32; j += booth_unit_size)
11205 i >>= booth_unit_size;
11206 cost++;
11209 *total = COSTS_N_INSNS (cost);
11210 return false;
11213 if (mode == SImode)
11215 *total = COSTS_N_INSNS (4);
11216 return false;
11219 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11221 if (TARGET_HARD_FLOAT
11222 && (mode == SFmode
11223 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11225 *total = COSTS_N_INSNS (1);
11226 return false;
11230 /* Requires a lib call */
11231 *total = COSTS_N_INSNS (20);
11232 return false;
11234 default:
11235 return arm_rtx_costs_1 (x, outer_code, total, speed);
11240 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11241 so it can be ignored. */
11243 static bool
11244 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11245 int *total, bool speed)
11247 machine_mode mode = GET_MODE (x);
11249 if (TARGET_THUMB)
11251 *total = thumb1_rtx_costs (x, code, outer_code);
11252 return true;
11255 switch (code)
11257 case COMPARE:
11258 if (GET_CODE (XEXP (x, 0)) != MULT)
11259 return arm_rtx_costs_1 (x, outer_code, total, speed);
11261 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11262 will stall until the multiplication is complete. */
11263 *total = COSTS_N_INSNS (3);
11264 return false;
11266 case MULT:
11267 /* There is no point basing this on the tuning, since it is always the
11268 fast variant if it exists at all. */
11269 if (mode == DImode
11270 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11271 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11272 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11274 *total = COSTS_N_INSNS (2);
11275 return false;
11279 if (mode == DImode)
11281 *total = COSTS_N_INSNS (5);
11282 return false;
11285 if (CONST_INT_P (XEXP (x, 1)))
11287 /* If operand 1 is a constant we can more accurately
11288 calculate the cost of the multiply. The multiplier can
11289 retire 15 bits on the first cycle and a further 12 on the
11290 second. We do, of course, have to load the constant into
11291 a register first. */
11292 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11293 /* There's a general overhead of one cycle. */
11294 int cost = 1;
11295 unsigned HOST_WIDE_INT masked_const;
11297 if (i & 0x80000000)
11298 i = ~i;
11300 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11302 masked_const = i & 0xffff8000;
11303 if (masked_const != 0)
11305 cost++;
11306 masked_const = i & 0xf8000000;
11307 if (masked_const != 0)
11308 cost++;
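/* Worked example (illustrative): for i == 0x12345 the first mask
   (0xffff8000) leaves a nonzero value, adding one cycle, while the second
   mask (0xf8000000) leaves zero, so including the general one-cycle
   overhead *total becomes COSTS_N_INSNS (2).  */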
11310 *total = COSTS_N_INSNS (cost);
11311 return false;
11314 if (mode == SImode)
11316 *total = COSTS_N_INSNS (3);
11317 return false;
11320 /* Requires a lib call */
11321 *total = COSTS_N_INSNS (20);
11322 return false;
11324 default:
11325 return arm_rtx_costs_1 (x, outer_code, total, speed);
11330 /* RTX costs for 9e (and later) cores. */
11332 static bool
11333 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11334 int *total, bool speed)
11336 machine_mode mode = GET_MODE (x);
11338 if (TARGET_THUMB1)
11340 switch (code)
11342 case MULT:
11343 /* Small multiply: 32 cycles for an integer multiply inst. */
11344 if (arm_arch6m && arm_m_profile_small_mul)
11345 *total = COSTS_N_INSNS (32);
11346 else
11347 *total = COSTS_N_INSNS (3);
11348 return true;
11350 default:
11351 *total = thumb1_rtx_costs (x, code, outer_code);
11352 return true;
11356 switch (code)
11358 case MULT:
11359 /* There is no point basing this on the tuning, since it is always the
11360 fast variant if it exists at all. */
11361 if (mode == DImode
11362 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11363 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11364 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11366 *total = COSTS_N_INSNS (2);
11367 return false;
11371 if (mode == DImode)
11373 *total = COSTS_N_INSNS (5);
11374 return false;
11377 if (mode == SImode)
11379 *total = COSTS_N_INSNS (2);
11380 return false;
11383 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11385 if (TARGET_HARD_FLOAT
11386 && (mode == SFmode
11387 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11389 *total = COSTS_N_INSNS (1);
11390 return false;
11394 *total = COSTS_N_INSNS (20);
11395 return false;
11397 default:
11398 return arm_rtx_costs_1 (x, outer_code, total, speed);
11401 /* All address computations that can be done are free, but rtx cost returns
11402 the same for practically all of them. So we weight the different types
11403 of address here in the order (most pref first):
11404 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11405 static inline int
11406 arm_arm_address_cost (rtx x)
11408 enum rtx_code c = GET_CODE (x);
11410 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11411 return 0;
11412 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11413 return 10;
11415 if (c == PLUS)
11417 if (CONST_INT_P (XEXP (x, 1)))
11418 return 2;
11420 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11421 return 3;
11423 return 4;
11426 return 6;
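/* Illustrative examples of the weighting above: (post_inc r0) -> 0,
   (plus r0 (const_int 4)) -> 2, (plus r0 (mult r1 (const_int 4))) -> 3,
   (plus r0 r1) -> 4, a plain register -> 6, (symbol_ref foo) -> 10.  */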
11429 static inline int
11430 arm_thumb_address_cost (rtx x)
11432 enum rtx_code c = GET_CODE (x);
11434 if (c == REG)
11435 return 1;
11436 if (c == PLUS
11437 && REG_P (XEXP (x, 0))
11438 && CONST_INT_P (XEXP (x, 1)))
11439 return 1;
11441 return 2;
11444 static int
11445 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11446 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11448 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11451 /* Adjust cost hook for XScale. */
11452 static bool
11453 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11455 /* Some true dependencies can have a higher cost depending
11456 on precisely how certain input operands are used. */
11457 if (REG_NOTE_KIND (link) == 0
11458 && recog_memoized (insn) >= 0
11459 && recog_memoized (dep) >= 0)
11461 int shift_opnum = get_attr_shift (insn);
11462 enum attr_type attr_type = get_attr_type (dep);
11464 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11465 operand for INSN. If we have a shifted input operand and the
11466 instruction we depend on is another ALU instruction, then we may
11467 have to account for an additional stall. */
11468 if (shift_opnum != 0
11469 && (attr_type == TYPE_ALU_SHIFT_IMM
11470 || attr_type == TYPE_ALUS_SHIFT_IMM
11471 || attr_type == TYPE_LOGIC_SHIFT_IMM
11472 || attr_type == TYPE_LOGICS_SHIFT_IMM
11473 || attr_type == TYPE_ALU_SHIFT_REG
11474 || attr_type == TYPE_ALUS_SHIFT_REG
11475 || attr_type == TYPE_LOGIC_SHIFT_REG
11476 || attr_type == TYPE_LOGICS_SHIFT_REG
11477 || attr_type == TYPE_MOV_SHIFT
11478 || attr_type == TYPE_MVN_SHIFT
11479 || attr_type == TYPE_MOV_SHIFT_REG
11480 || attr_type == TYPE_MVN_SHIFT_REG))
11482 rtx shifted_operand;
11483 int opno;
11485 /* Get the shifted operand. */
11486 extract_insn (insn);
11487 shifted_operand = recog_data.operand[shift_opnum];
11489 /* Iterate over all the operands in DEP. If we write an operand
11490 that overlaps with SHIFTED_OPERAND, then we have to increase the
11491 cost of this dependency. */
11492 extract_insn (dep);
11493 preprocess_constraints (dep);
11494 for (opno = 0; opno < recog_data.n_operands; opno++)
11496 /* We can ignore strict inputs. */
11497 if (recog_data.operand_type[opno] == OP_IN)
11498 continue;
11500 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11501 shifted_operand))
11503 *cost = 2;
11504 return false;
11509 return true;
11512 /* Adjust cost hook for Cortex A9. */
11513 static bool
11514 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11516 switch (REG_NOTE_KIND (link))
11518 case REG_DEP_ANTI:
11519 *cost = 0;
11520 return false;
11522 case REG_DEP_TRUE:
11523 case REG_DEP_OUTPUT:
11524 if (recog_memoized (insn) >= 0
11525 && recog_memoized (dep) >= 0)
11527 if (GET_CODE (PATTERN (insn)) == SET)
11529 if (GET_MODE_CLASS
11530 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11531 || GET_MODE_CLASS
11532 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11534 enum attr_type attr_type_insn = get_attr_type (insn);
11535 enum attr_type attr_type_dep = get_attr_type (dep);
11537 /* By default all dependencies of the form
11538 s0 = s0 <op> s1
11539 s0 = s0 <op> s2
11540 have an extra latency of 1 cycle because
11541 of the input and output dependency in this
11542 case. However this gets modeled as a true
11543 dependency and hence all these checks. */
11544 if (REG_P (SET_DEST (PATTERN (insn)))
11545 && REG_P (SET_DEST (PATTERN (dep)))
11546 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11547 SET_DEST (PATTERN (dep))))
11549 /* FMACS is a special case where the dependent
11550 instruction can be issued 3 cycles before
11551 the normal latency in case of an output
11552 dependency. */
11553 if ((attr_type_insn == TYPE_FMACS
11554 || attr_type_insn == TYPE_FMACD)
11555 && (attr_type_dep == TYPE_FMACS
11556 || attr_type_dep == TYPE_FMACD))
11558 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11559 *cost = insn_default_latency (dep) - 3;
11560 else
11561 *cost = insn_default_latency (dep);
11562 return false;
11564 else
11566 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11567 *cost = insn_default_latency (dep) + 1;
11568 else
11569 *cost = insn_default_latency (dep);
11571 return false;
11576 break;
11578 default:
11579 gcc_unreachable ();
11582 return true;
11585 /* Adjust cost hook for FA726TE. */
11586 static bool
11587 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11589 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11590 by a predicated one) has a penalty of 3. */
11591 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11592 && recog_memoized (insn) >= 0
11593 && recog_memoized (dep) >= 0
11594 && get_attr_conds (dep) == CONDS_SET)
11596 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11597 if (get_attr_conds (insn) == CONDS_USE
11598 && get_attr_type (insn) != TYPE_BRANCH)
11600 *cost = 3;
11601 return false;
11604 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11605 || get_attr_conds (insn) == CONDS_USE)
11607 *cost = 0;
11608 return false;
11612 return true;
11615 /* Implement TARGET_REGISTER_MOVE_COST.
11617 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11618 it is typically more expensive than a single memory access. We set
11619 the cost to less than that of two memory accesses so that floating
11620 point to integer conversion does not go through memory. */
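/* As an illustrative cross-check: with the TARGET_32BIT memory move cost
   of 10 used by arm_memory_move_cost below, the cost of 15 chosen here for
   VFP<->core moves is indeed cheaper than the 20 that a spill and reload
   would cost.  */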
11623 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11624 reg_class_t from, reg_class_t to)
11626 if (TARGET_32BIT)
11628 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11629 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11630 return 15;
11631 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11632 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11633 return 4;
11634 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11635 return 20;
11636 else
11637 return 2;
11639 else
11641 if (from == HI_REGS || to == HI_REGS)
11642 return 4;
11643 else
11644 return 2;
11648 /* Implement TARGET_MEMORY_MOVE_COST. */
11651 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11652 bool in ATTRIBUTE_UNUSED)
11654 if (TARGET_32BIT)
11655 return 10;
11656 else
11658 if (GET_MODE_SIZE (mode) < 4)
11659 return 8;
11660 else
11661 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
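/* Worked example (illustrative): on Thumb-1 (!TARGET_32BIT), moving an
   SImode value to or from memory is costed as 2 * 4 * 1 = 8 for LO_REGS
   and 2 * 4 * 2 = 16 for any other class, while sub-word modes cost a
   flat 8.  */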
11665 /* Vectorizer cost model implementation. */
11667 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11668 static int
11669 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11670 tree vectype,
11671 int misalign ATTRIBUTE_UNUSED)
11673 unsigned elements;
11675 switch (type_of_cost)
11677 case scalar_stmt:
11678 return current_tune->vec_costs->scalar_stmt_cost;
11680 case scalar_load:
11681 return current_tune->vec_costs->scalar_load_cost;
11683 case scalar_store:
11684 return current_tune->vec_costs->scalar_store_cost;
11686 case vector_stmt:
11687 return current_tune->vec_costs->vec_stmt_cost;
11689 case vector_load:
11690 return current_tune->vec_costs->vec_align_load_cost;
11692 case vector_store:
11693 return current_tune->vec_costs->vec_store_cost;
11695 case vec_to_scalar:
11696 return current_tune->vec_costs->vec_to_scalar_cost;
11698 case scalar_to_vec:
11699 return current_tune->vec_costs->scalar_to_vec_cost;
11701 case unaligned_load:
11702 return current_tune->vec_costs->vec_unalign_load_cost;
11704 case unaligned_store:
11705 return current_tune->vec_costs->vec_unalign_store_cost;
11707 case cond_branch_taken:
11708 return current_tune->vec_costs->cond_taken_branch_cost;
11710 case cond_branch_not_taken:
11711 return current_tune->vec_costs->cond_not_taken_branch_cost;
11713 case vec_perm:
11714 case vec_promote_demote:
11715 return current_tune->vec_costs->vec_stmt_cost;
11717 case vec_construct:
11718 elements = TYPE_VECTOR_SUBPARTS (vectype);
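/* E.g. constructing a four-element vector is costed as 4 / 2 + 1 = 3.  */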
11719 return elements / 2 + 1;
11721 default:
11722 gcc_unreachable ();
11726 /* Implement targetm.vectorize.add_stmt_cost. */
11728 static unsigned
11729 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11730 struct _stmt_vec_info *stmt_info, int misalign,
11731 enum vect_cost_model_location where)
11733 unsigned *cost = (unsigned *) data;
11734 unsigned retval = 0;
11736 if (flag_vect_cost_model)
11738 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11739 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11741 /* Statements in an inner loop relative to the loop being
11742 vectorized are weighted more heavily. The value here is
11743 arbitrary and could potentially be improved with analysis. */
11744 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11745 count *= 50; /* FIXME. */
11747 retval = (unsigned) (count * stmt_cost);
11748 cost[where] += retval;
11751 return retval;
11754 /* Return true if and only if this insn can dual-issue only as older. */
11755 static bool
11756 cortexa7_older_only (rtx_insn *insn)
11758 if (recog_memoized (insn) < 0)
11759 return false;
11761 switch (get_attr_type (insn))
11763 case TYPE_ALU_DSP_REG:
11764 case TYPE_ALU_SREG:
11765 case TYPE_ALUS_SREG:
11766 case TYPE_LOGIC_REG:
11767 case TYPE_LOGICS_REG:
11768 case TYPE_ADC_REG:
11769 case TYPE_ADCS_REG:
11770 case TYPE_ADR:
11771 case TYPE_BFM:
11772 case TYPE_REV:
11773 case TYPE_MVN_REG:
11774 case TYPE_SHIFT_IMM:
11775 case TYPE_SHIFT_REG:
11776 case TYPE_LOAD_BYTE:
11777 case TYPE_LOAD1:
11778 case TYPE_STORE1:
11779 case TYPE_FFARITHS:
11780 case TYPE_FADDS:
11781 case TYPE_FFARITHD:
11782 case TYPE_FADDD:
11783 case TYPE_FMOV:
11784 case TYPE_F_CVT:
11785 case TYPE_FCMPS:
11786 case TYPE_FCMPD:
11787 case TYPE_FCONSTS:
11788 case TYPE_FCONSTD:
11789 case TYPE_FMULS:
11790 case TYPE_FMACS:
11791 case TYPE_FMULD:
11792 case TYPE_FMACD:
11793 case TYPE_FDIVS:
11794 case TYPE_FDIVD:
11795 case TYPE_F_MRC:
11796 case TYPE_F_MRRC:
11797 case TYPE_F_FLAG:
11798 case TYPE_F_LOADS:
11799 case TYPE_F_STORES:
11800 return true;
11801 default:
11802 return false;
11806 /* Return true if and only if this insn can dual-issue as younger. */
11807 static bool
11808 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11810 if (recog_memoized (insn) < 0)
11812 if (verbose > 5)
11813 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11814 return false;
11817 switch (get_attr_type (insn))
11819 case TYPE_ALU_IMM:
11820 case TYPE_ALUS_IMM:
11821 case TYPE_LOGIC_IMM:
11822 case TYPE_LOGICS_IMM:
11823 case TYPE_EXTEND:
11824 case TYPE_MVN_IMM:
11825 case TYPE_MOV_IMM:
11826 case TYPE_MOV_REG:
11827 case TYPE_MOV_SHIFT:
11828 case TYPE_MOV_SHIFT_REG:
11829 case TYPE_BRANCH:
11830 case TYPE_CALL:
11831 return true;
11832 default:
11833 return false;
11838 /* Look for an instruction that can dual issue only as an older
11839 instruction, and move it in front of any instructions that can
11840 dual-issue as younger, while preserving the relative order of all
11841 other instructions in the ready list. This is a heuristic to help
11842 dual-issue in later cycles, by postponing issue of more flexible
11843 instructions. This heuristic may affect dual issue opportunities
11844 in the current cycle. */
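/* A sketch of the effect (the instruction choice is illustrative, not from
   the original sources): if the insn at the head of the ready list is a
   MOV Rd, #imm (younger-capable per cortexa7_younger) and the next one is an
   ADD Rd, Rn, Rm (older-only per cortexa7_older_only), the reordering below
   moves the ADD in front of the MOV, so the older-only insn issues first and
   the more flexible MOV stays available to pair as the younger half of a
   dual-issue slot in a later cycle.  */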
11845 static void
11846 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11847 int *n_readyp, int clock)
11849 int i;
11850 int first_older_only = -1, first_younger = -1;
11852 if (verbose > 5)
11853 fprintf (file,
11854 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11855 clock,
11856 *n_readyp);
11858 /* Traverse the ready list from the head (the instruction to issue
11859 first), looking for the first instruction that can issue as
11860 younger and the first instruction that can dual-issue only as
11861 older. */
11862 for (i = *n_readyp - 1; i >= 0; i--)
11864 rtx_insn *insn = ready[i];
11865 if (cortexa7_older_only (insn))
11867 first_older_only = i;
11868 if (verbose > 5)
11869 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11870 break;
11872 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11873 first_younger = i;
11876 /* Nothing to reorder because either no younger insn was found, or an
11877 insn that can dual-issue only as older already appears before any
11878 insn that can dual-issue as younger. */
11879 if (first_younger == -1)
11881 if (verbose > 5)
11882 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11883 return;
11886 /* Nothing to reorder because no older-only insn in the ready list. */
11887 if (first_older_only == -1)
11889 if (verbose > 5)
11890 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11891 return;
11894 /* Move first_older_only insn before first_younger. */
11895 if (verbose > 5)
11896 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11897 INSN_UID(ready [first_older_only]),
11898 INSN_UID(ready [first_younger]));
11899 rtx_insn *first_older_only_insn = ready [first_older_only];
11900 for (i = first_older_only; i < first_younger; i++)
11902 ready[i] = ready[i+1];
11905 ready[i] = first_older_only_insn;
11906 return;
11909 /* Implement TARGET_SCHED_REORDER. */
11910 static int
11911 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11912 int clock)
11914 switch (arm_tune)
11916 case cortexa7:
11917 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11918 break;
11919 default:
11920 /* Do nothing for other cores. */
11921 break;
11924 return arm_issue_rate ();
11927 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11928 It corrects the value of COST based on the relationship between
11929 INSN and DEP through the dependence LINK. It returns the new
11930 value. There is a per-core adjust_cost hook to adjust scheduler costs
11931 and the per-core hook can choose to completely override the generic
11932 adjust_cost function. Only put bits of code into arm_adjust_cost that
11933 are common across all cores. */
11934 static int
11935 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11937 rtx i_pat, d_pat;
11939 /* When generating Thumb-1 code, we want to place flag-setting operations
11940 close to a conditional branch which depends on them, so that we can
11941 omit the comparison. */
11942 if (TARGET_THUMB1
11943 && REG_NOTE_KIND (link) == 0
11944 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11945 && recog_memoized (dep) >= 0
11946 && get_attr_conds (dep) == CONDS_SET)
11947 return 0;
11949 if (current_tune->sched_adjust_cost != NULL)
11951 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11952 return cost;
11955 /* XXX Is this strictly true? */
11956 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11957 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11958 return 0;
11960 /* Call insns don't incur a stall, even if they follow a load. */
11961 if (REG_NOTE_KIND (link) == 0
11962 && CALL_P (insn))
11963 return 1;
11965 if ((i_pat = single_set (insn)) != NULL
11966 && MEM_P (SET_SRC (i_pat))
11967 && (d_pat = single_set (dep)) != NULL
11968 && MEM_P (SET_DEST (d_pat)))
11970 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11971 /* This is a load after a store; there is no conflict if the load reads
11972 from a cached area. Assume that loads from the stack, and from the
11973 constant pool are cached, and that others will miss. This is a
11974 hack. */
11976 if ((GET_CODE (src_mem) == SYMBOL_REF
11977 && CONSTANT_POOL_ADDRESS_P (src_mem))
11978 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11979 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11980 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11981 return 1;
11984 return cost;
11988 arm_max_conditional_execute (void)
11990 return max_insns_skipped;
11993 static int
11994 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11996 if (TARGET_32BIT)
11997 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11998 else
11999 return (optimize > 0) ? 2 : 0;
12002 static int
12003 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12005 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12008 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12009 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12010 sequences of non-executed instructions in IT blocks probably take the same
12011 amount of time as executed instructions (and the IT instruction itself takes
12012 space in icache). This function was experimentally determined to give good
12013 results on a popular embedded benchmark. */
12015 static int
12016 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12018 return (TARGET_32BIT && speed_p) ? 1
12019 : arm_default_branch_cost (speed_p, predictable_p);
12022 static bool fp_consts_inited = false;
12024 static REAL_VALUE_TYPE value_fp0;
12026 static void
12027 init_fp_table (void)
12029 REAL_VALUE_TYPE r;
12031 r = REAL_VALUE_ATOF ("0", DFmode);
12032 value_fp0 = r;
12033 fp_consts_inited = true;
12036 /* Return TRUE if rtx X is a valid immediate FP constant. */
12038 arm_const_double_rtx (rtx x)
12040 REAL_VALUE_TYPE r;
12042 if (!fp_consts_inited)
12043 init_fp_table ();
12045 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12046 if (REAL_VALUE_MINUS_ZERO (r))
12047 return 0;
12049 if (REAL_VALUES_EQUAL (r, value_fp0))
12050 return 1;
12052 return 0;
12055 /* VFPv3 has a fairly wide range of representable immediates, formed from
12056 "quarter-precision" floating-point values. These can be evaluated using this
12057 formula (with ^ for exponentiation):
12059 (-1)^s * n * 2^(-r)
12061 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12062 16 <= n <= 31 and 0 <= r <= 7.
12064 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12066 - A (most-significant) is the sign bit.
12067 - BCD are the exponent (encoded as r XOR 3).
12068 - EFGH are the mantissa (encoded as n - 16).
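/* As a worked example of the encoding above: 1.0 = 16 * 2^-4, so s = 0,
   n = 16 and r = 4.  That gives A = 0, BCD = (4 XOR 3) = 0b111 and
   EFGH = (16 - 16) = 0b0000, i.e. the 8-bit value 0b01110000 (0x70),
   which is the index returned below for the constant 1.0.  */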
12071 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12072 fconst[sd] instruction, or -1 if X isn't suitable. */
12073 static int
12074 vfp3_const_double_index (rtx x)
12076 REAL_VALUE_TYPE r, m;
12077 int sign, exponent;
12078 unsigned HOST_WIDE_INT mantissa, mant_hi;
12079 unsigned HOST_WIDE_INT mask;
12080 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12081 bool fail;
12083 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12084 return -1;
12086 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12088 /* We can't represent these things, so detect them first. */
12089 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12090 return -1;
12092 /* Extract sign, exponent and mantissa. */
12093 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12094 r = real_value_abs (&r);
12095 exponent = REAL_EXP (&r);
12096 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12097 highest (sign) bit, with a fixed binary point at bit point_pos.
12098 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12099 bits for the mantissa, this may fail (low bits would be lost). */
12100 real_ldexp (&m, &r, point_pos - exponent);
12101 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12102 mantissa = w.elt (0);
12103 mant_hi = w.elt (1);
12105 /* If there are bits set in the low part of the mantissa, we can't
12106 represent this value. */
12107 if (mantissa != 0)
12108 return -1;
12110 /* Now make it so that mantissa contains the most-significant bits, and move
12111 the point_pos to indicate that the least-significant bits have been
12112 discarded. */
12113 point_pos -= HOST_BITS_PER_WIDE_INT;
12114 mantissa = mant_hi;
12116 /* We can permit four significant bits of mantissa only, plus a high bit
12117 which is always 1. */
12118 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12119 if ((mantissa & mask) != 0)
12120 return -1;
12122 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12123 mantissa >>= point_pos - 5;
12125 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12126 floating-point immediate zero with Neon using an integer-zero load, but
12127 that case is handled elsewhere.) */
12128 if (mantissa == 0)
12129 return -1;
12131 gcc_assert (mantissa >= 16 && mantissa <= 31);
12133 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12134 normalized significands are in the range [1, 2). (Our mantissa is shifted
12135 left 4 places at this point relative to normalized IEEE754 values). GCC
12136 internally uses [0.5, 1) (see real.c), so the exponent returned from
12137 REAL_EXP must be altered. */
12138 exponent = 5 - exponent;
12140 if (exponent < 0 || exponent > 7)
12141 return -1;
12143 /* Sign, mantissa and exponent are now in the correct form to plug into the
12144 formula described in the comment above. */
12145 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12148 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12150 vfp3_const_double_rtx (rtx x)
12152 if (!TARGET_VFP3)
12153 return 0;
12155 return vfp3_const_double_index (x) != -1;
12158 /* Recognize immediates which can be used in various Neon instructions. Legal
12159 immediates are described by the following table (for VMVN variants, the
12160 bitwise inverse of the constant shown is recognized. In either case, VMOV
12161 is output and the correct instruction to use for a given constant is chosen
12162 by the assembler). The constant shown is replicated across all elements of
12163 the destination vector.
12165 insn elems variant constant (binary)
12166 ---- ----- ------- -----------------
12167 vmov i32 0 00000000 00000000 00000000 abcdefgh
12168 vmov i32 1 00000000 00000000 abcdefgh 00000000
12169 vmov i32 2 00000000 abcdefgh 00000000 00000000
12170 vmov i32 3 abcdefgh 00000000 00000000 00000000
12171 vmov i16 4 00000000 abcdefgh
12172 vmov i16 5 abcdefgh 00000000
12173 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12174 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12175 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12176 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12177 vmvn i16 10 00000000 abcdefgh
12178 vmvn i16 11 abcdefgh 00000000
12179 vmov i32 12 00000000 00000000 abcdefgh 11111111
12180 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12181 vmov i32 14 00000000 abcdefgh 11111111 11111111
12182 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12183 vmov i8 16 abcdefgh
12184 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12185 eeeeeeee ffffffff gggggggg hhhhhhhh
12186 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12187 vmov f32 19 00000000 00000000 00000000 00000000
12189 For case 18, B = !b. Representable values are exactly those accepted by
12190 vfp3_const_double_index, but are output as floating-point numbers rather
12191 than indices.
12193 For case 19, we will change it to vmov.i32 when assembling.
12195 Variants 0-5 (inclusive) may also be used as immediates for the second
12196 operand of VORR/VBIC instructions.
12198 The INVERSE argument causes the bitwise inverse of the given operand to be
12199 recognized instead (used for recognizing legal immediates for the VAND/VORN
12200 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12201 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12202 output, rather than the real insns vbic/vorr).
12204 INVERSE makes no difference to the recognition of float vectors.
12206 The return value is the variant of immediate as shown in the above table, or
12207 -1 if the given value doesn't match any of the listed patterns.
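/* For example, a V4SImode vector with every element equal to 255
   (0x000000ff) matches variant 0 with abcdefgh = 0xff and is output as a
   "vmov.i32" of that value, while a vector with every element equal to
   0xffffff00 is recognized through the VMVN form (variant 6).  */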
12209 static int
12210 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12211 rtx *modconst, int *elementwidth)
12213 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12214 matches = 1; \
12215 for (i = 0; i < idx; i += (STRIDE)) \
12216 if (!(TEST)) \
12217 matches = 0; \
12218 if (matches) \
12220 immtype = (CLASS); \
12221 elsize = (ELSIZE); \
12222 break; \
12225 unsigned int i, elsize = 0, idx = 0, n_elts;
12226 unsigned int innersize;
12227 unsigned char bytes[16];
12228 int immtype = -1, matches;
12229 unsigned int invmask = inverse ? 0xff : 0;
12230 bool vector = GET_CODE (op) == CONST_VECTOR;
12232 if (vector)
12234 n_elts = CONST_VECTOR_NUNITS (op);
12235 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12237 else
12239 n_elts = 1;
12240 if (mode == VOIDmode)
12241 mode = DImode;
12242 innersize = GET_MODE_SIZE (mode);
12245 /* Vectors of float constants. */
12246 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12248 rtx el0 = CONST_VECTOR_ELT (op, 0);
12249 REAL_VALUE_TYPE r0;
12251 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12252 return -1;
12254 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12256 for (i = 1; i < n_elts; i++)
12258 rtx elt = CONST_VECTOR_ELT (op, i);
12259 REAL_VALUE_TYPE re;
12261 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12263 if (!REAL_VALUES_EQUAL (r0, re))
12264 return -1;
12267 if (modconst)
12268 *modconst = CONST_VECTOR_ELT (op, 0);
12270 if (elementwidth)
12271 *elementwidth = 0;
12273 if (el0 == CONST0_RTX (GET_MODE (el0)))
12274 return 19;
12275 else
12276 return 18;
12279 /* Splat vector constant out into a byte vector. */
12280 for (i = 0; i < n_elts; i++)
12282 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12283 unsigned HOST_WIDE_INT elpart;
12284 unsigned int part, parts;
12286 if (CONST_INT_P (el))
12288 elpart = INTVAL (el);
12289 parts = 1;
12291 else if (CONST_DOUBLE_P (el))
12293 elpart = CONST_DOUBLE_LOW (el);
12294 parts = 2;
12296 else
12297 gcc_unreachable ();
12299 for (part = 0; part < parts; part++)
12301 unsigned int byte;
12302 for (byte = 0; byte < innersize; byte++)
12304 bytes[idx++] = (elpart & 0xff) ^ invmask;
12305 elpart >>= BITS_PER_UNIT;
12307 if (CONST_DOUBLE_P (el))
12308 elpart = CONST_DOUBLE_HIGH (el);
12312 /* Sanity check. */
12313 gcc_assert (idx == GET_MODE_SIZE (mode));
12317 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12318 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12320 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12321 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12323 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12324 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12326 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12327 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12329 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12331 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12333 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12334 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12336 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12337 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12339 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12340 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12342 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12343 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12345 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12347 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12349 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12350 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12352 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12353 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12355 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12356 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12358 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12359 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12361 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12363 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12364 && bytes[i] == bytes[(i + 8) % idx]);
12366 while (0);
12368 if (immtype == -1)
12369 return -1;
12371 if (elementwidth)
12372 *elementwidth = elsize;
12374 if (modconst)
12376 unsigned HOST_WIDE_INT imm = 0;
12378 /* Un-invert bytes of recognized vector, if necessary. */
12379 if (invmask != 0)
12380 for (i = 0; i < idx; i++)
12381 bytes[i] ^= invmask;
12383 if (immtype == 17)
12385 /* FIXME: Broken on 32-bit H_W_I hosts. */
12386 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12388 for (i = 0; i < 8; i++)
12389 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12390 << (i * BITS_PER_UNIT);
12392 *modconst = GEN_INT (imm);
12394 else
12396 unsigned HOST_WIDE_INT imm = 0;
12398 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12399 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12401 *modconst = GEN_INT (imm);
12405 return immtype;
12406 #undef CHECK
12409 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12410 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12411 float elements), and a modified constant (whatever should be output for a
12412 VMOV) in *MODCONST. */
12415 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12416 rtx *modconst, int *elementwidth)
12418 rtx tmpconst;
12419 int tmpwidth;
12420 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12422 if (retval == -1)
12423 return 0;
12425 if (modconst)
12426 *modconst = tmpconst;
12428 if (elementwidth)
12429 *elementwidth = tmpwidth;
12431 return 1;
12434 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12435 the immediate is valid, write a constant suitable for using as an operand
12436 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12437 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12440 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12441 rtx *modconst, int *elementwidth)
12443 rtx tmpconst;
12444 int tmpwidth;
12445 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12447 if (retval < 0 || retval > 5)
12448 return 0;
12450 if (modconst)
12451 *modconst = tmpconst;
12453 if (elementwidth)
12454 *elementwidth = tmpwidth;
12456 return 1;
12459 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12460 the immediate is valid, write a constant suitable for using as an operand
12461 to VSHR/VSHL to *MODCONST and the corresponding element width to
12462 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12463 which have different limitations. */
12466 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12467 rtx *modconst, int *elementwidth,
12468 bool isleftshift)
12470 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12471 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12472 unsigned HOST_WIDE_INT last_elt = 0;
12473 unsigned HOST_WIDE_INT maxshift;
12475 /* Split vector constant out into a byte vector. */
12476 for (i = 0; i < n_elts; i++)
12478 rtx el = CONST_VECTOR_ELT (op, i);
12479 unsigned HOST_WIDE_INT elpart;
12481 if (CONST_INT_P (el))
12482 elpart = INTVAL (el);
12483 else if (CONST_DOUBLE_P (el))
12484 return 0;
12485 else
12486 gcc_unreachable ();
12488 if (i != 0 && elpart != last_elt)
12489 return 0;
12491 last_elt = elpart;
12494 /* Shift less than element size. */
12495 maxshift = innersize * 8;
12497 if (isleftshift)
12499 /* Left shift immediate value can be from 0 to <size>-1. */
12500 if (last_elt >= maxshift)
12501 return 0;
12503 else
12505 /* Right shift immediate value can be from 1 to <size>. */
12506 if (last_elt == 0 || last_elt > maxshift)
12507 return 0;
12510 if (elementwidth)
12511 *elementwidth = innersize * 8;
12513 if (modconst)
12514 *modconst = CONST_VECTOR_ELT (op, 0);
12516 return 1;
12519 /* Return a string suitable for output of Neon immediate logic operation
12520 MNEM. */
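/* For example, with MNEM "vorr", a 32-bit element width and QUAD set,
   the template built below is "vorr.i32\t%q0, %2"; the D-register form
   uses %P0 instead of %q0.  */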
12522 char *
12523 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12524 int inverse, int quad)
12526 int width, is_valid;
12527 static char templ[40];
12529 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12531 gcc_assert (is_valid != 0);
12533 if (quad)
12534 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12535 else
12536 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12538 return templ;
12541 /* Return a string suitable for output of Neon immediate shift operation
12542 (VSHR or VSHL) MNEM. */
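/* For example, with MNEM "vshr", SIGN 's', a 32-bit element width and
   QUAD set, the template built below is "vshr.s32\t%q0, %q1, %2".  */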
12544 char *
12545 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12546 machine_mode mode, int quad,
12547 bool isleftshift)
12549 int width, is_valid;
12550 static char templ[40];
12552 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12553 gcc_assert (is_valid != 0);
12555 if (quad)
12556 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12557 else
12558 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12560 return templ;
12563 /* Output a sequence of pairwise operations to implement a reduction.
12564 NOTE: We do "too much work" here, because pairwise operations work on two
12565 registers-worth of operands in one go. Unfortunately we don't think we can
12566 exploit those extra calculations to do the full operation in fewer steps.
12567 Although all vector elements of the result but the first are ignored, we
12568 actually calculate the same result in each of the elements. An alternative
12569 such as initially loading a vector with zero to use as each of the second
12570 operands would use up an additional register and take an extra instruction,
12571 for no particular gain. */
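/* For instance, reducing a four-element vector takes two pairwise steps
   (4 -> 2 -> 1 useful elements): the loop below runs with i = 2 and then
   i = 1, and as noted above every lane of OP0 ends up holding the same
   final value.  */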
12573 void
12574 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12575 rtx (*reduc) (rtx, rtx, rtx))
12577 machine_mode inner = GET_MODE_INNER (mode);
12578 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12579 rtx tmpsum = op1;
12581 for (i = parts / 2; i >= 1; i /= 2)
12583 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12584 emit_insn (reduc (dest, tmpsum, tmpsum));
12585 tmpsum = dest;
12589 /* If VALS is a vector constant that can be loaded into a register
12590 using VDUP, generate instructions to do so and return an RTX to
12591 assign to the register. Otherwise return NULL_RTX. */
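/* For example, given the V4SImode constant {42, 42, 42, 42}, the code
   below copies 42 into a core register and returns
   (vec_duplicate:V4SI (reg:SI ...)), which can then be loaded with a
   single VDUP as described in the comment inside the function.  */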
12593 static rtx
12594 neon_vdup_constant (rtx vals)
12596 machine_mode mode = GET_MODE (vals);
12597 machine_mode inner_mode = GET_MODE_INNER (mode);
12598 int n_elts = GET_MODE_NUNITS (mode);
12599 bool all_same = true;
12600 rtx x;
12601 int i;
12603 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12604 return NULL_RTX;
12606 for (i = 0; i < n_elts; ++i)
12608 x = XVECEXP (vals, 0, i);
12609 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12610 all_same = false;
12613 if (!all_same)
12614 /* The elements are not all the same. We could handle repeating
12615 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12616 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12617 vdup.i16). */
12618 return NULL_RTX;
12620 /* We can load this constant by using VDUP and a constant in a
12621 single ARM register. This will be cheaper than a vector
12622 load. */
12624 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12625 return gen_rtx_VEC_DUPLICATE (mode, x);
12628 /* Generate code to load VALS, which is a PARALLEL containing only
12629 constants (for vec_init) or CONST_VECTOR, efficiently into a
12630 register. Returns an RTX to copy into the register, or NULL_RTX
12631 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12634 neon_make_constant (rtx vals)
12636 machine_mode mode = GET_MODE (vals);
12637 rtx target;
12638 rtx const_vec = NULL_RTX;
12639 int n_elts = GET_MODE_NUNITS (mode);
12640 int n_const = 0;
12641 int i;
12643 if (GET_CODE (vals) == CONST_VECTOR)
12644 const_vec = vals;
12645 else if (GET_CODE (vals) == PARALLEL)
12647 /* A CONST_VECTOR must contain only CONST_INTs and
12648 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12649 Only store valid constants in a CONST_VECTOR. */
12650 for (i = 0; i < n_elts; ++i)
12652 rtx x = XVECEXP (vals, 0, i);
12653 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12654 n_const++;
12656 if (n_const == n_elts)
12657 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12659 else
12660 gcc_unreachable ();
12662 if (const_vec != NULL
12663 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12664 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12665 return const_vec;
12666 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12667 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12668 pipeline cycle; creating the constant takes one or two ARM
12669 pipeline cycles. */
12670 return target;
12671 else if (const_vec != NULL_RTX)
12672 /* Load from constant pool. On Cortex-A8 this takes two cycles
12673 (for either double or quad vectors). We cannot take advantage
12674 of single-cycle VLD1 because we need a PC-relative addressing
12675 mode. */
12676 return const_vec;
12677 else
12678 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12679 We cannot construct an initializer. */
12680 return NULL_RTX;
12683 /* Initialize vector TARGET to VALS. */
12685 void
12686 neon_expand_vector_init (rtx target, rtx vals)
12688 machine_mode mode = GET_MODE (target);
12689 machine_mode inner_mode = GET_MODE_INNER (mode);
12690 int n_elts = GET_MODE_NUNITS (mode);
12691 int n_var = 0, one_var = -1;
12692 bool all_same = true;
12693 rtx x, mem;
12694 int i;
12696 for (i = 0; i < n_elts; ++i)
12698 x = XVECEXP (vals, 0, i);
12699 if (!CONSTANT_P (x))
12700 ++n_var, one_var = i;
12702 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12703 all_same = false;
12706 if (n_var == 0)
12708 rtx constant = neon_make_constant (vals);
12709 if (constant != NULL_RTX)
12711 emit_move_insn (target, constant);
12712 return;
12716 /* Splat a single non-constant element if we can. */
12717 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12719 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12720 emit_insn (gen_rtx_SET (VOIDmode, target,
12721 gen_rtx_VEC_DUPLICATE (mode, x)));
12722 return;
12725 /* One field is non-constant. Load constant then overwrite varying
12726 field. This is more efficient than using the stack. */
12727 if (n_var == 1)
12729 rtx copy = copy_rtx (vals);
12730 rtx index = GEN_INT (one_var);
12732 /* Load constant part of vector, substitute neighboring value for
12733 varying element. */
12734 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12735 neon_expand_vector_init (target, copy);
12737 /* Insert variable. */
12738 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12739 switch (mode)
12741 case V8QImode:
12742 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12743 break;
12744 case V16QImode:
12745 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12746 break;
12747 case V4HImode:
12748 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12749 break;
12750 case V8HImode:
12751 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12752 break;
12753 case V2SImode:
12754 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12755 break;
12756 case V4SImode:
12757 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12758 break;
12759 case V2SFmode:
12760 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12761 break;
12762 case V4SFmode:
12763 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12764 break;
12765 case V2DImode:
12766 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12767 break;
12768 default:
12769 gcc_unreachable ();
12771 return;
12774 /* Construct the vector in memory one field at a time
12775 and load the whole vector. */
12776 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12777 for (i = 0; i < n_elts; i++)
12778 emit_move_insn (adjust_address_nv (mem, inner_mode,
12779 i * GET_MODE_SIZE (inner_mode)),
12780 XVECEXP (vals, 0, i));
12781 emit_move_insn (target, mem);
12784 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12785 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12786 reported source locations are bogus. */
12788 static void
12789 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12790 const char *err)
12792 HOST_WIDE_INT lane;
12794 gcc_assert (CONST_INT_P (operand));
12796 lane = INTVAL (operand);
12798 if (lane < low || lane >= high)
12799 error (err);
12802 /* Bounds-check lanes. */
12804 void
12805 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12807 bounds_check (operand, low, high, "lane out of range");
12810 /* Bounds-check constants. */
12812 void
12813 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12815 bounds_check (operand, low, high, "constant out of range");
12818 HOST_WIDE_INT
12819 neon_element_bits (machine_mode mode)
12821 if (mode == DImode)
12822 return GET_MODE_BITSIZE (mode);
12823 else
12824 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12828 /* Predicates for `match_operand' and `match_operator'. */
12830 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12831 WB is true if full writeback address modes are allowed and is false
12832 if limited writeback address modes (POST_INC and PRE_DEC) are
12833 allowed. */
12836 arm_coproc_mem_operand (rtx op, bool wb)
12838 rtx ind;
12840 /* Reject eliminable registers. */
12841 if (! (reload_in_progress || reload_completed || lra_in_progress)
12842 && ( reg_mentioned_p (frame_pointer_rtx, op)
12843 || reg_mentioned_p (arg_pointer_rtx, op)
12844 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12845 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12846 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12847 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12848 return FALSE;
12850 /* Constants are converted into offsets from labels. */
12851 if (!MEM_P (op))
12852 return FALSE;
12854 ind = XEXP (op, 0);
12856 if (reload_completed
12857 && (GET_CODE (ind) == LABEL_REF
12858 || (GET_CODE (ind) == CONST
12859 && GET_CODE (XEXP (ind, 0)) == PLUS
12860 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12861 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12862 return TRUE;
12864 /* Match: (mem (reg)). */
12865 if (REG_P (ind))
12866 return arm_address_register_rtx_p (ind, 0);
12868 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12869 acceptable in any case (subject to verification by
12870 arm_address_register_rtx_p). We need WB to be true to accept
12871 PRE_INC and POST_DEC. */
12872 if (GET_CODE (ind) == POST_INC
12873 || GET_CODE (ind) == PRE_DEC
12874 || (wb
12875 && (GET_CODE (ind) == PRE_INC
12876 || GET_CODE (ind) == POST_DEC)))
12877 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12879 if (wb
12880 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12881 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12882 && GET_CODE (XEXP (ind, 1)) == PLUS
12883 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12884 ind = XEXP (ind, 1);
12886 /* Match:
12887 (plus (reg)
12888 (const)). */
12889 if (GET_CODE (ind) == PLUS
12890 && REG_P (XEXP (ind, 0))
12891 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12892 && CONST_INT_P (XEXP (ind, 1))
12893 && INTVAL (XEXP (ind, 1)) > -1024
12894 && INTVAL (XEXP (ind, 1)) < 1024
12895 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12896 return TRUE;
12898 return FALSE;
12901 /* Return TRUE if OP is a memory operand which we can load or store a vector
12902 to/from. TYPE is one of the following values:
12903 0 - Vector load/store (vldr)
12904 1 - Core registers (ldm)
12905 2 - Element/structure loads (vld1)
12908 neon_vector_mem_operand (rtx op, int type, bool strict)
12910 rtx ind;
12912 /* Reject eliminable registers. */
12913 if (! (reload_in_progress || reload_completed)
12914 && ( reg_mentioned_p (frame_pointer_rtx, op)
12915 || reg_mentioned_p (arg_pointer_rtx, op)
12916 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12917 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12918 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12919 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12920 return !strict;
12922 /* Constants are converted into offsets from labels. */
12923 if (!MEM_P (op))
12924 return FALSE;
12926 ind = XEXP (op, 0);
12928 if (reload_completed
12929 && (GET_CODE (ind) == LABEL_REF
12930 || (GET_CODE (ind) == CONST
12931 && GET_CODE (XEXP (ind, 0)) == PLUS
12932 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12933 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12934 return TRUE;
12936 /* Match: (mem (reg)). */
12937 if (REG_P (ind))
12938 return arm_address_register_rtx_p (ind, 0);
12940 /* Allow post-increment with Neon registers. */
12941 if ((type != 1 && GET_CODE (ind) == POST_INC)
12942 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12943 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12945 /* Allow post-increment by register for VLDn. */
12946 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12947 && GET_CODE (XEXP (ind, 1)) == PLUS
12948 && REG_P (XEXP (XEXP (ind, 1), 1)))
12949 return true;
12951 /* Match:
12952 (plus (reg)
12953 (const)). */
12954 if (type == 0
12955 && GET_CODE (ind) == PLUS
12956 && REG_P (XEXP (ind, 0))
12957 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12958 && CONST_INT_P (XEXP (ind, 1))
12959 && INTVAL (XEXP (ind, 1)) > -1024
12960 /* For quad modes, we restrict the constant offset to be slightly less
12961 than what the instruction format permits. We have no such constraint
12962 on double mode offsets. (This must match arm_legitimate_index_p.) */
12963 && (INTVAL (XEXP (ind, 1))
12964 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12965 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12966 return TRUE;
12968 return FALSE;
12971 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12972 type. */
12974 neon_struct_mem_operand (rtx op)
12976 rtx ind;
12978 /* Reject eliminable registers. */
12979 if (! (reload_in_progress || reload_completed)
12980 && ( reg_mentioned_p (frame_pointer_rtx, op)
12981 || reg_mentioned_p (arg_pointer_rtx, op)
12982 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12983 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12984 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12985 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12986 return FALSE;
12988 /* Constants are converted into offsets from labels. */
12989 if (!MEM_P (op))
12990 return FALSE;
12992 ind = XEXP (op, 0);
12994 if (reload_completed
12995 && (GET_CODE (ind) == LABEL_REF
12996 || (GET_CODE (ind) == CONST
12997 && GET_CODE (XEXP (ind, 0)) == PLUS
12998 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12999 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13000 return TRUE;
13002 /* Match: (mem (reg)). */
13003 if (REG_P (ind))
13004 return arm_address_register_rtx_p (ind, 0);
13006 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13007 if (GET_CODE (ind) == POST_INC
13008 || GET_CODE (ind) == PRE_DEC)
13009 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13011 return FALSE;
13014 /* Return true if X is a register that will be eliminated later on. */
13016 arm_eliminable_register (rtx x)
13018 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13019 || REGNO (x) == ARG_POINTER_REGNUM
13020 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13021 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13024 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13025 coprocessor registers. Otherwise return NO_REGS. */
13027 enum reg_class
13028 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13030 if (mode == HFmode)
13032 if (!TARGET_NEON_FP16)
13033 return GENERAL_REGS;
13034 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13035 return NO_REGS;
13036 return GENERAL_REGS;
13039 /* The neon move patterns handle all legitimate vector and struct
13040 addresses. */
13041 if (TARGET_NEON
13042 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13043 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13044 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13045 || VALID_NEON_STRUCT_MODE (mode)))
13046 return NO_REGS;
13048 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13049 return NO_REGS;
13051 return GENERAL_REGS;
13054 /* Values which must be returned in the most-significant end of the return
13055 register. */
13057 static bool
13058 arm_return_in_msb (const_tree valtype)
13060 return (TARGET_AAPCS_BASED
13061 && BYTES_BIG_ENDIAN
13062 && (AGGREGATE_TYPE_P (valtype)
13063 || TREE_CODE (valtype) == COMPLEX_TYPE
13064 || FIXED_POINT_TYPE_P (valtype)));
13067 /* Return TRUE if X references a SYMBOL_REF. */
13069 symbol_mentioned_p (rtx x)
13071 const char * fmt;
13072 int i;
13074 if (GET_CODE (x) == SYMBOL_REF)
13075 return 1;
13077 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13078 are constant offsets, not symbols. */
13079 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13080 return 0;
13082 fmt = GET_RTX_FORMAT (GET_CODE (x));
13084 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13086 if (fmt[i] == 'E')
13088 int j;
13090 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13091 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13092 return 1;
13094 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13095 return 1;
13098 return 0;
13101 /* Return TRUE if X references a LABEL_REF. */
13103 label_mentioned_p (rtx x)
13105 const char * fmt;
13106 int i;
13108 if (GET_CODE (x) == LABEL_REF)
13109 return 1;
13111 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13112 instruction, but they are constant offsets, not symbols. */
13113 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13114 return 0;
13116 fmt = GET_RTX_FORMAT (GET_CODE (x));
13117 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13119 if (fmt[i] == 'E')
13121 int j;
13123 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13124 if (label_mentioned_p (XVECEXP (x, i, j)))
13125 return 1;
13127 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13128 return 1;
13131 return 0;
13135 tls_mentioned_p (rtx x)
13137 switch (GET_CODE (x))
13139 case CONST:
13140 return tls_mentioned_p (XEXP (x, 0));
13142 case UNSPEC:
13143 if (XINT (x, 1) == UNSPEC_TLS)
13144 return 1;
13146 default:
13147 return 0;
13151 /* Must not copy any rtx that uses a pc-relative address. */
13153 static bool
13154 arm_cannot_copy_insn_p (rtx_insn *insn)
13156 /* The tls call insn cannot be copied, as it is paired with a data
13157 word. */
13158 if (recog_memoized (insn) == CODE_FOR_tlscall)
13159 return true;
13161 subrtx_iterator::array_type array;
13162 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13164 const_rtx x = *iter;
13165 if (GET_CODE (x) == UNSPEC
13166 && (XINT (x, 1) == UNSPEC_PIC_BASE
13167 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13168 return true;
13170 return false;
13173 enum rtx_code
13174 minmax_code (rtx x)
13176 enum rtx_code code = GET_CODE (x);
13178 switch (code)
13180 case SMAX:
13181 return GE;
13182 case SMIN:
13183 return LE;
13184 case UMIN:
13185 return LEU;
13186 case UMAX:
13187 return GEU;
13188 default:
13189 gcc_unreachable ();
13193 /* Match pair of min/max operators that can be implemented via usat/ssat. */
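/* For example, clamping to [0, 255] (LO_BOUND 0, HI_BOUND 255) matches
   the unsigned form with *MASK = 8 (a usat with saturation width 8),
   while clamping to [-128, 127] matches the signed form, also with
   *MASK = 8 (an ssat with saturation width 8).  */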
13195 bool
13196 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13197 int *mask, bool *signed_sat)
13199 /* The high bound must be a power of two minus one. */
13200 int log = exact_log2 (INTVAL (hi_bound) + 1);
13201 if (log == -1)
13202 return false;
13204 /* The low bound is either zero (for usat) or one less than the
13205 negation of the high bound (for ssat). */
13206 if (INTVAL (lo_bound) == 0)
13208 if (mask)
13209 *mask = log;
13210 if (signed_sat)
13211 *signed_sat = false;
13213 return true;
13216 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13218 if (mask)
13219 *mask = log + 1;
13220 if (signed_sat)
13221 *signed_sat = true;
13223 return true;
13226 return false;
13229 /* Return 1 if memory locations are adjacent. */
13231 adjacent_mem_locations (rtx a, rtx b)
13233 /* We don't guarantee to preserve the order of these memory refs. */
13234 if (volatile_refs_p (a) || volatile_refs_p (b))
13235 return 0;
13237 if ((REG_P (XEXP (a, 0))
13238 || (GET_CODE (XEXP (a, 0)) == PLUS
13239 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13240 && (REG_P (XEXP (b, 0))
13241 || (GET_CODE (XEXP (b, 0)) == PLUS
13242 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13244 HOST_WIDE_INT val0 = 0, val1 = 0;
13245 rtx reg0, reg1;
13246 int val_diff;
13248 if (GET_CODE (XEXP (a, 0)) == PLUS)
13250 reg0 = XEXP (XEXP (a, 0), 0);
13251 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13253 else
13254 reg0 = XEXP (a, 0);
13256 if (GET_CODE (XEXP (b, 0)) == PLUS)
13258 reg1 = XEXP (XEXP (b, 0), 0);
13259 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13261 else
13262 reg1 = XEXP (b, 0);
13264 /* Don't accept any offset that will require multiple
13265 instructions to handle, since this would cause the
13266 arith_adjacentmem pattern to output an overlong sequence. */
13267 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13268 return 0;
13270 /* Don't allow an eliminable register: register elimination can make
13271 the offset too large. */
13272 if (arm_eliminable_register (reg0))
13273 return 0;
13275 val_diff = val1 - val0;
13277 if (arm_ld_sched)
13279 /* If the target has load delay slots, then there's no benefit
13280 to using an ldm instruction unless the offset is zero and
13281 we are optimizing for size. */
13282 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13283 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13284 && (val_diff == 4 || val_diff == -4));
13287 return ((REGNO (reg0) == REGNO (reg1))
13288 && (val_diff == 4 || val_diff == -4));
13291 return 0;
13294 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13295 for load operations, false for store operations. CONSECUTIVE is true
13296 if the register numbers in the operation must be consecutive in the register
13297 bank. RETURN_PC is true if the value is to be loaded into PC.
13298 The pattern we are trying to match for load is:
13299 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13300 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13303 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13305 where
13306 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13307 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13308 3. If consecutive is TRUE, then for kth register being loaded,
13309 REGNO (R_dk) = REGNO (R_d0) + k.
13310 The pattern for store is similar. */
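/* A minimal sketch of the load form, roughly what "ldm r0, {r4, r5}"
   looks like (modes and details elided):
     (parallel [(set (reg r4) (mem (reg r0)))
                (set (reg r5) (mem (plus (reg r0) (const_int 4))))])
   Here offset is 0, so rule 1 applies, and REGNO increases down the
   list as rule 2 requires.  */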
13311 bool
13312 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13313 bool consecutive, bool return_pc)
13315 HOST_WIDE_INT count = XVECLEN (op, 0);
13316 rtx reg, mem, addr;
13317 unsigned regno;
13318 unsigned first_regno;
13319 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13320 rtx elt;
13321 bool addr_reg_in_reglist = false;
13322 bool update = false;
13323 int reg_increment;
13324 int offset_adj;
13325 int regs_per_val;
13327 /* If not in SImode, then registers must be consecutive
13328 (e.g., VLDM instructions for DFmode). */
13329 gcc_assert ((mode == SImode) || consecutive);
13330 /* Setting return_pc for stores is illegal. */
13331 gcc_assert (!return_pc || load);
13333 /* Set up the increments and the regs per val based on the mode. */
13334 reg_increment = GET_MODE_SIZE (mode);
13335 regs_per_val = reg_increment / 4;
13336 offset_adj = return_pc ? 1 : 0;
13338 if (count <= 1
13339 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13340 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13341 return false;
13343 /* Check if this is a write-back. */
13344 elt = XVECEXP (op, 0, offset_adj);
13345 if (GET_CODE (SET_SRC (elt)) == PLUS)
13347 i++;
13348 base = 1;
13349 update = true;
13351 /* The offset adjustment must be the number of registers being
13352 popped times the size of a single register. */
13353 if (!REG_P (SET_DEST (elt))
13354 || !REG_P (XEXP (SET_SRC (elt), 0))
13355 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13356 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13357 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13358 ((count - 1 - offset_adj) * reg_increment))
13359 return false;
13362 i = i + offset_adj;
13363 base = base + offset_adj;
13364 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13365 success depends on the type: VLDM can do just one reg,
13366 LDM must do at least two. */
13367 if ((count <= i) && (mode == SImode))
13368 return false;
13370 elt = XVECEXP (op, 0, i - 1);
13371 if (GET_CODE (elt) != SET)
13372 return false;
13374 if (load)
13376 reg = SET_DEST (elt);
13377 mem = SET_SRC (elt);
13379 else
13381 reg = SET_SRC (elt);
13382 mem = SET_DEST (elt);
13385 if (!REG_P (reg) || !MEM_P (mem))
13386 return false;
13388 regno = REGNO (reg);
13389 first_regno = regno;
13390 addr = XEXP (mem, 0);
13391 if (GET_CODE (addr) == PLUS)
13393 if (!CONST_INT_P (XEXP (addr, 1)))
13394 return false;
13396 offset = INTVAL (XEXP (addr, 1));
13397 addr = XEXP (addr, 0);
13400 if (!REG_P (addr))
13401 return false;
13403 /* Don't allow SP to be loaded unless it is also the base register. It
13404 guarantees that SP is reset correctly when an LDM instruction
13405 is interrupted. Otherwise, we might end up with a corrupt stack. */
13406 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13407 return false;
13409 for (; i < count; i++)
13411 elt = XVECEXP (op, 0, i);
13412 if (GET_CODE (elt) != SET)
13413 return false;
13415 if (load)
13417 reg = SET_DEST (elt);
13418 mem = SET_SRC (elt);
13420 else
13422 reg = SET_SRC (elt);
13423 mem = SET_DEST (elt);
13426 if (!REG_P (reg)
13427 || GET_MODE (reg) != mode
13428 || REGNO (reg) <= regno
13429 || (consecutive
13430 && (REGNO (reg) !=
13431 (unsigned int) (first_regno + regs_per_val * (i - base))))
13432 /* Don't allow SP to be loaded unless it is also the base register. It
13433 guarantees that SP is reset correctly when an LDM instruction
13434 is interrupted. Otherwise, we might end up with a corrupt stack. */
13435 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13436 || !MEM_P (mem)
13437 || GET_MODE (mem) != mode
13438 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13439 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13440 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13441 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13442 offset + (i - base) * reg_increment))
13443 && (!REG_P (XEXP (mem, 0))
13444 || offset + (i - base) * reg_increment != 0)))
13445 return false;
13447 regno = REGNO (reg);
13448 if (regno == REGNO (addr))
13449 addr_reg_in_reglist = true;
13452 if (load)
13454 if (update && addr_reg_in_reglist)
13455 return false;
13457 /* For Thumb-1, address register is always modified - either by write-back
13458 or by explicit load. If the pattern does not describe an update,
13459 then the address register must be in the list of loaded registers. */
13460 if (TARGET_THUMB1)
13461 return update || addr_reg_in_reglist;
13464 return true;
13467 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13468 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13469 instruction. ADD_OFFSET is nonzero if the base address register needs
13470 to be modified with an add instruction before we can use it. */
13472 static bool
13473 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13474 int nops, HOST_WIDE_INT add_offset)
13476 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13477 if the offset isn't small enough. The reason 2 ldrs are faster
13478 is because these ARMs are able to do more than one cache access
13479 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13480 whilst the ARM8 has a double bandwidth cache. This means that
13481 these cores can do both an instruction fetch and a data fetch in
13482 a single cycle, so the trick of calculating the address into a
13483 scratch register (one of the result regs) and then doing a load
13484 multiple actually becomes slower (and no smaller in code size).
13485 That is the transformation
13487 ldr rd1, [rbase + offset]
13488 ldr rd2, [rbase + offset + 4]
13492 add rd1, rbase, offset
13493 ldmia rd1, {rd1, rd2}
13495 produces worse code -- '3 cycles + any stalls on rd2' instead of
13496 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13497 access per cycle, the first sequence could never complete in less
13498 than 6 cycles, whereas the ldm sequence would only take 5 and
13499 would make better use of sequential accesses if not hitting the
13500 cache.
13502 We cheat here and test 'arm_ld_sched' which we currently know to
13503 only be true for the ARM8, ARM9 and StrongARM. If this ever
13504 changes, then the test below needs to be reworked. */
13505 if (nops == 2 && arm_ld_sched && add_offset != 0)
13506 return false;
13508 /* XScale has load-store double instructions, but they have stricter
13509 alignment requirements than load-store multiple, so we cannot
13510 use them.
13512 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13513 the pipeline until completion.
13515 NREGS CYCLES
13516 1 3
13517 2 4
13518 3 5
13519 4 6
13521 An ldr instruction takes 1-3 cycles, but does not block the
13522 pipeline.
13524 NREGS CYCLES
13525 1 1-3
13526 2 2-6
13527 3 3-9
13528 4 4-12
13530 Best case ldr will always win. However, the more ldr instructions
13531 we issue, the less likely we are to be able to schedule them well.
13532 Using ldr instructions also increases code size.
13534 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13535 for counts of 3 or 4 regs. */
13536 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13537 return false;
13538 return true;
13541 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13542 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13543 an array ORDER which describes the sequence to use when accessing the
13544 offsets that produces an ascending order. In this sequence, each
13545 offset must be larger by exactly 4 than the previous one. ORDER[0]
13546 must have been filled in with the lowest offset by the caller.
13547 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13548 we use to verify that ORDER produces an ascending order of registers.
13549 Return true if it was possible to construct such an order, false if
13550 not. */
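/* For example, with NOPS = 4, UNSORTED_OFFSETS = {4, 12, 8, 0} and
   ORDER[0] = 3 (the index of the lowest offset, 0), the loop below
   fills ORDER with {3, 0, 2, 1}, i.e. the ascending chain 0, 4, 8, 12.  */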
13552 static bool
13553 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13554 int *unsorted_regs)
13556 int i;
13557 for (i = 1; i < nops; i++)
13559 int j;
13561 order[i] = order[i - 1];
13562 for (j = 0; j < nops; j++)
13563 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13565 /* We must find exactly one offset that is higher than the
13566 previous one by 4. */
13567 if (order[i] != order[i - 1])
13568 return false;
13569 order[i] = j;
13571 if (order[i] == order[i - 1])
13572 return false;
13573 /* The register numbers must be ascending. */
13574 if (unsorted_regs != NULL
13575 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13576 return false;
13578 return true;
13581 /* Used to determine in a peephole whether a sequence of load
13582 instructions can be changed into a load-multiple instruction.
13583 NOPS is the number of separate load instructions we are examining. The
13584 first NOPS entries in OPERANDS are the destination registers, the
13585 next NOPS entries are memory operands. If this function is
13586 successful, *BASE is set to the common base register of the memory
13587 accesses; *LOAD_OFFSET is set to the first memory location's offset
13588 from that base register.
13589 REGS is an array filled in with the destination register numbers.
13590 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13591 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13592 the sequence of registers in REGS matches the loads from ascending memory
13593 locations, and the function verifies that the register numbers are
13594 themselves ascending. If CHECK_REGS is false, the register numbers
13595 are stored in the order they are found in the operands. */
13596 static int
13597 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13598 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13600 int unsorted_regs[MAX_LDM_STM_OPS];
13601 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13602 int order[MAX_LDM_STM_OPS];
13603 rtx base_reg_rtx = NULL;
13604 int base_reg = -1;
13605 int i, ldm_case;
13607 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13608 easily extended if required. */
13609 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13611 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13613 /* Loop over the operands and check that the memory references are
13614 suitable (i.e. immediate offsets from the same base register). At
13615 the same time, extract the target register, and the memory
13616 offsets. */
13617 for (i = 0; i < nops; i++)
13619 rtx reg;
13620 rtx offset;
13622 /* Convert a subreg of a mem into the mem itself. */
13623 if (GET_CODE (operands[nops + i]) == SUBREG)
13624 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13626 gcc_assert (MEM_P (operands[nops + i]));
13628 /* Don't reorder volatile memory references; it doesn't seem worth
13629 looking for the case where the order is ok anyway. */
13630 if (MEM_VOLATILE_P (operands[nops + i]))
13631 return 0;
13633 offset = const0_rtx;
13635 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13636 || (GET_CODE (reg) == SUBREG
13637 && REG_P (reg = SUBREG_REG (reg))))
13638 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13639 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13640 || (GET_CODE (reg) == SUBREG
13641 && REG_P (reg = SUBREG_REG (reg))))
13642 && (CONST_INT_P (offset
13643 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13645 if (i == 0)
13647 base_reg = REGNO (reg);
13648 base_reg_rtx = reg;
13649 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13650 return 0;
13652 else if (base_reg != (int) REGNO (reg))
13653 /* Not addressed from the same base register. */
13654 return 0;
13656 unsorted_regs[i] = (REG_P (operands[i])
13657 ? REGNO (operands[i])
13658 : REGNO (SUBREG_REG (operands[i])));
13660 /* If it isn't an integer register, or if it overwrites the
13661 base register but isn't the last insn in the list, then
13662 we can't do this. */
13663 if (unsorted_regs[i] < 0
13664 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13665 || unsorted_regs[i] > 14
13666 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13667 return 0;
13669 /* Don't allow SP to be loaded unless it is also the base
13670 register. It guarantees that SP is reset correctly when
13671 an LDM instruction is interrupted. Otherwise, we might
13672 end up with a corrupt stack. */
13673 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13674 return 0;
13676 unsorted_offsets[i] = INTVAL (offset);
13677 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13678 order[0] = i;
13680 else
13681 /* Not a suitable memory address. */
13682 return 0;
13685 /* All the useful information has now been extracted from the
13686 operands into unsorted_regs and unsorted_offsets; additionally,
13687 order[0] has been set to the lowest offset in the list. Sort
13688 the offsets into order, verifying that they are adjacent, and
13689 check that the register numbers are ascending. */
13690 if (!compute_offset_order (nops, unsorted_offsets, order,
13691 check_regs ? unsorted_regs : NULL))
13692 return 0;
13694 if (saved_order)
13695 memcpy (saved_order, order, sizeof order);
13697 if (base)
13699 *base = base_reg;
13701 for (i = 0; i < nops; i++)
13702 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13704 *load_offset = unsorted_offsets[order[0]];
13707 if (TARGET_THUMB1
13708 && !peep2_reg_dead_p (nops, base_reg_rtx))
13709 return 0;
13711 if (unsorted_offsets[order[0]] == 0)
13712 ldm_case = 1; /* ldmia */
13713 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13714 ldm_case = 2; /* ldmib */
13715 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13716 ldm_case = 3; /* ldmda */
13717 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13718 ldm_case = 4; /* ldmdb */
13719 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13720 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13721 ldm_case = 5;
13722 else
13723 return 0;
13725 if (!multiple_operation_profitable_p (false, nops,
13726 ldm_case == 5
13727 ? unsorted_offsets[order[0]] : 0))
13728 return 0;
13730 return ldm_case;
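/* Illustrative summary (editorial note): with four loads from [rB, #0]
   through [rB, #12] the lowest sorted offset is 0 and case 1 (ldmia) is
   chosen; offsets #4..#16 give case 2 (ldmib, ARM state only); #-12..#0
   give case 3 (ldmda, ARM state only); #-16..#-4 give case 4 (ldmdb).
   Any other run of adjacent offsets whose lowest value (or its negation)
   is a valid ARM immediate falls into case 5, for which the caller first
   adds that offset into a register and then uses ldmia.  */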
13733 /* Used to determine in a peephole whether a sequence of store instructions can
13734 be changed into a store-multiple instruction.
13735 NOPS is the number of separate store instructions we are examining.
13736 NOPS_TOTAL is the total number of instructions recognized by the peephole
13737 pattern.
13738 The first NOPS entries in OPERANDS are the source registers, the next
13739 NOPS entries are memory operands. If this function is successful, *BASE is
13740 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13741 to the first memory location's offset from that base register. REGS is an
13742 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13743 likewise filled with the corresponding rtx's.
13744 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13745 numbers to an ascending order of stores.
13746 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13747 from ascending memory locations, and the function verifies that the register
13748 numbers are themselves ascending. If CHECK_REGS is false, the register
13749 numbers are stored in the order they are found in the operands. */
13750 static int
13751 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13752 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13753 HOST_WIDE_INT *load_offset, bool check_regs)
13755 int unsorted_regs[MAX_LDM_STM_OPS];
13756 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13757 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13758 int order[MAX_LDM_STM_OPS];
13759 int base_reg = -1;
13760 rtx base_reg_rtx = NULL;
13761 int i, stm_case;
13763 /* Write back of base register is currently only supported for Thumb 1. */
13764 int base_writeback = TARGET_THUMB1;
13766 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13767 easily extended if required. */
13768 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13770 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13772 /* Loop over the operands and check that the memory references are
13773 suitable (i.e. immediate offsets from the same base register). At
13774 the same time, extract the target register, and the memory
13775 offsets. */
13776 for (i = 0; i < nops; i++)
13778 rtx reg;
13779 rtx offset;
13781 /* Convert a subreg of a mem into the mem itself. */
13782 if (GET_CODE (operands[nops + i]) == SUBREG)
13783 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13785 gcc_assert (MEM_P (operands[nops + i]));
13787 /* Don't reorder volatile memory references; it doesn't seem worth
13788 looking for the case where the order is ok anyway. */
13789 if (MEM_VOLATILE_P (operands[nops + i]))
13790 return 0;
13792 offset = const0_rtx;
13794 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13795 || (GET_CODE (reg) == SUBREG
13796 && REG_P (reg = SUBREG_REG (reg))))
13797 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13798 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13799 || (GET_CODE (reg) == SUBREG
13800 && REG_P (reg = SUBREG_REG (reg))))
13801 && (CONST_INT_P (offset
13802 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13804 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13805 ? operands[i] : SUBREG_REG (operands[i]));
13806 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13808 if (i == 0)
13810 base_reg = REGNO (reg);
13811 base_reg_rtx = reg;
13812 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13813 return 0;
13815 else if (base_reg != (int) REGNO (reg))
13816 /* Not addressed from the same base register. */
13817 return 0;
13819 /* If it isn't an integer register, then we can't do this. */
13820 if (unsorted_regs[i] < 0
13821 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13822 /* The effects are unpredictable if the base register is
13823 both updated and stored. */
13824 || (base_writeback && unsorted_regs[i] == base_reg)
13825 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13826 || unsorted_regs[i] > 14)
13827 return 0;
13829 unsorted_offsets[i] = INTVAL (offset);
13830 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13831 order[0] = i;
13833 else
13834 /* Not a suitable memory address. */
13835 return 0;
13838 /* All the useful information has now been extracted from the
13839 operands into unsorted_regs and unsorted_offsets; additionally,
13840 order[0] has been set to the lowest offset in the list. Sort
13841 the offsets into order, verifying that they are adjacent, and
13842 check that the register numbers are ascending. */
13843 if (!compute_offset_order (nops, unsorted_offsets, order,
13844 check_regs ? unsorted_regs : NULL))
13845 return 0;
13847 if (saved_order)
13848 memcpy (saved_order, order, sizeof order);
13850 if (base)
13852 *base = base_reg;
13854 for (i = 0; i < nops; i++)
13856 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13857 if (reg_rtxs)
13858 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13861 *load_offset = unsorted_offsets[order[0]];
13864 if (TARGET_THUMB1
13865 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13866 return 0;
13868 if (unsorted_offsets[order[0]] == 0)
13869 stm_case = 1; /* stmia */
13870 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13871 stm_case = 2; /* stmib */
13872 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13873 stm_case = 3; /* stmda */
13874 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13875 stm_case = 4; /* stmdb */
13876 else
13877 return 0;
13879 if (!multiple_operation_profitable_p (false, nops, 0))
13880 return 0;
13882 return stm_case;
13885 /* Routines for use in generating RTL. */
13887 /* Generate a load-multiple instruction. COUNT is the number of loads in
13888 the instruction; REGS and MEMS are arrays containing the operands.
13889 BASEREG is the base register to be used in addressing the memory operands.
13890 WBACK_OFFSET is nonzero if the instruction should update the base
13891 register. */
13893 static rtx
13894 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13895 HOST_WIDE_INT wback_offset)
13897 int i = 0, j;
13898 rtx result;
13900 if (!multiple_operation_profitable_p (false, count, 0))
13902 rtx seq;
13904 start_sequence ();
13906 for (i = 0; i < count; i++)
13907 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13909 if (wback_offset != 0)
13910 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13912 seq = get_insns ();
13913 end_sequence ();
13915 return seq;
13918 result = gen_rtx_PARALLEL (VOIDmode,
13919 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13920 if (wback_offset != 0)
13922 XVECEXP (result, 0, 0)
13923 = gen_rtx_SET (VOIDmode, basereg,
13924 plus_constant (Pmode, basereg, wback_offset));
13925 i = 1;
13926 count++;
13929 for (j = 0; i < count; i++, j++)
13930 XVECEXP (result, 0, i)
13931 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13933 return result;
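/* Sketch of the result (editorial note): for COUNT == 2, REGS == { 4, 5 }
   and a write-back offset of 8, the PARALLEL built above has the shape
       (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                  (set (reg:SI 4) (mem ...))
                  (set (reg:SI 5) (mem ...))])
   which is later matched by a load-multiple pattern in the machine
   description.  */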
13936 /* Generate a store-multiple instruction. COUNT is the number of stores in
13937 the instruction; REGS and MEMS are arrays containing the operands.
13938 BASEREG is the base register to be used in addressing the memory operands.
13939 WBACK_OFFSET is nonzero if the instruction should update the base
13940 register. */
13942 static rtx
13943 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13944 HOST_WIDE_INT wback_offset)
13946 int i = 0, j;
13947 rtx result;
13949 if (GET_CODE (basereg) == PLUS)
13950 basereg = XEXP (basereg, 0);
13952 if (!multiple_operation_profitable_p (false, count, 0))
13954 rtx seq;
13956 start_sequence ();
13958 for (i = 0; i < count; i++)
13959 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13961 if (wback_offset != 0)
13962 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13964 seq = get_insns ();
13965 end_sequence ();
13967 return seq;
13970 result = gen_rtx_PARALLEL (VOIDmode,
13971 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13972 if (wback_offset != 0)
13974 XVECEXP (result, 0, 0)
13975 = gen_rtx_SET (VOIDmode, basereg,
13976 plus_constant (Pmode, basereg, wback_offset));
13977 i = 1;
13978 count++;
13981 for (j = 0; i < count; i++, j++)
13982 XVECEXP (result, 0, i)
13983 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13985 return result;
13988 /* Generate either a load-multiple or a store-multiple instruction. This
13989 function can be used in situations where we can start with a single MEM
13990 rtx and adjust its address upwards.
13991 COUNT is the number of operations in the instruction, not counting a
13992 possible update of the base register. REGS is an array containing the
13993 register operands.
13994 BASEREG is the base register to be used in addressing the memory operands,
13995 which are constructed from BASEMEM.
13996 WRITE_BACK specifies whether the generated instruction should include an
13997 update of the base register.
13998 OFFSETP is used to pass an offset to and from this function; this offset
13999 is not used when constructing the address (instead BASEMEM should have an
14000 appropriate offset in its address), it is used only for setting
14001 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14003 static rtx
14004 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14005 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14007 rtx mems[MAX_LDM_STM_OPS];
14008 HOST_WIDE_INT offset = *offsetp;
14009 int i;
14011 gcc_assert (count <= MAX_LDM_STM_OPS);
14013 if (GET_CODE (basereg) == PLUS)
14014 basereg = XEXP (basereg, 0);
14016 for (i = 0; i < count; i++)
14018 rtx addr = plus_constant (Pmode, basereg, i * 4);
14019 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14020 offset += 4;
14023 if (write_back)
14024 *offsetp = offset;
14026 if (is_load)
14027 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14028 write_back ? 4 * count : 0);
14029 else
14030 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14031 write_back ? 4 * count : 0);
14035 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14036 rtx basemem, HOST_WIDE_INT *offsetp)
14038 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14039 offsetp);
14043 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14044 rtx basemem, HOST_WIDE_INT *offsetp)
14046 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14047 offsetp);
14050 /* Called from a peephole2 expander to turn a sequence of loads into an
14051 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14052 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14053 is true if we can reorder the registers because they are used commutatively
14054 subsequently.
14055 Returns true iff we could generate a new instruction. */
14057 bool
14058 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14060 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14061 rtx mems[MAX_LDM_STM_OPS];
14062 int i, j, base_reg;
14063 rtx base_reg_rtx;
14064 HOST_WIDE_INT offset;
14065 int write_back = FALSE;
14066 int ldm_case;
14067 rtx addr;
14069 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14070 &base_reg, &offset, !sort_regs);
14072 if (ldm_case == 0)
14073 return false;
14075 if (sort_regs)
14076 for (i = 0; i < nops - 1; i++)
14077 for (j = i + 1; j < nops; j++)
14078 if (regs[i] > regs[j])
14080 int t = regs[i];
14081 regs[i] = regs[j];
14082 regs[j] = t;
14084 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14086 if (TARGET_THUMB1)
14088 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14089 gcc_assert (ldm_case == 1 || ldm_case == 5);
14090 write_back = TRUE;
14093 if (ldm_case == 5)
14095 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14096 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14097 offset = 0;
14098 if (!TARGET_THUMB1)
14100 base_reg = regs[0];
14101 base_reg_rtx = newbase;
14105 for (i = 0; i < nops; i++)
14107 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14108 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14109 SImode, addr, 0);
14111 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14112 write_back ? offset + i * 4 : 0));
14113 return true;
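/* Example transformation (editorial note): a peephole2 match of
       ldr r1, [r3, #4]
       ldr r0, [r3]
   with SORT_REGS true is reordered by the bubble sort above and emitted as
       ldmia r3, {r0, r1}
   (on Thumb-1 the base register must be dead and the write-back form
   ldmia r3!, {r0, r1} is generated instead).  */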
14116 /* Called from a peephole2 expander to turn a sequence of stores into an
14117 STM instruction. OPERANDS are the operands found by the peephole matcher;
14118 NOPS indicates how many separate stores we are trying to combine.
14119 Returns true iff we could generate a new instruction. */
14121 bool
14122 gen_stm_seq (rtx *operands, int nops)
14124 int i;
14125 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14126 rtx mems[MAX_LDM_STM_OPS];
14127 int base_reg;
14128 rtx base_reg_rtx;
14129 HOST_WIDE_INT offset;
14130 int write_back = FALSE;
14131 int stm_case;
14132 rtx addr;
14133 bool base_reg_dies;
14135 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14136 mem_order, &base_reg, &offset, true);
14138 if (stm_case == 0)
14139 return false;
14141 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14143 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14144 if (TARGET_THUMB1)
14146 gcc_assert (base_reg_dies);
14147 write_back = TRUE;
14150 if (stm_case == 5)
14152 gcc_assert (base_reg_dies);
14153 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14154 offset = 0;
14157 addr = plus_constant (Pmode, base_reg_rtx, offset);
14159 for (i = 0; i < nops; i++)
14161 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14162 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14163 SImode, addr, 0);
14165 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14166 write_back ? offset + i * 4 : 0));
14167 return true;
14170 /* Called from a peephole2 expander to turn a sequence of stores that are
14171 preceded by constant loads into an STM instruction. OPERANDS are the
14172 operands found by the peephole matcher; NOPS indicates how many
14173 separate stores we are trying to combine; there are 2 * NOPS
14174 instructions in the peephole.
14175 Returns true iff we could generate a new instruction. */
14177 bool
14178 gen_const_stm_seq (rtx *operands, int nops)
14180 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14181 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14182 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14183 rtx mems[MAX_LDM_STM_OPS];
14184 int base_reg;
14185 rtx base_reg_rtx;
14186 HOST_WIDE_INT offset;
14187 int write_back = FALSE;
14188 int stm_case;
14189 rtx addr;
14190 bool base_reg_dies;
14191 int i, j;
14192 HARD_REG_SET allocated;
14194 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14195 mem_order, &base_reg, &offset, false);
14197 if (stm_case == 0)
14198 return false;
14200 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14202 /* If the same register is used more than once, try to find a free
14203 register. */
14204 CLEAR_HARD_REG_SET (allocated);
14205 for (i = 0; i < nops; i++)
14207 for (j = i + 1; j < nops; j++)
14208 if (regs[i] == regs[j])
14210 rtx t = peep2_find_free_register (0, nops * 2,
14211 TARGET_THUMB1 ? "l" : "r",
14212 SImode, &allocated);
14213 if (t == NULL_RTX)
14214 return false;
14215 reg_rtxs[i] = t;
14216 regs[i] = REGNO (t);
14220 /* Compute an ordering that maps the register numbers to an ascending
14221 sequence. */
14222 reg_order[0] = 0;
14223 for (i = 0; i < nops; i++)
14224 if (regs[i] < regs[reg_order[0]])
14225 reg_order[0] = i;
14227 for (i = 1; i < nops; i++)
14229 int this_order = reg_order[i - 1];
14230 for (j = 0; j < nops; j++)
14231 if (regs[j] > regs[reg_order[i - 1]]
14232 && (this_order == reg_order[i - 1]
14233 || regs[j] < regs[this_order]))
14234 this_order = j;
14235 reg_order[i] = this_order;
14238 /* Ensure that registers that must be live after the instruction end
14239 up with the correct value. */
14240 for (i = 0; i < nops; i++)
14242 int this_order = reg_order[i];
14243 if ((this_order != mem_order[i]
14244 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14245 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14246 return false;
14249 /* Load the constants. */
14250 for (i = 0; i < nops; i++)
14252 rtx op = operands[2 * nops + mem_order[i]];
14253 sorted_regs[i] = regs[reg_order[i]];
14254 emit_move_insn (reg_rtxs[reg_order[i]], op);
14257 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14259 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14260 if (TARGET_THUMB1)
14262 gcc_assert (base_reg_dies);
14263 write_back = TRUE;
14266 if (stm_case == 5)
14268 gcc_assert (base_reg_dies);
14269 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14270 offset = 0;
14273 addr = plus_constant (Pmode, base_reg_rtx, offset);
14275 for (i = 0; i < nops; i++)
14277 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14278 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14279 SImode, addr, 0);
14281 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14282 write_back ? offset + i * 4 : 0));
14283 return true;
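/* Example transformation (editorial note): for a matched sequence like
       mov r0, #10 ; str r0, [r4]
       mov r0, #20 ; str r0, [r4, #4]
   the loop above notices that r0 is reused and asks
   peep2_find_free_register for a spare register (say r1), so the emitted
   replacement is
       mov r0, #10
       mov r1, #20
       stmia r4, {r0, r1}  */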
14286 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14287 unaligned copies on processors which support unaligned semantics for those
14288 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14289 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14290 An interleave factor of 1 (the minimum) will perform no interleaving.
14291 Load/store multiple are used for aligned addresses where possible. */
14293 static void
14294 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14295 HOST_WIDE_INT length,
14296 unsigned int interleave_factor)
14298 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14299 int *regnos = XALLOCAVEC (int, interleave_factor);
14300 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14301 HOST_WIDE_INT i, j;
14302 HOST_WIDE_INT remaining = length, words;
14303 rtx halfword_tmp = NULL, byte_tmp = NULL;
14304 rtx dst, src;
14305 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14306 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14307 HOST_WIDE_INT srcoffset, dstoffset;
14308 HOST_WIDE_INT src_autoinc, dst_autoinc;
14309 rtx mem, addr;
14311 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14313 /* Use hard registers if we have aligned source or destination so we can use
14314 load/store multiple with contiguous registers. */
14315 if (dst_aligned || src_aligned)
14316 for (i = 0; i < interleave_factor; i++)
14317 regs[i] = gen_rtx_REG (SImode, i);
14318 else
14319 for (i = 0; i < interleave_factor; i++)
14320 regs[i] = gen_reg_rtx (SImode);
14322 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14323 src = copy_addr_to_reg (XEXP (srcbase, 0));
14325 srcoffset = dstoffset = 0;
14327 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14328 For copying the last bytes we want to subtract this offset again. */
14329 src_autoinc = dst_autoinc = 0;
14331 for (i = 0; i < interleave_factor; i++)
14332 regnos[i] = i;
14334 /* Copy BLOCK_SIZE_BYTES chunks. */
14336 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14338 /* Load words. */
14339 if (src_aligned && interleave_factor > 1)
14341 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14342 TRUE, srcbase, &srcoffset));
14343 src_autoinc += UNITS_PER_WORD * interleave_factor;
14345 else
14347 for (j = 0; j < interleave_factor; j++)
14349 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14350 - src_autoinc));
14351 mem = adjust_automodify_address (srcbase, SImode, addr,
14352 srcoffset + j * UNITS_PER_WORD);
14353 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14355 srcoffset += block_size_bytes;
14358 /* Store words. */
14359 if (dst_aligned && interleave_factor > 1)
14361 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14362 TRUE, dstbase, &dstoffset));
14363 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14365 else
14367 for (j = 0; j < interleave_factor; j++)
14369 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14370 - dst_autoinc));
14371 mem = adjust_automodify_address (dstbase, SImode, addr,
14372 dstoffset + j * UNITS_PER_WORD);
14373 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14375 dstoffset += block_size_bytes;
14378 remaining -= block_size_bytes;
14381 /* Copy any whole words left (note these aren't interleaved with any
14382 subsequent halfword/byte load/stores in the interests of simplicity). */
14384 words = remaining / UNITS_PER_WORD;
14386 gcc_assert (words < interleave_factor);
14388 if (src_aligned && words > 1)
14390 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14391 &srcoffset));
14392 src_autoinc += UNITS_PER_WORD * words;
14394 else
14396 for (j = 0; j < words; j++)
14398 addr = plus_constant (Pmode, src,
14399 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14400 mem = adjust_automodify_address (srcbase, SImode, addr,
14401 srcoffset + j * UNITS_PER_WORD);
14402 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14404 srcoffset += words * UNITS_PER_WORD;
14407 if (dst_aligned && words > 1)
14409 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14410 &dstoffset));
14411 dst_autoinc += words * UNITS_PER_WORD;
14413 else
14415 for (j = 0; j < words; j++)
14417 addr = plus_constant (Pmode, dst,
14418 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14419 mem = adjust_automodify_address (dstbase, SImode, addr,
14420 dstoffset + j * UNITS_PER_WORD);
14421 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14423 dstoffset += words * UNITS_PER_WORD;
14426 remaining -= words * UNITS_PER_WORD;
14428 gcc_assert (remaining < 4);
14430 /* Copy a halfword if necessary. */
14432 if (remaining >= 2)
14434 halfword_tmp = gen_reg_rtx (SImode);
14436 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14437 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14438 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14440 /* Either write out immediately, or delay until we've loaded the last
14441 byte, depending on interleave factor. */
14442 if (interleave_factor == 1)
14444 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14445 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14446 emit_insn (gen_unaligned_storehi (mem,
14447 gen_lowpart (HImode, halfword_tmp)));
14448 halfword_tmp = NULL;
14449 dstoffset += 2;
14452 remaining -= 2;
14453 srcoffset += 2;
14456 gcc_assert (remaining < 2);
14458 /* Copy last byte. */
14460 if ((remaining & 1) != 0)
14462 byte_tmp = gen_reg_rtx (SImode);
14464 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14465 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14466 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14468 if (interleave_factor == 1)
14470 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14471 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14472 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14473 byte_tmp = NULL;
14474 dstoffset++;
14477 remaining--;
14478 srcoffset++;
14481 /* Store last halfword if we haven't done so already. */
14483 if (halfword_tmp)
14485 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14486 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14487 emit_insn (gen_unaligned_storehi (mem,
14488 gen_lowpart (HImode, halfword_tmp)));
14489 dstoffset += 2;
14492 /* Likewise for last byte. */
14494 if (byte_tmp)
14496 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14497 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14498 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14499 dstoffset++;
14502 gcc_assert (remaining == 0 && srcoffset == dstoffset);
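/* Illustration (editorial note): with INTERLEAVE_FACTOR == 2, a word-aligned
   source and an unaligned destination, each 8-byte chunk above is emitted
   roughly as
       ldmia  rS!, {r0, r1}   @ aligned source, base updated by write-back
       str    r0, [rD]        @ unaligned stores at explicit offsets
       str    r1, [rD, #4]
   so both loads can be scheduled ahead of the stores; the trailing halfword
   and byte copies then mop up any remainder smaller than a word.  */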
14505 /* From mips_adjust_block_mem:
14507 Helper function for doing a loop-based block operation on memory
14508 reference MEM. Each iteration of the loop will operate on LENGTH
14509 bytes of MEM.
14511 Create a new base register for use within the loop and point it to
14512 the start of MEM. Create a new memory reference that uses this
14513 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14515 static void
14516 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14517 rtx *loop_mem)
14519 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14521 /* Although the new mem does not refer to a known location,
14522 it does keep up to LENGTH bytes of alignment. */
14523 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14524 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14527 /* From mips_block_move_loop:
14529 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14530 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14531 the memory regions do not overlap. */
14533 static void
14534 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14535 unsigned int interleave_factor,
14536 HOST_WIDE_INT bytes_per_iter)
14538 rtx src_reg, dest_reg, final_src, test;
14539 HOST_WIDE_INT leftover;
14541 leftover = length % bytes_per_iter;
14542 length -= leftover;
14544 /* Create registers and memory references for use within the loop. */
14545 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14546 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14548 /* Calculate the value that SRC_REG should have after the last iteration of
14549 the loop. */
14550 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14551 0, 0, OPTAB_WIDEN);
14553 /* Emit the start of the loop. */
14554 rtx_code_label *label = gen_label_rtx ();
14555 emit_label (label);
14557 /* Emit the loop body. */
14558 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14559 interleave_factor);
14561 /* Move on to the next block. */
14562 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14563 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14565 /* Emit the loop condition. */
14566 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14567 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14569 /* Mop up any left-over bytes. */
14570 if (leftover)
14571 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
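/* Worked example (editorial note): for LENGTH == 40 and BYTES_PER_ITER == 16
   the code above emits a loop that runs twice (copying 16 bytes per pass),
   with the exit test comparing the running source pointer against its
   precomputed final value, and then a straight-line copy of the remaining
   8 bytes.  */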
14574 /* Emit a block move when either the source or destination is unaligned (not
14575 aligned to a four-byte boundary). This may need further tuning depending on
14576 core type, optimize_size setting, etc. */
14578 static int
14579 arm_movmemqi_unaligned (rtx *operands)
14581 HOST_WIDE_INT length = INTVAL (operands[2]);
14583 if (optimize_size)
14585 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14586 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14587 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14588 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14589 or dst_aligned though: allow more interleaving in those cases since the
14590 resulting code can be smaller. */
14591 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14592 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14594 if (length > 12)
14595 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14596 interleave_factor, bytes_per_iter);
14597 else
14598 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14599 interleave_factor);
14601 else
14603 /* Note that the loop created by arm_block_move_unaligned_loop may be
14604 subject to loop unrolling, which makes tuning this condition a little
14605 redundant. */
14606 if (length > 32)
14607 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14608 else
14609 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14612 return 1;
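/* Threshold summary (editorial note): when optimizing for size, copies of
   more than 12 bytes use the loop above, moving 8 bytes per iteration if
   either buffer is word-aligned and 4 otherwise; at other optimization
   levels the loop is used only beyond 32 bytes, with an interleave factor
   of 4 and 16 bytes per iteration, and shorter copies are emitted as
   straight-line code.  */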
14616 arm_gen_movmemqi (rtx *operands)
14618 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14619 HOST_WIDE_INT srcoffset, dstoffset;
14620 int i;
14621 rtx src, dst, srcbase, dstbase;
14622 rtx part_bytes_reg = NULL;
14623 rtx mem;
14625 if (!CONST_INT_P (operands[2])
14626 || !CONST_INT_P (operands[3])
14627 || INTVAL (operands[2]) > 64)
14628 return 0;
14630 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14631 return arm_movmemqi_unaligned (operands);
14633 if (INTVAL (operands[3]) & 3)
14634 return 0;
14636 dstbase = operands[0];
14637 srcbase = operands[1];
14639 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14640 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14642 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14643 out_words_to_go = INTVAL (operands[2]) / 4;
14644 last_bytes = INTVAL (operands[2]) & 3;
14645 dstoffset = srcoffset = 0;
14647 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14648 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14650 for (i = 0; in_words_to_go >= 2; i+=4)
14652 if (in_words_to_go > 4)
14653 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14654 TRUE, srcbase, &srcoffset));
14655 else
14656 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14657 src, FALSE, srcbase,
14658 &srcoffset));
14660 if (out_words_to_go)
14662 if (out_words_to_go > 4)
14663 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14664 TRUE, dstbase, &dstoffset));
14665 else if (out_words_to_go != 1)
14666 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14667 out_words_to_go, dst,
14668 (last_bytes == 0
14669 ? FALSE : TRUE),
14670 dstbase, &dstoffset));
14671 else
14673 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14674 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14675 if (last_bytes != 0)
14677 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14678 dstoffset += 4;
14683 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14684 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14687 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14688 if (out_words_to_go)
14690 rtx sreg;
14692 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14693 sreg = copy_to_reg (mem);
14695 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14696 emit_move_insn (mem, sreg);
14697 in_words_to_go--;
14699 gcc_assert (!in_words_to_go); /* Sanity check */
14702 if (in_words_to_go)
14704 gcc_assert (in_words_to_go > 0);
14706 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14707 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14710 gcc_assert (!last_bytes || part_bytes_reg);
14712 if (BYTES_BIG_ENDIAN && last_bytes)
14714 rtx tmp = gen_reg_rtx (SImode);
14716 /* The bytes we want are in the top end of the word. */
14717 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14718 GEN_INT (8 * (4 - last_bytes))));
14719 part_bytes_reg = tmp;
14721 while (last_bytes)
14723 mem = adjust_automodify_address (dstbase, QImode,
14724 plus_constant (Pmode, dst,
14725 last_bytes - 1),
14726 dstoffset + last_bytes - 1);
14727 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14729 if (--last_bytes)
14731 tmp = gen_reg_rtx (SImode);
14732 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14733 part_bytes_reg = tmp;
14738 else
14740 if (last_bytes > 1)
14742 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14743 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14744 last_bytes -= 2;
14745 if (last_bytes)
14747 rtx tmp = gen_reg_rtx (SImode);
14748 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14749 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14750 part_bytes_reg = tmp;
14751 dstoffset += 2;
14755 if (last_bytes)
14757 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14758 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14762 return 1;
14765 /* Helper for gen_movmem_ldrd_strd. Return a new memory reference whose
14766 address is that of MEM advanced by the size of MEM's mode. */
14767 inline static rtx
14768 next_consecutive_mem (rtx mem)
14770 machine_mode mode = GET_MODE (mem);
14771 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14772 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14774 return adjust_automodify_address (mem, mode, addr, offset);
14777 /* Copy using LDRD/STRD instructions whenever possible.
14778 Returns true upon success. */
14779 bool
14780 gen_movmem_ldrd_strd (rtx *operands)
14782 unsigned HOST_WIDE_INT len;
14783 HOST_WIDE_INT align;
14784 rtx src, dst, base;
14785 rtx reg0;
14786 bool src_aligned, dst_aligned;
14787 bool src_volatile, dst_volatile;
14789 gcc_assert (CONST_INT_P (operands[2]));
14790 gcc_assert (CONST_INT_P (operands[3]));
14792 len = UINTVAL (operands[2]);
14793 if (len > 64)
14794 return false;
14796 /* Maximum alignment we can assume for both src and dst buffers. */
14797 align = INTVAL (operands[3]);
14799 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14800 return false;
14802 /* Place src and dst addresses in registers
14803 and update the corresponding mem rtx. */
14804 dst = operands[0];
14805 dst_volatile = MEM_VOLATILE_P (dst);
14806 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14807 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14808 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14810 src = operands[1];
14811 src_volatile = MEM_VOLATILE_P (src);
14812 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14813 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14814 src = adjust_automodify_address (src, VOIDmode, base, 0);
14816 if (!unaligned_access && !(src_aligned && dst_aligned))
14817 return false;
14819 if (src_volatile || dst_volatile)
14820 return false;
14822 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14823 if (!(dst_aligned || src_aligned))
14824 return arm_gen_movmemqi (operands);
14826 src = adjust_address (src, DImode, 0);
14827 dst = adjust_address (dst, DImode, 0);
14828 while (len >= 8)
14830 len -= 8;
14831 reg0 = gen_reg_rtx (DImode);
14832 if (src_aligned)
14833 emit_move_insn (reg0, src);
14834 else
14835 emit_insn (gen_unaligned_loaddi (reg0, src));
14837 if (dst_aligned)
14838 emit_move_insn (dst, reg0);
14839 else
14840 emit_insn (gen_unaligned_storedi (dst, reg0));
14842 src = next_consecutive_mem (src);
14843 dst = next_consecutive_mem (dst);
14846 gcc_assert (len < 8);
14847 if (len >= 4)
14849 /* At least a word but less than a double-word to copy. Copy a word. */
14850 reg0 = gen_reg_rtx (SImode);
14851 src = adjust_address (src, SImode, 0);
14852 dst = adjust_address (dst, SImode, 0);
14853 if (src_aligned)
14854 emit_move_insn (reg0, src);
14855 else
14856 emit_insn (gen_unaligned_loadsi (reg0, src));
14858 if (dst_aligned)
14859 emit_move_insn (dst, reg0);
14860 else
14861 emit_insn (gen_unaligned_storesi (dst, reg0));
14863 src = next_consecutive_mem (src);
14864 dst = next_consecutive_mem (dst);
14865 len -= 4;
14868 if (len == 0)
14869 return true;
14871 /* Copy the remaining bytes. */
14872 if (len >= 2)
14874 dst = adjust_address (dst, HImode, 0);
14875 src = adjust_address (src, HImode, 0);
14876 reg0 = gen_reg_rtx (SImode);
14877 if (src_aligned)
14878 emit_insn (gen_zero_extendhisi2 (reg0, src));
14879 else
14880 emit_insn (gen_unaligned_loadhiu (reg0, src));
14882 if (dst_aligned)
14883 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14884 else
14885 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14887 src = next_consecutive_mem (src);
14888 dst = next_consecutive_mem (dst);
14889 if (len == 2)
14890 return true;
14893 dst = adjust_address (dst, QImode, 0);
14894 src = adjust_address (src, QImode, 0);
14895 reg0 = gen_reg_rtx (QImode);
14896 emit_move_insn (reg0, src);
14897 emit_move_insn (dst, reg0);
14898 return true;
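/* Worked example (editorial note): a 15-byte copy with both buffers
   word-aligned is emitted by the code above as one DImode move (ldrd/strd),
   one SImode move (ldr/str), one zero-extended HImode move (ldrh/strh) and
   a final QImode byte move, i.e. 8 + 4 + 2 + 1 bytes.  */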
14901 /* Select a dominance comparison mode if possible for a test of the general
14902 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14903 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14904 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14905 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14906 In all cases OP will be either EQ or NE, but we don't need to know which
14907 here. If we are unable to support a dominance comparison we return
14908 CC mode. This will then fail to match for the RTL expressions that
14909 generate this call. */
14910 machine_mode
14911 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14913 enum rtx_code cond1, cond2;
14914 int swapped = 0;
14916 /* Currently we will probably get the wrong result if the individual
14917 comparisons are not simple. This also ensures that it is safe to
14918 reverse a comparison if necessary. */
14919 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14920 != CCmode)
14921 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14922 != CCmode))
14923 return CCmode;
14925 /* The if_then_else variant of this tests the second condition if the
14926 first passes, but is true if the first fails. Reverse the first
14927 condition to get a true "inclusive-or" expression. */
14928 if (cond_or == DOM_CC_NX_OR_Y)
14929 cond1 = reverse_condition (cond1);
14931 /* If the comparisons are not equal, and one doesn't dominate the other,
14932 then we can't do this. */
14933 if (cond1 != cond2
14934 && !comparison_dominates_p (cond1, cond2)
14935 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14936 return CCmode;
14938 if (swapped)
14939 std::swap (cond1, cond2);
14941 switch (cond1)
14943 case EQ:
14944 if (cond_or == DOM_CC_X_AND_Y)
14945 return CC_DEQmode;
14947 switch (cond2)
14949 case EQ: return CC_DEQmode;
14950 case LE: return CC_DLEmode;
14951 case LEU: return CC_DLEUmode;
14952 case GE: return CC_DGEmode;
14953 case GEU: return CC_DGEUmode;
14954 default: gcc_unreachable ();
14957 case LT:
14958 if (cond_or == DOM_CC_X_AND_Y)
14959 return CC_DLTmode;
14961 switch (cond2)
14963 case LT:
14964 return CC_DLTmode;
14965 case LE:
14966 return CC_DLEmode;
14967 case NE:
14968 return CC_DNEmode;
14969 default:
14970 gcc_unreachable ();
14973 case GT:
14974 if (cond_or == DOM_CC_X_AND_Y)
14975 return CC_DGTmode;
14977 switch (cond2)
14979 case GT:
14980 return CC_DGTmode;
14981 case GE:
14982 return CC_DGEmode;
14983 case NE:
14984 return CC_DNEmode;
14985 default:
14986 gcc_unreachable ();
14989 case LTU:
14990 if (cond_or == DOM_CC_X_AND_Y)
14991 return CC_DLTUmode;
14993 switch (cond2)
14995 case LTU:
14996 return CC_DLTUmode;
14997 case LEU:
14998 return CC_DLEUmode;
14999 case NE:
15000 return CC_DNEmode;
15001 default:
15002 gcc_unreachable ();
15005 case GTU:
15006 if (cond_or == DOM_CC_X_AND_Y)
15007 return CC_DGTUmode;
15009 switch (cond2)
15011 case GTU:
15012 return CC_DGTUmode;
15013 case GEU:
15014 return CC_DGEUmode;
15015 case NE:
15016 return CC_DNEmode;
15017 default:
15018 gcc_unreachable ();
15021 /* The remaining cases only occur when both comparisons are the
15022 same. */
15023 case NE:
15024 gcc_assert (cond1 == cond2);
15025 return CC_DNEmode;
15027 case LE:
15028 gcc_assert (cond1 == cond2);
15029 return CC_DLEmode;
15031 case GE:
15032 gcc_assert (cond1 == cond2);
15033 return CC_DGEmode;
15035 case LEU:
15036 gcc_assert (cond1 == cond2);
15037 return CC_DLEUmode;
15039 case GEU:
15040 gcc_assert (cond1 == cond2);
15041 return CC_DGEUmode;
15043 default:
15044 gcc_unreachable ();
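/* Example (editorial note): for a test of the form
       (ne (and (eq r0 (const_int 0)) (eq r1 (const_int 0))) (const_int 0))
   i.e. "r0 == 0 && r1 == 0", both sub-comparisons are EQ and COND_OR is
   DOM_CC_X_AND_Y, so the function returns CC_DEQmode; the patterns using
   that mode emit a cmp followed by a conditional cmpeq.  */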
15048 machine_mode
15049 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15051 /* All floating point compares return CCFP if it is an equality
15052 comparison, and CCFPE otherwise. */
15053 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15055 switch (op)
15057 case EQ:
15058 case NE:
15059 case UNORDERED:
15060 case ORDERED:
15061 case UNLT:
15062 case UNLE:
15063 case UNGT:
15064 case UNGE:
15065 case UNEQ:
15066 case LTGT:
15067 return CCFPmode;
15069 case LT:
15070 case LE:
15071 case GT:
15072 case GE:
15073 return CCFPEmode;
15075 default:
15076 gcc_unreachable ();
15080 /* A compare with a shifted operand. Because of canonicalization, the
15081 comparison will have to be swapped when we emit the assembler. */
15082 if (GET_MODE (y) == SImode
15083 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15084 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15085 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15086 || GET_CODE (x) == ROTATERT))
15087 return CC_SWPmode;
15089 /* This operation is performed swapped, but since we only rely on the Z
15090 flag we don't need an additional mode. */
15091 if (GET_MODE (y) == SImode
15092 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15093 && GET_CODE (x) == NEG
15094 && (op == EQ || op == NE))
15095 return CC_Zmode;
15097 /* This is a special case that is used by combine to allow a
15098 comparison of a shifted byte load to be split into a zero-extend
15099 followed by a comparison of the shifted integer (only valid for
15100 equalities and unsigned inequalities). */
15101 if (GET_MODE (x) == SImode
15102 && GET_CODE (x) == ASHIFT
15103 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15104 && GET_CODE (XEXP (x, 0)) == SUBREG
15105 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15106 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15107 && (op == EQ || op == NE
15108 || op == GEU || op == GTU || op == LTU || op == LEU)
15109 && CONST_INT_P (y))
15110 return CC_Zmode;
15112 /* A construct for a conditional compare, if the false arm contains
15113 0, then both conditions must be true, otherwise either condition
15114 must be true. Not all conditions are possible, so CCmode is
15115 returned if it can't be done. */
15116 if (GET_CODE (x) == IF_THEN_ELSE
15117 && (XEXP (x, 2) == const0_rtx
15118 || XEXP (x, 2) == const1_rtx)
15119 && COMPARISON_P (XEXP (x, 0))
15120 && COMPARISON_P (XEXP (x, 1)))
15121 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15122 INTVAL (XEXP (x, 2)));
15124 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15125 if (GET_CODE (x) == AND
15126 && (op == EQ || op == NE)
15127 && COMPARISON_P (XEXP (x, 0))
15128 && COMPARISON_P (XEXP (x, 1)))
15129 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15130 DOM_CC_X_AND_Y);
15132 if (GET_CODE (x) == IOR
15133 && (op == EQ || op == NE)
15134 && COMPARISON_P (XEXP (x, 0))
15135 && COMPARISON_P (XEXP (x, 1)))
15136 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15137 DOM_CC_X_OR_Y);
15139 /* An operation (on Thumb) where we want to test for a single bit.
15140 This is done by shifting that bit up into the top bit of a
15141 scratch register; we can then branch on the sign bit. */
15142 if (TARGET_THUMB1
15143 && GET_MODE (x) == SImode
15144 && (op == EQ || op == NE)
15145 && GET_CODE (x) == ZERO_EXTRACT
15146 && XEXP (x, 1) == const1_rtx)
15147 return CC_Nmode;
15149 /* An operation that sets the condition codes as a side-effect, the
15150 V flag is not set correctly, so we can only use comparisons where
15151 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15152 instead.) */
15153 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15154 if (GET_MODE (x) == SImode
15155 && y == const0_rtx
15156 && (op == EQ || op == NE || op == LT || op == GE)
15157 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15158 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15159 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15160 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15161 || GET_CODE (x) == LSHIFTRT
15162 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15163 || GET_CODE (x) == ROTATERT
15164 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15165 return CC_NOOVmode;
15167 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15168 return CC_Zmode;
15170 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15171 && GET_CODE (x) == PLUS
15172 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15173 return CC_Cmode;
15175 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15177 switch (op)
15179 case EQ:
15180 case NE:
15181 /* A DImode comparison against zero can be implemented by
15182 or'ing the two halves together. */
15183 if (y == const0_rtx)
15184 return CC_Zmode;
15186 /* We can do an equality test in three Thumb instructions. */
15187 if (!TARGET_32BIT)
15188 return CC_Zmode;
15190 /* FALLTHROUGH */
15192 case LTU:
15193 case LEU:
15194 case GTU:
15195 case GEU:
15196 /* DImode unsigned comparisons can be implemented by cmp +
15197 cmpeq without a scratch register. Not worth doing in
15198 Thumb-2. */
15199 if (TARGET_32BIT)
15200 return CC_CZmode;
15202 /* FALLTHROUGH */
15204 case LT:
15205 case LE:
15206 case GT:
15207 case GE:
15208 /* DImode signed and unsigned comparisons can be implemented
15209 by cmp + sbcs with a scratch register, but that does not
15210 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15211 gcc_assert (op != EQ && op != NE);
15212 return CC_NCVmode;
15214 default:
15215 gcc_unreachable ();
15219 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15220 return GET_MODE (x);
15222 return CCmode;
15225 /* X and Y are two things to compare using CODE. Emit the compare insn and
15226 return the rtx for the CC register in the proper mode. SCRATCH, if nonnull,
15227 is a scratch register that may be needed for some DImode comparisons. */
15229 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15231 machine_mode mode;
15232 rtx cc_reg;
15233 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15235 /* We might have X as a constant, Y as a register because of the predicates
15236 used for cmpdi. If so, force X to a register here. */
15237 if (dimode_comparison && !REG_P (x))
15238 x = force_reg (DImode, x);
15240 mode = SELECT_CC_MODE (code, x, y);
15241 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15243 if (dimode_comparison
15244 && mode != CC_CZmode)
15246 rtx clobber, set;
15248 /* To compare two non-zero values for equality, XOR them and
15249 then compare against zero. Not used for ARM mode; there
15250 CC_CZmode is cheaper. */
15251 if (mode == CC_Zmode && y != const0_rtx)
15253 gcc_assert (!reload_completed);
15254 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15255 y = const0_rtx;
15258 /* A scratch register is required. */
15259 if (reload_completed)
15260 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15261 else
15262 scratch = gen_rtx_SCRATCH (SImode);
15264 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15265 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15266 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15268 else
15269 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15271 return cc_reg;
15274 /* Generate a sequence of insns that will generate the correct return
15275 address mask depending on the physical architecture that the program
15276 is running on. */
15278 arm_gen_return_addr_mask (void)
15280 rtx reg = gen_reg_rtx (Pmode);
15282 emit_insn (gen_return_addr_mask (reg));
15283 return reg;
15286 void
15287 arm_reload_in_hi (rtx *operands)
15289 rtx ref = operands[1];
15290 rtx base, scratch;
15291 HOST_WIDE_INT offset = 0;
15293 if (GET_CODE (ref) == SUBREG)
15295 offset = SUBREG_BYTE (ref);
15296 ref = SUBREG_REG (ref);
15299 if (REG_P (ref))
15301 /* We have a pseudo which has been spilt onto the stack; there
15302 are two cases here: the first where there is a simple
15303 stack-slot replacement and a second where the stack-slot is
15304 out of range, or is used as a subreg. */
15305 if (reg_equiv_mem (REGNO (ref)))
15307 ref = reg_equiv_mem (REGNO (ref));
15308 base = find_replacement (&XEXP (ref, 0));
15310 else
15311 /* The slot is out of range, or was dressed up in a SUBREG. */
15312 base = reg_equiv_address (REGNO (ref));
15314 else
15315 base = find_replacement (&XEXP (ref, 0));
15317 /* Handle the case where the address is too complex to be offset by 1. */
15318 if (GET_CODE (base) == MINUS
15319 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15321 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15323 emit_set_insn (base_plus, base);
15324 base = base_plus;
15326 else if (GET_CODE (base) == PLUS)
15328 /* The addend must be CONST_INT, or we would have dealt with it above. */
15329 HOST_WIDE_INT hi, lo;
15331 offset += INTVAL (XEXP (base, 1));
15332 base = XEXP (base, 0);
15334 /* Rework the address into a legal sequence of insns. */
15335 /* Valid range for lo is -4095 -> 4095 */
15336 lo = (offset >= 0
15337 ? (offset & 0xfff)
15338 : -((-offset) & 0xfff));
15340 /* Corner case: if lo is the max offset then we would be out of range
15341 once we have added the additional 1 below, so bump the msb into the
15342 pre-loading insn(s). */
15343 if (lo == 4095)
15344 lo &= 0x7ff;
15346 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15347 ^ (HOST_WIDE_INT) 0x80000000)
15348 - (HOST_WIDE_INT) 0x80000000);
15350 gcc_assert (hi + lo == offset);
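/* Worked example (editorial note): OFFSET == 4200 splits as LO = 4200 & 0xfff
   = 104 and HI = 4096, so the code below emits "add base_plus, base, #4096"
   and then addresses the two bytes at offsets #104 and #105.  For
   OFFSET == 4095 the corner case above keeps only the low 11 bits
   (LO = 2047, HI = 2048) so that LO + 1 still fits in the load offset
   field.  */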
15352 if (hi != 0)
15354 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15356 /* Get the base address; addsi3 knows how to handle constants
15357 that require more than one insn. */
15358 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15359 base = base_plus;
15360 offset = lo;
15364 /* Operands[2] may overlap operands[0] (though it won't overlap
15365 operands[1]), that's why we asked for a DImode reg -- so we can
15366 use the bit that does not overlap. */
15367 if (REGNO (operands[2]) == REGNO (operands[0]))
15368 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15369 else
15370 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15372 emit_insn (gen_zero_extendqisi2 (scratch,
15373 gen_rtx_MEM (QImode,
15374 plus_constant (Pmode, base,
15375 offset))));
15376 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15377 gen_rtx_MEM (QImode,
15378 plus_constant (Pmode, base,
15379 offset + 1))));
15380 if (!BYTES_BIG_ENDIAN)
15381 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15382 gen_rtx_IOR (SImode,
15383 gen_rtx_ASHIFT
15384 (SImode,
15385 gen_rtx_SUBREG (SImode, operands[0], 0),
15386 GEN_INT (8)),
15387 scratch));
15388 else
15389 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15390 gen_rtx_IOR (SImode,
15391 gen_rtx_ASHIFT (SImode, scratch,
15392 GEN_INT (8)),
15393 gen_rtx_SUBREG (SImode, operands[0], 0)));
15396 /* Handle storing a half-word to memory during reload by synthesizing as two
15397 byte stores. Take care not to clobber the input values until after we
15398 have moved them somewhere safe. This code assumes that if the DImode
15399 scratch in operands[2] overlaps either the input value or output address
15400 in some way, then that value must die in this insn (we absolutely need
15401 two scratch registers for some corner cases). */
15402 void
15403 arm_reload_out_hi (rtx *operands)
15405 rtx ref = operands[0];
15406 rtx outval = operands[1];
15407 rtx base, scratch;
15408 HOST_WIDE_INT offset = 0;
15410 if (GET_CODE (ref) == SUBREG)
15412 offset = SUBREG_BYTE (ref);
15413 ref = SUBREG_REG (ref);
15416 if (REG_P (ref))
15418 /* We have a pseudo which has been spilt onto the stack; there
15419 are two cases here: the first where there is a simple
15420 stack-slot replacement and a second where the stack-slot is
15421 out of range, or is used as a subreg. */
15422 if (reg_equiv_mem (REGNO (ref)))
15424 ref = reg_equiv_mem (REGNO (ref));
15425 base = find_replacement (&XEXP (ref, 0));
15427 else
15428 /* The slot is out of range, or was dressed up in a SUBREG. */
15429 base = reg_equiv_address (REGNO (ref));
15431 else
15432 base = find_replacement (&XEXP (ref, 0));
15434 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15436 /* Handle the case where the address is too complex to be offset by 1. */
15437 if (GET_CODE (base) == MINUS
15438 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15440 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15442 /* Be careful not to destroy OUTVAL. */
15443 if (reg_overlap_mentioned_p (base_plus, outval))
15445 /* Updating base_plus might destroy outval, see if we can
15446 swap the scratch and base_plus. */
15447 if (!reg_overlap_mentioned_p (scratch, outval))
15448 std::swap (scratch, base_plus);
15449 else
15451 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15453 /* Be conservative and copy OUTVAL into the scratch now,
15454 this should only be necessary if outval is a subreg
15455 of something larger than a word. */
15456 /* XXX Might this clobber base? I can't see how it can,
15457 since scratch is known to overlap with OUTVAL, and
15458 must be wider than a word. */
15459 emit_insn (gen_movhi (scratch_hi, outval));
15460 outval = scratch_hi;
15464 emit_set_insn (base_plus, base);
15465 base = base_plus;
15467 else if (GET_CODE (base) == PLUS)
15469 /* The addend must be CONST_INT, or we would have dealt with it above. */
15470 HOST_WIDE_INT hi, lo;
15472 offset += INTVAL (XEXP (base, 1));
15473 base = XEXP (base, 0);
15475 /* Rework the address into a legal sequence of insns. */
15476 /* Valid range for lo is -4095 -> 4095 */
15477 lo = (offset >= 0
15478 ? (offset & 0xfff)
15479 : -((-offset) & 0xfff));
15481 /* Corner case, if lo is the max offset then we would be out of range
15482 once we have added the additional 1 below, so bump the msb into the
15483 pre-loading insn(s). */
15484 if (lo == 4095)
15485 lo &= 0x7ff;
15487 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15488 ^ (HOST_WIDE_INT) 0x80000000)
15489 - (HOST_WIDE_INT) 0x80000000);
15491 gcc_assert (hi + lo == offset);
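/* A worked example of the split above (illustrative values, not from the
   sources): for offset == 4100, lo == 4 and hi == 4096, so base_plus is
   loaded with base + 4096 and the two byte stores below use offsets 4
   and 5.  For the corner case offset == 4095, lo is trimmed to 2047 and
   hi becomes 2048, so the later "offset + 1" access (2048) still lies
   inside the +/-4095 addressing range.  */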
15493 if (hi != 0)
15495 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15497 /* Be careful not to destroy OUTVAL. */
15498 if (reg_overlap_mentioned_p (base_plus, outval))
15500 /* Updating base_plus might destroy outval, see if we
15501 can swap the scratch and base_plus. */
15502 if (!reg_overlap_mentioned_p (scratch, outval))
15503 std::swap (scratch, base_plus);
15504 else
15506 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15508 /* Be conservative and copy outval into scratch now,
15509 this should only be necessary if outval is a
15510 subreg of something larger than a word. */
15511 /* XXX Might this clobber base? I can't see how it
15512 can, since scratch is known to overlap with
15513 outval. */
15514 emit_insn (gen_movhi (scratch_hi, outval));
15515 outval = scratch_hi;
15519 /* Get the base address; addsi3 knows how to handle constants
15520 that require more than one insn. */
15521 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15522 base = base_plus;
15523 offset = lo;
15527 if (BYTES_BIG_ENDIAN)
15529 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15530 plus_constant (Pmode, base,
15531 offset + 1)),
15532 gen_lowpart (QImode, outval)));
15533 emit_insn (gen_lshrsi3 (scratch,
15534 gen_rtx_SUBREG (SImode, outval, 0),
15535 GEN_INT (8)));
15536 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15537 offset)),
15538 gen_lowpart (QImode, scratch)));
15540 else
15542 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15543 offset)),
15544 gen_lowpart (QImode, outval)));
15545 emit_insn (gen_lshrsi3 (scratch,
15546 gen_rtx_SUBREG (SImode, outval, 0),
15547 GEN_INT (8)));
15548 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15549 plus_constant (Pmode, base,
15550 offset + 1)),
15551 gen_lowpart (QImode, scratch)));
15555 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15556 (padded to the size of a word) should be passed in a register. */
15558 static bool
15559 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15561 if (TARGET_AAPCS_BASED)
15562 return must_pass_in_stack_var_size (mode, type);
15563 else
15564 return must_pass_in_stack_var_size_or_pad (mode, type);
15568 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15569 Return true if an argument passed on the stack should be padded upwards,
15570 i.e. if the least-significant byte has useful data.
15571 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15572 aggregate types are placed in the lowest memory address. */
15574 bool
15575 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15577 if (!TARGET_AAPCS_BASED)
15578 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15580 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15581 return false;
15583 return true;
15587 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15588 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15589 register has useful data, and return the opposite if the most
15590 significant byte does. */
15592 bool
15593 arm_pad_reg_upward (machine_mode mode,
15594 tree type, int first ATTRIBUTE_UNUSED)
15596 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15598 /* For AAPCS, small aggregates, small fixed-point types,
15599 and small complex types are always padded upwards. */
15600 if (type)
15602 if ((AGGREGATE_TYPE_P (type)
15603 || TREE_CODE (type) == COMPLEX_TYPE
15604 || FIXED_POINT_TYPE_P (type))
15605 && int_size_in_bytes (type) <= 4)
15606 return true;
15608 else
15610 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15611 && GET_MODE_SIZE (mode) <= 4)
15612 return true;
15616 /* Otherwise, use default padding. */
15617 return !BYTES_BIG_ENDIAN;
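/* For illustration (derived from the checks above): on a big-endian AAPCS
   target a 3-byte aggregate is padded upwards by arm_pad_reg_upward,
   while a plain int falls through to the default and is padded
   downwards; on a little-endian target the function always returns
   true.  */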
15620 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15621 assuming that the address in the base register is word aligned. */
15622 bool
15623 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15625 HOST_WIDE_INT max_offset;
15627 /* Offset must be a multiple of 4 in Thumb mode. */
15628 if (TARGET_THUMB2 && ((offset & 3) != 0))
15629 return false;
15631 if (TARGET_THUMB2)
15632 max_offset = 1020;
15633 else if (TARGET_ARM)
15634 max_offset = 255;
15635 else
15636 return false;
15638 return ((offset <= max_offset) && (offset >= -max_offset));
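/* Usage sketch (values chosen for illustration):
   offset_ok_for_ldrd_strd (1020) -> true on Thumb-2, false on ARM (> 255);
   offset_ok_for_ldrd_strd (1022) -> false on Thumb-2 (not a multiple of 4);
   offset_ok_for_ldrd_strd (-252) -> true on both.  */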
15641 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15642 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15643 Assumes that the address in the base register RN is word aligned. Pattern
15644 guarantees that both memory accesses use the same base register,
15645 the offsets are constants within the range, and the gap between the offsets is 4.
15646 If reload is complete then check that the registers are legal. WBACK indicates whether
15647 address is updated. LOAD indicates whether memory access is load or store. */
15648 bool
15649 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15650 bool wback, bool load)
15652 unsigned int t, t2, n;
15654 if (!reload_completed)
15655 return true;
15657 if (!offset_ok_for_ldrd_strd (offset))
15658 return false;
15660 t = REGNO (rt);
15661 t2 = REGNO (rt2);
15662 n = REGNO (rn);
15664 if ((TARGET_THUMB2)
15665 && ((wback && (n == t || n == t2))
15666 || (t == SP_REGNUM)
15667 || (t == PC_REGNUM)
15668 || (t2 == SP_REGNUM)
15669 || (t2 == PC_REGNUM)
15670 || (!load && (n == PC_REGNUM))
15671 || (load && (t == t2))
15672 /* Triggers Cortex-M3 LDRD errata. */
15673 || (!wback && load && fix_cm3_ldrd && (n == t))))
15674 return false;
15676 if ((TARGET_ARM)
15677 && ((wback && (n == t || n == t2))
15678 || (t2 == PC_REGNUM)
15679 || (t % 2 != 0) /* First destination register is not even. */
15680 || (t2 != t + 1)
15681 /* PC can be used as base register (for offset addressing only),
15682 but it is deprecated. */
15683 || (n == PC_REGNUM)))
15684 return false;
15686 return true;
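/* For example (illustrative register choices): in ARM state r4/r5 form a
   valid LDRD/STRD pair, whereas r5/r6 (odd first register) or r4/r6 (not
   consecutive) do not; in Thumb-2 state almost any pair is accepted as
   long as SP and PC are not involved and the writeback/errata checks
   above pass.  */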
15689 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15690 operand MEM's address contains an immediate offset from the base
15691 register and has no side effects, in which case it sets BASE and
15692 OFFSET accordingly. */
15693 static bool
15694 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15696 rtx addr;
15698 gcc_assert (base != NULL && offset != NULL);
15700 /* TODO: Handle more general memory operand patterns, such as
15701 PRE_DEC and PRE_INC. */
15703 if (side_effects_p (mem))
15704 return false;
15706 /* Can't deal with subregs. */
15707 if (GET_CODE (mem) == SUBREG)
15708 return false;
15710 gcc_assert (MEM_P (mem));
15712 *offset = const0_rtx;
15714 addr = XEXP (mem, 0);
15716 /* If addr isn't valid for DImode, then we can't handle it. */
15717 if (!arm_legitimate_address_p (DImode, addr,
15718 reload_in_progress || reload_completed))
15719 return false;
15721 if (REG_P (addr))
15723 *base = addr;
15724 return true;
15726 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15728 *base = XEXP (addr, 0);
15729 *offset = XEXP (addr, 1);
15730 return (REG_P (*base) && CONST_INT_P (*offset));
15733 return false;
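/* For example (illustrative): an address of the form (reg:SI r4) yields
   *base == r4 and *offset == 0, while (plus:SI (reg:SI r4) (const_int 8))
   yields *base == r4 and *offset == 8; auto-modify addresses and SUBREGs
   are rejected above.  */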
15736 /* Called from a peephole2 to replace two word-size accesses with a
15737 single LDRD/STRD instruction. Returns true iff we can generate a
15738 new instruction sequence. That is, both accesses use the same base
15739 register and the gap between constant offsets is 4. This function
15740 may reorder its operands to match ldrd/strd RTL templates.
15741 OPERANDS are the operands found by the peephole matcher;
15742 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15743 corresponding memory operands. LOAD indicates whether the access
15744 is load or store. CONST_STORE indicates a store of constant
15745 integer values held in OPERANDS[4,5] and assumes that the pattern
15746 is 4 insns long, for the purpose of checking dead registers.
15747 COMMUTE indicates that register operands may be reordered. */
15748 bool
15749 gen_operands_ldrd_strd (rtx *operands, bool load,
15750 bool const_store, bool commute)
15752 int nops = 2;
15753 HOST_WIDE_INT offsets[2], offset;
15754 rtx base = NULL_RTX;
15755 rtx cur_base, cur_offset, tmp;
15756 int i, gap;
15757 HARD_REG_SET regset;
15759 gcc_assert (!const_store || !load);
15760 /* Check that the memory references are immediate offsets from the
15761 same base register. Extract the base register, the destination
15762 registers, and the corresponding memory offsets. */
15763 for (i = 0; i < nops; i++)
15765 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15766 return false;
15768 if (i == 0)
15769 base = cur_base;
15770 else if (REGNO (base) != REGNO (cur_base))
15771 return false;
15773 offsets[i] = INTVAL (cur_offset);
15774 if (GET_CODE (operands[i]) == SUBREG)
15776 tmp = SUBREG_REG (operands[i]);
15777 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15778 operands[i] = tmp;
15782 /* Make sure there is no dependency between the individual loads. */
15783 if (load && REGNO (operands[0]) == REGNO (base))
15784 return false; /* RAW */
15786 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15787 return false; /* WAW */
15789 /* If the same input register is used in both stores
15790 when storing different constants, try to find a free register.
15791 For example, the code
15792 mov r0, 0
15793 str r0, [r2]
15794 mov r0, 1
15795 str r0, [r2, #4]
15796 can be transformed into
15797 mov r1, 0
15798 strd r1, r0, [r2]
15799 in Thumb mode assuming that r1 is free. */
15800 if (const_store
15801 && REGNO (operands[0]) == REGNO (operands[1])
15802 && INTVAL (operands[4]) != INTVAL (operands[5]))
15804 if (TARGET_THUMB2)
15806 CLEAR_HARD_REG_SET (regset);
15807 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15808 if (tmp == NULL_RTX)
15809 return false;
15811 /* Use the new register in the first load to ensure that
15812 if the original input register is not dead after peephole,
15813 then it will have the correct constant value. */
15814 operands[0] = tmp;
15816 else if (TARGET_ARM)
15818 return false;
15819 int regno = REGNO (operands[0]);
15820 if (!peep2_reg_dead_p (4, operands[0]))
15822 /* When the input register is even and is not dead after the
15823 pattern, it has to hold the second constant but we cannot
15824 form a legal STRD in ARM mode with this register as the second
15825 register. */
15826 if (regno % 2 == 0)
15827 return false;
15829 /* Is regno-1 free? */
15830 SET_HARD_REG_SET (regset);
15831 CLEAR_HARD_REG_BIT (regset, regno - 1);
15832 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15833 if (tmp == NULL_RTX)
15834 return false;
15836 operands[0] = tmp;
15838 else
15840 /* Find a DImode register. */
15841 CLEAR_HARD_REG_SET (regset);
15842 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15843 if (tmp != NULL_RTX)
15845 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15846 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15848 else
15850 /* Can we use the input register to form a DI register? */
15851 SET_HARD_REG_SET (regset);
15852 CLEAR_HARD_REG_BIT (regset,
15853 regno % 2 == 0 ? regno + 1 : regno - 1);
15854 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15855 if (tmp == NULL_RTX)
15856 return false;
15857 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15861 gcc_assert (operands[0] != NULL_RTX);
15862 gcc_assert (operands[1] != NULL_RTX);
15863 gcc_assert (REGNO (operands[0]) % 2 == 0);
15864 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15868 /* Make sure the instructions are ordered with lower memory access first. */
15869 if (offsets[0] > offsets[1])
15871 gap = offsets[0] - offsets[1];
15872 offset = offsets[1];
15874 /* Swap the instructions such that lower memory is accessed first. */
15875 std::swap (operands[0], operands[1]);
15876 std::swap (operands[2], operands[3]);
15877 if (const_store)
15878 std::swap (operands[4], operands[5]);
15880 else
15882 gap = offsets[1] - offsets[0];
15883 offset = offsets[0];
15886 /* Make sure accesses are to consecutive memory locations. */
15887 if (gap != 4)
15888 return false;
15890 /* Make sure we generate legal instructions. */
15891 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15892 false, load))
15893 return true;
15895 /* In Thumb state, where registers are almost unconstrained, there
15896 is little hope of fixing it. */
15897 if (TARGET_THUMB2)
15898 return false;
15900 if (load && commute)
15902 /* Try reordering registers. */
15903 std::swap (operands[0], operands[1]);
15904 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15905 false, load))
15906 return true;
15909 if (const_store)
15911 /* If input registers are dead after this pattern, they can be
15912 reordered or replaced by other registers that are free in the
15913 current pattern. */
15914 if (!peep2_reg_dead_p (4, operands[0])
15915 || !peep2_reg_dead_p (4, operands[1]))
15916 return false;
15918 /* Try to reorder the input registers. */
15919 /* For example, the code
15920 mov r0, 0
15921 mov r1, 1
15922 str r1, [r2]
15923 str r0, [r2, #4]
15924 can be transformed into
15925 mov r1, 0
15926 mov r0, 1
15927 strd r0, r1, [r2]
15929 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15930 false, false))
15932 std::swap (operands[0], operands[1]);
15933 return true;
15936 /* Try to find a free DI register. */
15937 CLEAR_HARD_REG_SET (regset);
15938 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15939 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15940 while (true)
15942 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15943 if (tmp == NULL_RTX)
15944 return false;
15946 /* DREG must be an even-numbered register in DImode.
15947 Split it into SI registers. */
15948 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15949 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15950 gcc_assert (operands[0] != NULL_RTX);
15951 gcc_assert (operands[1] != NULL_RTX);
15952 gcc_assert (REGNO (operands[0]) % 2 == 0);
15953 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15955 return (operands_ok_ldrd_strd (operands[0], operands[1],
15956 base, offset,
15957 false, load));
15961 return false;
15967 /* Print a symbolic form of X to the debug file, F. */
15968 static void
15969 arm_print_value (FILE *f, rtx x)
15971 switch (GET_CODE (x))
15973 case CONST_INT:
15974 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15975 return;
15977 case CONST_DOUBLE:
15978 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15979 return;
15981 case CONST_VECTOR:
15983 int i;
15985 fprintf (f, "<");
15986 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15988 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15989 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15990 fputc (',', f);
15992 fprintf (f, ">");
15994 return;
15996 case CONST_STRING:
15997 fprintf (f, "\"%s\"", XSTR (x, 0));
15998 return;
16000 case SYMBOL_REF:
16001 fprintf (f, "`%s'", XSTR (x, 0));
16002 return;
16004 case LABEL_REF:
16005 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16006 return;
16008 case CONST:
16009 arm_print_value (f, XEXP (x, 0));
16010 return;
16012 case PLUS:
16013 arm_print_value (f, XEXP (x, 0));
16014 fprintf (f, "+");
16015 arm_print_value (f, XEXP (x, 1));
16016 return;
16018 case PC:
16019 fprintf (f, "pc");
16020 return;
16022 default:
16023 fprintf (f, "????");
16024 return;
16028 /* Routines for manipulation of the constant pool. */
16030 /* Arm instructions cannot load a large constant directly into a
16031 register; they have to come from a pc relative load. The constant
16032 must therefore be placed in the addressable range of the pc
16033 relative load. Depending on the precise pc relative load
16034 instruction the range is somewhere between 256 bytes and 4k. This
16035 means that we often have to dump a constant inside a function, and
16036 generate code to branch around it.
16038 It is important to minimize this, since the branches will slow
16039 things down and make the code larger.
16041 Normally we can hide the table after an existing unconditional
16042 branch so that there is no interruption of the flow, but in the
16043 worst case the code looks like this:
16045 ldr rn, L1
16047 b L2
16048 align
16049 L1: .long value
16053 ldr rn, L3
16055 b L4
16056 align
16057 L3: .long value
16061 We fix this by performing a scan after scheduling, which notices
16062 which instructions need to have their operands fetched from the
16063 constant table and builds the table.
16065 The algorithm starts by building a table of all the constants that
16066 need fixing up and all the natural barriers in the function (places
16067 where a constant table can be dropped without breaking the flow).
16068 For each fixup we note how far the pc-relative replacement will be
16069 able to reach and the offset of the instruction into the function.
16071 Having built the table we then group the fixes together to form
16072 tables that are as large as possible (subject to addressing
16073 constraints) and emit each table of constants after the last
16074 barrier that is within range of all the instructions in the group.
16075 If a group does not contain a barrier, then we forcibly create one
16076 by inserting a jump instruction into the flow. Once the table has
16077 been inserted, the insns are then modified to reference the
16078 relevant entry in the pool.
16080 Possible enhancements to the algorithm (not implemented) are:
16082 1) For some processors and object formats, there may be benefit in
16083 aligning the pools to the start of cache lines; this alignment
16084 would need to be taken into account when calculating addressability
16085 of a pool. */
16087 /* These typedefs are located at the start of this file, so that
16088 they can be used in the prototypes there. This comment is to
16089 remind readers of that fact so that the following structures
16090 can be understood more easily.
16092 typedef struct minipool_node Mnode;
16093 typedef struct minipool_fixup Mfix; */
16095 struct minipool_node
16097 /* Doubly linked chain of entries. */
16098 Mnode * next;
16099 Mnode * prev;
16100 /* The maximum offset into the code at which this entry can be placed. While
16101 pushing fixes for forward references, all entries are sorted in order
16102 of increasing max_address. */
16103 HOST_WIDE_INT max_address;
16104 /* Similarly for an entry inserted for a backwards ref. */
16105 HOST_WIDE_INT min_address;
16106 /* The number of fixes referencing this entry. This can become zero
16107 if we "unpush" an entry. In this case we ignore the entry when we
16108 come to emit the code. */
16109 int refcount;
16110 /* The offset from the start of the minipool. */
16111 HOST_WIDE_INT offset;
16112 /* The value in table. */
16113 rtx value;
16114 /* The mode of value. */
16115 machine_mode mode;
16116 /* The size of the value. With iWMMXt enabled
16117 sizes > 4 also imply an alignment of 8-bytes. */
16118 int fix_size;
16121 struct minipool_fixup
16123 Mfix * next;
16124 rtx_insn * insn;
16125 HOST_WIDE_INT address;
16126 rtx * loc;
16127 machine_mode mode;
16128 int fix_size;
16129 rtx value;
16130 Mnode * minipool;
16131 HOST_WIDE_INT forwards;
16132 HOST_WIDE_INT backwards;
16135 /* Fixes less than a word need padding out to a word boundary. */
16136 #define MINIPOOL_FIX_SIZE(mode) \
16137 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
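/* For example, a QImode or HImode fixup still reserves 4 bytes in the
   pool, while 8- and 16-byte values reserve their natural size.  */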
16139 static Mnode * minipool_vector_head;
16140 static Mnode * minipool_vector_tail;
16141 static rtx_code_label *minipool_vector_label;
16142 static int minipool_pad;
16144 /* The linked list of all minipool fixes required for this function. */
16145 Mfix * minipool_fix_head;
16146 Mfix * minipool_fix_tail;
16147 /* The fix entry for the current minipool, once it has been placed. */
16148 Mfix * minipool_barrier;
16150 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16151 #define JUMP_TABLES_IN_TEXT_SECTION 0
16152 #endif
16154 static HOST_WIDE_INT
16155 get_jump_table_size (rtx_jump_table_data *insn)
16157 /* ADDR_VECs only take room if read-only data goes into the text
16158 section. */
16159 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16161 rtx body = PATTERN (insn);
16162 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16163 HOST_WIDE_INT size;
16164 HOST_WIDE_INT modesize;
16166 modesize = GET_MODE_SIZE (GET_MODE (body));
16167 size = modesize * XVECLEN (body, elt);
16168 switch (modesize)
16170 case 1:
16171 /* Round up size of TBB table to a halfword boundary. */
16172 size = (size + 1) & ~(HOST_WIDE_INT)1;
16173 break;
16174 case 2:
16175 /* No padding necessary for TBH. */
16176 break;
16177 case 4:
16178 /* Add two bytes for alignment on Thumb. */
16179 if (TARGET_THUMB)
16180 size += 2;
16181 break;
16182 default:
16183 gcc_unreachable ();
16185 return size;
16188 return 0;
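/* Illustrative sizes (only counted when jump tables live in the text
   section): a TBB-style table of 5 QImode entries is 5 bytes rounded up
   to 6; a 7-entry SImode table on Thumb is 28 bytes plus 2 bytes of
   alignment padding.  */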
16191 /* Return the maximum amount of padding that will be inserted before
16192 label LABEL. */
16194 static HOST_WIDE_INT
16195 get_label_padding (rtx label)
16197 HOST_WIDE_INT align, min_insn_size;
16199 align = 1 << label_to_alignment (label);
16200 min_insn_size = TARGET_THUMB ? 2 : 4;
16201 return align > min_insn_size ? align - min_insn_size : 0;
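/* E.g. a label aligned to an 8-byte boundary can be preceded by up to
   8 - 2 = 6 bytes of padding on Thumb, or 8 - 4 = 4 bytes on ARM.  */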
16204 /* Move a minipool fix MP from its current location to before MAX_MP.
16205 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16206 constraints may need updating. */
16207 static Mnode *
16208 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16209 HOST_WIDE_INT max_address)
16211 /* The code below assumes these are different. */
16212 gcc_assert (mp != max_mp);
16214 if (max_mp == NULL)
16216 if (max_address < mp->max_address)
16217 mp->max_address = max_address;
16219 else
16221 if (max_address > max_mp->max_address - mp->fix_size)
16222 mp->max_address = max_mp->max_address - mp->fix_size;
16223 else
16224 mp->max_address = max_address;
16226 /* Unlink MP from its current position. Since max_mp is non-null,
16227 mp->prev must be non-null. */
16228 mp->prev->next = mp->next;
16229 if (mp->next != NULL)
16230 mp->next->prev = mp->prev;
16231 else
16232 minipool_vector_tail = mp->prev;
16234 /* Re-insert it before MAX_MP. */
16235 mp->next = max_mp;
16236 mp->prev = max_mp->prev;
16237 max_mp->prev = mp;
16239 if (mp->prev != NULL)
16240 mp->prev->next = mp;
16241 else
16242 minipool_vector_head = mp;
16245 /* Save the new entry. */
16246 max_mp = mp;
16248 /* Scan over the preceding entries and adjust their addresses as
16249 required. */
16250 while (mp->prev != NULL
16251 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16253 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16254 mp = mp->prev;
16257 return max_mp;
16260 /* Add a constant to the minipool for a forward reference. Returns the
16261 node added or NULL if the constant will not fit in this pool. */
16262 static Mnode *
16263 add_minipool_forward_ref (Mfix *fix)
16265 /* If set, max_mp is the first pool_entry that has a lower
16266 constraint than the one we are trying to add. */
16267 Mnode * max_mp = NULL;
16268 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16269 Mnode * mp;
16271 /* If the minipool starts before the end of FIX->INSN then this FIX
16272 can not be placed into the current pool. Furthermore, adding the
16273 new constant pool entry may cause the pool to start FIX_SIZE bytes
16274 earlier. */
16275 if (minipool_vector_head &&
16276 (fix->address + get_attr_length (fix->insn)
16277 >= minipool_vector_head->max_address - fix->fix_size))
16278 return NULL;
16280 /* Scan the pool to see if a constant with the same value has
16281 already been added. While we are doing this, also note the
16282 location where we must insert the constant if it doesn't already
16283 exist. */
16284 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16286 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16287 && fix->mode == mp->mode
16288 && (!LABEL_P (fix->value)
16289 || (CODE_LABEL_NUMBER (fix->value)
16290 == CODE_LABEL_NUMBER (mp->value)))
16291 && rtx_equal_p (fix->value, mp->value))
16293 /* More than one fix references this entry. */
16294 mp->refcount++;
16295 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16298 /* Note the insertion point if necessary. */
16299 if (max_mp == NULL
16300 && mp->max_address > max_address)
16301 max_mp = mp;
16303 /* If we are inserting an 8-byte aligned quantity and
16304 we have not already found an insertion point, then
16305 make sure that all such 8-byte aligned quantities are
16306 placed at the start of the pool. */
16307 if (ARM_DOUBLEWORD_ALIGN
16308 && max_mp == NULL
16309 && fix->fix_size >= 8
16310 && mp->fix_size < 8)
16312 max_mp = mp;
16313 max_address = mp->max_address;
16317 /* The value is not currently in the minipool, so we need to create
16318 a new entry for it. If MAX_MP is NULL, the entry will be put on
16319 the end of the list since the placement is less constrained than
16320 any existing entry. Otherwise, we insert the new fix before
16321 MAX_MP and, if necessary, adjust the constraints on the other
16322 entries. */
16323 mp = XNEW (Mnode);
16324 mp->fix_size = fix->fix_size;
16325 mp->mode = fix->mode;
16326 mp->value = fix->value;
16327 mp->refcount = 1;
16328 /* Not yet required for a backwards ref. */
16329 mp->min_address = -65536;
16331 if (max_mp == NULL)
16333 mp->max_address = max_address;
16334 mp->next = NULL;
16335 mp->prev = minipool_vector_tail;
16337 if (mp->prev == NULL)
16339 minipool_vector_head = mp;
16340 minipool_vector_label = gen_label_rtx ();
16342 else
16343 mp->prev->next = mp;
16345 minipool_vector_tail = mp;
16347 else
16349 if (max_address > max_mp->max_address - mp->fix_size)
16350 mp->max_address = max_mp->max_address - mp->fix_size;
16351 else
16352 mp->max_address = max_address;
16354 mp->next = max_mp;
16355 mp->prev = max_mp->prev;
16356 max_mp->prev = mp;
16357 if (mp->prev != NULL)
16358 mp->prev->next = mp;
16359 else
16360 minipool_vector_head = mp;
16363 /* Save the new entry. */
16364 max_mp = mp;
16366 /* Scan over the preceding entries and adjust their addresses as
16367 required. */
16368 while (mp->prev != NULL
16369 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16371 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16372 mp = mp->prev;
16375 return max_mp;
16378 static Mnode *
16379 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16380 HOST_WIDE_INT min_address)
16382 HOST_WIDE_INT offset;
16384 /* The code below assumes these are different. */
16385 gcc_assert (mp != min_mp);
16387 if (min_mp == NULL)
16389 if (min_address > mp->min_address)
16390 mp->min_address = min_address;
16392 else
16394 /* We will adjust this below if it is too loose. */
16395 mp->min_address = min_address;
16397 /* Unlink MP from its current position. Since min_mp is non-null,
16398 mp->next must be non-null. */
16399 mp->next->prev = mp->prev;
16400 if (mp->prev != NULL)
16401 mp->prev->next = mp->next;
16402 else
16403 minipool_vector_head = mp->next;
16405 /* Reinsert it after MIN_MP. */
16406 mp->prev = min_mp;
16407 mp->next = min_mp->next;
16408 min_mp->next = mp;
16409 if (mp->next != NULL)
16410 mp->next->prev = mp;
16411 else
16412 minipool_vector_tail = mp;
16415 min_mp = mp;
16417 offset = 0;
16418 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16420 mp->offset = offset;
16421 if (mp->refcount > 0)
16422 offset += mp->fix_size;
16424 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16425 mp->next->min_address = mp->min_address + mp->fix_size;
16428 return min_mp;
16431 /* Add a constant to the minipool for a backward reference. Returns the
16432 node added or NULL if the constant will not fit in this pool.
16434 Note that the code for insertion for a backwards reference can be
16435 somewhat confusing because the calculated offsets for each fix do
16436 not take into account the size of the pool (which is still under
16437 construction). */
16438 static Mnode *
16439 add_minipool_backward_ref (Mfix *fix)
16441 /* If set, min_mp is the last pool_entry that has a lower constraint
16442 than the one we are trying to add. */
16443 Mnode *min_mp = NULL;
16444 /* This can be negative, since it is only a constraint. */
16445 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16446 Mnode *mp;
16448 /* If we can't reach the current pool from this insn, or if we can't
16449 insert this entry at the end of the pool without pushing other
16450 fixes out of range, then we don't try. This ensures that we
16451 can't fail later on. */
16452 if (min_address >= minipool_barrier->address
16453 || (minipool_vector_tail->min_address + fix->fix_size
16454 >= minipool_barrier->address))
16455 return NULL;
16457 /* Scan the pool to see if a constant with the same value has
16458 already been added. While we are doing this, also note the
16459 location where we must insert the constant if it doesn't already
16460 exist. */
16461 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16463 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16464 && fix->mode == mp->mode
16465 && (!LABEL_P (fix->value)
16466 || (CODE_LABEL_NUMBER (fix->value)
16467 == CODE_LABEL_NUMBER (mp->value)))
16468 && rtx_equal_p (fix->value, mp->value)
16469 /* Check that there is enough slack to move this entry to the
16470 end of the table (this is conservative). */
16471 && (mp->max_address
16472 > (minipool_barrier->address
16473 + minipool_vector_tail->offset
16474 + minipool_vector_tail->fix_size)))
16476 mp->refcount++;
16477 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16480 if (min_mp != NULL)
16481 mp->min_address += fix->fix_size;
16482 else
16484 /* Note the insertion point if necessary. */
16485 if (mp->min_address < min_address)
16487 /* For now, we do not allow the insertion of nodes that require
16488 8-byte alignment anywhere but at the start of the pool. */
16489 if (ARM_DOUBLEWORD_ALIGN
16490 && fix->fix_size >= 8 && mp->fix_size < 8)
16491 return NULL;
16492 else
16493 min_mp = mp;
16495 else if (mp->max_address
16496 < minipool_barrier->address + mp->offset + fix->fix_size)
16498 /* Inserting before this entry would push the fix beyond
16499 its maximum address (which can happen if we have
16500 re-located a forwards fix); force the new fix to come
16501 after it. */
16502 if (ARM_DOUBLEWORD_ALIGN
16503 && fix->fix_size >= 8 && mp->fix_size < 8)
16504 return NULL;
16505 else
16507 min_mp = mp;
16508 min_address = mp->min_address + fix->fix_size;
16511 /* Do not insert a non-8-byte aligned quantity before 8-byte
16512 aligned quantities. */
16513 else if (ARM_DOUBLEWORD_ALIGN
16514 && fix->fix_size < 8
16515 && mp->fix_size >= 8)
16517 min_mp = mp;
16518 min_address = mp->min_address + fix->fix_size;
16523 /* We need to create a new entry. */
16524 mp = XNEW (Mnode);
16525 mp->fix_size = fix->fix_size;
16526 mp->mode = fix->mode;
16527 mp->value = fix->value;
16528 mp->refcount = 1;
16529 mp->max_address = minipool_barrier->address + 65536;
16531 mp->min_address = min_address;
16533 if (min_mp == NULL)
16535 mp->prev = NULL;
16536 mp->next = minipool_vector_head;
16538 if (mp->next == NULL)
16540 minipool_vector_tail = mp;
16541 minipool_vector_label = gen_label_rtx ();
16543 else
16544 mp->next->prev = mp;
16546 minipool_vector_head = mp;
16548 else
16550 mp->next = min_mp->next;
16551 mp->prev = min_mp;
16552 min_mp->next = mp;
16554 if (mp->next != NULL)
16555 mp->next->prev = mp;
16556 else
16557 minipool_vector_tail = mp;
16560 /* Save the new entry. */
16561 min_mp = mp;
16563 if (mp->prev)
16564 mp = mp->prev;
16565 else
16566 mp->offset = 0;
16568 /* Scan over the following entries and adjust their offsets. */
16569 while (mp->next != NULL)
16571 if (mp->next->min_address < mp->min_address + mp->fix_size)
16572 mp->next->min_address = mp->min_address + mp->fix_size;
16574 if (mp->refcount)
16575 mp->next->offset = mp->offset + mp->fix_size;
16576 else
16577 mp->next->offset = mp->offset;
16579 mp = mp->next;
16582 return min_mp;
16585 static void
16586 assign_minipool_offsets (Mfix *barrier)
16588 HOST_WIDE_INT offset = 0;
16589 Mnode *mp;
16591 minipool_barrier = barrier;
16593 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16595 mp->offset = offset;
16597 if (mp->refcount > 0)
16598 offset += mp->fix_size;
16602 /* Output the literal table */
16603 static void
16604 dump_minipool (rtx_insn *scan)
16606 Mnode * mp;
16607 Mnode * nmp;
16608 int align64 = 0;
16610 if (ARM_DOUBLEWORD_ALIGN)
16611 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16612 if (mp->refcount > 0 && mp->fix_size >= 8)
16614 align64 = 1;
16615 break;
16618 if (dump_file)
16619 fprintf (dump_file,
16620 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16621 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16623 scan = emit_label_after (gen_label_rtx (), scan);
16624 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16625 scan = emit_label_after (minipool_vector_label, scan);
16627 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16629 if (mp->refcount > 0)
16631 if (dump_file)
16633 fprintf (dump_file,
16634 ";; Offset %u, min %ld, max %ld ",
16635 (unsigned) mp->offset, (unsigned long) mp->min_address,
16636 (unsigned long) mp->max_address);
16637 arm_print_value (dump_file, mp->value);
16638 fputc ('\n', dump_file);
16641 switch (GET_MODE_SIZE (mp->mode))
16643 #ifdef HAVE_consttable_1
16644 case 1:
16645 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16646 break;
16648 #endif
16649 #ifdef HAVE_consttable_2
16650 case 2:
16651 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16652 break;
16654 #endif
16655 #ifdef HAVE_consttable_4
16656 case 4:
16657 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16658 break;
16660 #endif
16661 #ifdef HAVE_consttable_8
16662 case 8:
16663 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16664 break;
16666 #endif
16667 #ifdef HAVE_consttable_16
16668 case 16:
16669 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16670 break;
16672 #endif
16673 default:
16674 gcc_unreachable ();
16678 nmp = mp->next;
16679 free (mp);
16682 minipool_vector_head = minipool_vector_tail = NULL;
16683 scan = emit_insn_after (gen_consttable_end (), scan);
16684 scan = emit_barrier_after (scan);
16687 /* Return the cost of forcibly inserting a barrier after INSN. */
16688 static int
16689 arm_barrier_cost (rtx insn)
16691 /* Basing the location of the pool on the loop depth is preferable,
16692 but at the moment, the basic block information seems to be
16693 corrupt by this stage of the compilation. */
16694 int base_cost = 50;
16695 rtx next = next_nonnote_insn (insn);
16697 if (next != NULL && LABEL_P (next))
16698 base_cost -= 20;
16700 switch (GET_CODE (insn))
16702 case CODE_LABEL:
16703 /* It will always be better to place the table before the label, rather
16704 than after it. */
16705 return 50;
16707 case INSN:
16708 case CALL_INSN:
16709 return base_cost;
16711 case JUMP_INSN:
16712 return base_cost - 10;
16714 default:
16715 return base_cost + 10;
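/* Illustrative costs from the rules above: a JUMP_INSN normally costs
   50 - 10 = 40, and if the following insn is a label the base drops by
   20, so an unconditional branch just before a label costs only 20 and
   becomes the preferred place to force a new pool.  */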
16719 /* Find the best place in the insn stream in the range
16720 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16721 Create the barrier by inserting a jump and add a new fix entry for
16722 it. */
16723 static Mfix *
16724 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16726 HOST_WIDE_INT count = 0;
16727 rtx_barrier *barrier;
16728 rtx_insn *from = fix->insn;
16729 /* The instruction after which we will insert the jump. */
16730 rtx_insn *selected = NULL;
16731 int selected_cost;
16732 /* The address at which the jump instruction will be placed. */
16733 HOST_WIDE_INT selected_address;
16734 Mfix * new_fix;
16735 HOST_WIDE_INT max_count = max_address - fix->address;
16736 rtx_code_label *label = gen_label_rtx ();
16738 selected_cost = arm_barrier_cost (from);
16739 selected_address = fix->address;
16741 while (from && count < max_count)
16743 rtx_jump_table_data *tmp;
16744 int new_cost;
16746 /* This code shouldn't have been called if there was a natural barrier
16747 within range. */
16748 gcc_assert (!BARRIER_P (from));
16750 /* Count the length of this insn. This must stay in sync with the
16751 code that pushes minipool fixes. */
16752 if (LABEL_P (from))
16753 count += get_label_padding (from);
16754 else
16755 count += get_attr_length (from);
16757 /* If there is a jump table, add its length. */
16758 if (tablejump_p (from, NULL, &tmp))
16760 count += get_jump_table_size (tmp);
16762 /* Jump tables aren't in a basic block, so base the cost on
16763 the dispatch insn. If we select this location, we will
16764 still put the pool after the table. */
16765 new_cost = arm_barrier_cost (from);
16767 if (count < max_count
16768 && (!selected || new_cost <= selected_cost))
16770 selected = tmp;
16771 selected_cost = new_cost;
16772 selected_address = fix->address + count;
16775 /* Continue after the dispatch table. */
16776 from = NEXT_INSN (tmp);
16777 continue;
16780 new_cost = arm_barrier_cost (from);
16782 if (count < max_count
16783 && (!selected || new_cost <= selected_cost))
16785 selected = from;
16786 selected_cost = new_cost;
16787 selected_address = fix->address + count;
16790 from = NEXT_INSN (from);
16793 /* Make sure that we found a place to insert the jump. */
16794 gcc_assert (selected);
16796 /* Make sure we do not split a call and its corresponding
16797 CALL_ARG_LOCATION note. */
16798 if (CALL_P (selected))
16800 rtx_insn *next = NEXT_INSN (selected);
16801 if (next && NOTE_P (next)
16802 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16803 selected = next;
16806 /* Create a new JUMP_INSN that branches around a barrier. */
16807 from = emit_jump_insn_after (gen_jump (label), selected);
16808 JUMP_LABEL (from) = label;
16809 barrier = emit_barrier_after (from);
16810 emit_label_after (label, barrier);
16812 /* Create a minipool barrier entry for the new barrier. */
16813 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16814 new_fix->insn = barrier;
16815 new_fix->address = selected_address;
16816 new_fix->next = fix->next;
16817 fix->next = new_fix;
16819 return new_fix;
16822 /* Record that there is a natural barrier in the insn stream at
16823 ADDRESS. */
16824 static void
16825 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16827 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16829 fix->insn = insn;
16830 fix->address = address;
16832 fix->next = NULL;
16833 if (minipool_fix_head != NULL)
16834 minipool_fix_tail->next = fix;
16835 else
16836 minipool_fix_head = fix;
16838 minipool_fix_tail = fix;
16841 /* Record INSN, which will need fixing up to load a value from the
16842 minipool. ADDRESS is the offset of the insn since the start of the
16843 function; LOC is a pointer to the part of the insn which requires
16844 fixing; VALUE is the constant that must be loaded, which is of type
16845 MODE. */
16846 static void
16847 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16848 machine_mode mode, rtx value)
16850 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16852 fix->insn = insn;
16853 fix->address = address;
16854 fix->loc = loc;
16855 fix->mode = mode;
16856 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16857 fix->value = value;
16858 fix->forwards = get_attr_pool_range (insn);
16859 fix->backwards = get_attr_neg_pool_range (insn);
16860 fix->minipool = NULL;
16862 /* If an insn doesn't have a range defined for it, then it isn't
16863 expecting to be reworked by this code. Better to stop now than
16864 to generate duff assembly code. */
16865 gcc_assert (fix->forwards || fix->backwards);
16867 /* If an entry requires 8-byte alignment then assume all constant pools
16868 require 4 bytes of padding. Trying to do this later on a per-pool
16869 basis is awkward because existing pool entries have to be modified. */
16870 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16871 minipool_pad = 4;
16873 if (dump_file)
16875 fprintf (dump_file,
16876 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16877 GET_MODE_NAME (mode),
16878 INSN_UID (insn), (unsigned long) address,
16879 -1 * (long)fix->backwards, (long)fix->forwards);
16880 arm_print_value (dump_file, fix->value);
16881 fprintf (dump_file, "\n");
16884 /* Add it to the chain of fixes. */
16885 fix->next = NULL;
16887 if (minipool_fix_head != NULL)
16888 minipool_fix_tail->next = fix;
16889 else
16890 minipool_fix_head = fix;
16892 minipool_fix_tail = fix;
16895 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16896 Returns the number of insns needed, or 99 if we always want to synthesize
16897 the value. */
16898 int
16899 arm_max_const_double_inline_cost ()
16901 /* Let the value get synthesized to avoid the use of literal pools. */
16902 if (arm_disable_literal_pool)
16903 return 99;
16905 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16908 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16909 Returns the number of insns needed, or 99 if we don't know how to
16910 do it. */
16911 int
16912 arm_const_double_inline_cost (rtx val)
16914 rtx lowpart, highpart;
16915 machine_mode mode;
16917 mode = GET_MODE (val);
16919 if (mode == VOIDmode)
16920 mode = DImode;
16922 gcc_assert (GET_MODE_SIZE (mode) == 8);
16924 lowpart = gen_lowpart (SImode, val);
16925 highpart = gen_highpart_mode (SImode, mode, val);
16927 gcc_assert (CONST_INT_P (lowpart));
16928 gcc_assert (CONST_INT_P (highpart));
16930 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16931 NULL_RTX, NULL_RTX, 0, 0)
16932 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16933 NULL_RTX, NULL_RTX, 0, 0));
16936 /* Cost of loading a SImode constant. */
16937 static inline int
16938 arm_const_inline_cost (enum rtx_code code, rtx val)
16940 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16941 NULL_RTX, NULL_RTX, 1, 0);
16944 /* Return true if it is worthwhile to split a 64-bit constant into two
16945 32-bit operations. This is the case if optimizing for size, or
16946 if we have load delay slots, or if one 32-bit part can be done with
16947 a single data operation. */
16948 bool
16949 arm_const_double_by_parts (rtx val)
16951 machine_mode mode = GET_MODE (val);
16952 rtx part;
16954 if (optimize_size || arm_ld_sched)
16955 return true;
16957 if (mode == VOIDmode)
16958 mode = DImode;
16960 part = gen_highpart_mode (SImode, mode, val);
16962 gcc_assert (CONST_INT_P (part));
16964 if (const_ok_for_arm (INTVAL (part))
16965 || const_ok_for_arm (~INTVAL (part)))
16966 return true;
16968 part = gen_lowpart (SImode, val);
16970 gcc_assert (CONST_INT_P (part));
16972 if (const_ok_for_arm (INTVAL (part))
16973 || const_ok_for_arm (~INTVAL (part)))
16974 return true;
16976 return false;
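/* For example (illustrative constant): 0x000000ff00000001 is worth
   splitting, since its high word 0x000000ff is a valid ARM immediate and
   that half can therefore be built with a single data processing insn.  */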
16979 /* Return true if it is possible to inline both the high and low parts
16980 of a 64-bit constant into 32-bit data processing instructions. */
16981 bool
16982 arm_const_double_by_immediates (rtx val)
16984 machine_mode mode = GET_MODE (val);
16985 rtx part;
16987 if (mode == VOIDmode)
16988 mode = DImode;
16990 part = gen_highpart_mode (SImode, mode, val);
16992 gcc_assert (CONST_INT_P (part));
16994 if (!const_ok_for_arm (INTVAL (part)))
16995 return false;
16997 part = gen_lowpart (SImode, val);
16999 gcc_assert (CONST_INT_P (part));
17001 if (!const_ok_for_arm (INTVAL (part)))
17002 return false;
17004 return true;
17007 /* Scan INSN and note any of its operands that need fixing.
17008 If DO_PUSHES is false we do not actually push any of the fixups
17009 needed. */
17010 static void
17011 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17013 int opno;
17015 extract_constrain_insn (insn);
17017 if (recog_data.n_alternatives == 0)
17018 return;
17020 /* Fill in recog_op_alt with information about the constraints of
17021 this insn. */
17022 preprocess_constraints (insn);
17024 const operand_alternative *op_alt = which_op_alt ();
17025 for (opno = 0; opno < recog_data.n_operands; opno++)
17027 /* Things we need to fix can only occur in inputs. */
17028 if (recog_data.operand_type[opno] != OP_IN)
17029 continue;
17031 /* If this alternative is a memory reference, then any mention
17032 of constants in this alternative is really to fool reload
17033 into allowing us to accept one there. We need to fix them up
17034 now so that we output the right code. */
17035 if (op_alt[opno].memory_ok)
17037 rtx op = recog_data.operand[opno];
17039 if (CONSTANT_P (op))
17041 if (do_pushes)
17042 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17043 recog_data.operand_mode[opno], op);
17045 else if (MEM_P (op)
17046 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17047 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17049 if (do_pushes)
17051 rtx cop = avoid_constant_pool_reference (op);
17053 /* Casting the address of something to a mode narrower
17054 than a word can cause avoid_constant_pool_reference()
17055 to return the pool reference itself. That's no good to
17056 us here. Let's just hope that we can use the
17057 constant pool value directly. */
17058 if (op == cop)
17059 cop = get_pool_constant (XEXP (op, 0));
17061 push_minipool_fix (insn, address,
17062 recog_data.operand_loc[opno],
17063 recog_data.operand_mode[opno], cop);
17070 return;
17073 /* Rewrite move insn into subtract of 0 if the condition codes will
17074 be useful in next conditional jump insn. */
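/* For instance (illustrative): when a low-register move such as
   mov r3, r2
   is followed in the same basic block by a cbranchsi4_insn comparing r2
   (or r3) with zero, the move is rewritten as
   subs r3, r2, #0
   and the branch is changed to test r3, so the flags set by the subtract
   make the separate compare redundant.  */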
17076 static void
17077 thumb1_reorg (void)
17079 basic_block bb;
17081 FOR_EACH_BB_FN (bb, cfun)
17083 rtx dest, src;
17084 rtx pat, op0, set = NULL;
17085 rtx_insn *prev, *insn = BB_END (bb);
17086 bool insn_clobbered = false;
17088 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17089 insn = PREV_INSN (insn);
17091 /* Find the last cbranchsi4_insn in basic block BB. */
17092 if (insn == BB_HEAD (bb)
17093 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17094 continue;
17096 /* Get the register with which we are comparing. */
17097 pat = PATTERN (insn);
17098 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17100 /* Find the first flag setting insn before INSN in basic block BB. */
17101 gcc_assert (insn != BB_HEAD (bb));
17102 for (prev = PREV_INSN (insn);
17103 (!insn_clobbered
17104 && prev != BB_HEAD (bb)
17105 && (NOTE_P (prev)
17106 || DEBUG_INSN_P (prev)
17107 || ((set = single_set (prev)) != NULL
17108 && get_attr_conds (prev) == CONDS_NOCOND)));
17109 prev = PREV_INSN (prev))
17111 if (reg_set_p (op0, prev))
17112 insn_clobbered = true;
17115 /* Skip if op0 is clobbered by insn other than prev. */
17116 if (insn_clobbered)
17117 continue;
17119 if (!set)
17120 continue;
17122 dest = SET_DEST (set);
17123 src = SET_SRC (set);
17124 if (!low_register_operand (dest, SImode)
17125 || !low_register_operand (src, SImode))
17126 continue;
17128 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17129 in INSN. Both src and dest of the move insn are checked. */
17130 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17132 dest = copy_rtx (dest);
17133 src = copy_rtx (src);
17134 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17135 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17136 INSN_CODE (prev) = -1;
17137 /* Set test register in INSN to dest. */
17138 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17139 INSN_CODE (insn) = -1;
17144 /* Convert instructions to their cc-clobbering variant if possible, since
17145 that allows us to use smaller encodings. */
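/* A sketch of the transformation (illustrative, not from the sources):
   when the condition codes are dead after the insn,
   add r0, r1, r2 (32-bit Thumb-2 encoding)
   is rewritten as the flag-setting
   adds r0, r1, r2 (16-bit encoding)
   by wrapping the SET in a PARALLEL with a clobber of CC_REGNUM.  */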
17147 static void
17148 thumb2_reorg (void)
17150 basic_block bb;
17151 regset_head live;
17153 INIT_REG_SET (&live);
17155 /* We are freeing block_for_insn in the toplev to keep compatibility
17156 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17157 compute_bb_for_insn ();
17158 df_analyze ();
17160 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17162 FOR_EACH_BB_FN (bb, cfun)
17164 if (current_tune->disparage_flag_setting_t16_encodings
17165 && optimize_bb_for_speed_p (bb))
17166 continue;
17168 rtx_insn *insn;
17169 Convert_Action action = SKIP;
17170 Convert_Action action_for_partial_flag_setting
17171 = (current_tune->disparage_partial_flag_setting_t16_encodings
17172 && optimize_bb_for_speed_p (bb))
17173 ? SKIP : CONV;
17175 COPY_REG_SET (&live, DF_LR_OUT (bb));
17176 df_simulate_initialize_backwards (bb, &live);
17177 FOR_BB_INSNS_REVERSE (bb, insn)
17179 if (NONJUMP_INSN_P (insn)
17180 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17181 && GET_CODE (PATTERN (insn)) == SET)
17183 action = SKIP;
17184 rtx pat = PATTERN (insn);
17185 rtx dst = XEXP (pat, 0);
17186 rtx src = XEXP (pat, 1);
17187 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17189 if (!OBJECT_P (src))
17190 op0 = XEXP (src, 0);
17192 if (BINARY_P (src))
17193 op1 = XEXP (src, 1);
17195 if (low_register_operand (dst, SImode))
17197 switch (GET_CODE (src))
17199 case PLUS:
17200 /* Adding two registers and storing the result
17201 in the first source is already a 16-bit
17202 operation. */
17203 if (rtx_equal_p (dst, op0)
17204 && register_operand (op1, SImode))
17205 break;
17207 if (low_register_operand (op0, SImode))
17209 /* ADDS <Rd>,<Rn>,<Rm> */
17210 if (low_register_operand (op1, SImode))
17211 action = CONV;
17212 /* ADDS <Rdn>,#<imm8> */
17213 /* SUBS <Rdn>,#<imm8> */
17214 else if (rtx_equal_p (dst, op0)
17215 && CONST_INT_P (op1)
17216 && IN_RANGE (INTVAL (op1), -255, 255))
17217 action = CONV;
17218 /* ADDS <Rd>,<Rn>,#<imm3> */
17219 /* SUBS <Rd>,<Rn>,#<imm3> */
17220 else if (CONST_INT_P (op1)
17221 && IN_RANGE (INTVAL (op1), -7, 7))
17222 action = CONV;
17224 /* ADCS <Rd>, <Rn> */
17225 else if (GET_CODE (XEXP (src, 0)) == PLUS
17226 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17227 && low_register_operand (XEXP (XEXP (src, 0), 1),
17228 SImode)
17229 && COMPARISON_P (op1)
17230 && cc_register (XEXP (op1, 0), VOIDmode)
17231 && maybe_get_arm_condition_code (op1) == ARM_CS
17232 && XEXP (op1, 1) == const0_rtx)
17233 action = CONV;
17234 break;
17236 case MINUS:
17237 /* RSBS <Rd>,<Rn>,#0
17238 Not handled here: see NEG below. */
17239 /* SUBS <Rd>,<Rn>,#<imm3>
17240 SUBS <Rdn>,#<imm8>
17241 Not handled here: see PLUS above. */
17242 /* SUBS <Rd>,<Rn>,<Rm> */
17243 if (low_register_operand (op0, SImode)
17244 && low_register_operand (op1, SImode))
17245 action = CONV;
17246 break;
17248 case MULT:
17249 /* MULS <Rdm>,<Rn>,<Rdm>
17250 As an exception to the rule, this is only used
17251 when optimizing for size since MULS is slow on all
17252 known implementations. We do not even want to use
17253 MULS in cold code, if optimizing for speed, so we
17254 test the global flag here. */
17255 if (!optimize_size)
17256 break;
17257 /* else fall through. */
17258 case AND:
17259 case IOR:
17260 case XOR:
17261 /* ANDS <Rdn>,<Rm> */
17262 if (rtx_equal_p (dst, op0)
17263 && low_register_operand (op1, SImode))
17264 action = action_for_partial_flag_setting;
17265 else if (rtx_equal_p (dst, op1)
17266 && low_register_operand (op0, SImode))
17267 action = action_for_partial_flag_setting == SKIP
17268 ? SKIP : SWAP_CONV;
17269 break;
17271 case ASHIFTRT:
17272 case ASHIFT:
17273 case LSHIFTRT:
17274 /* ASRS <Rdn>,<Rm> */
17275 /* LSRS <Rdn>,<Rm> */
17276 /* LSLS <Rdn>,<Rm> */
17277 if (rtx_equal_p (dst, op0)
17278 && low_register_operand (op1, SImode))
17279 action = action_for_partial_flag_setting;
17280 /* ASRS <Rd>,<Rm>,#<imm5> */
17281 /* LSRS <Rd>,<Rm>,#<imm5> */
17282 /* LSLS <Rd>,<Rm>,#<imm5> */
17283 else if (low_register_operand (op0, SImode)
17284 && CONST_INT_P (op1)
17285 && IN_RANGE (INTVAL (op1), 0, 31))
17286 action = action_for_partial_flag_setting;
17287 break;
17289 case ROTATERT:
17290 /* RORS <Rdn>,<Rm> */
17291 if (rtx_equal_p (dst, op0)
17292 && low_register_operand (op1, SImode))
17293 action = action_for_partial_flag_setting;
17294 break;
17296 case NOT:
17297 /* MVNS <Rd>,<Rm> */
17298 if (low_register_operand (op0, SImode))
17299 action = action_for_partial_flag_setting;
17300 break;
17302 case NEG:
17303 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17304 if (low_register_operand (op0, SImode))
17305 action = CONV;
17306 break;
17308 case CONST_INT:
17309 /* MOVS <Rd>,#<imm8> */
17310 if (CONST_INT_P (src)
17311 && IN_RANGE (INTVAL (src), 0, 255))
17312 action = action_for_partial_flag_setting;
17313 break;
17315 case REG:
17316 /* MOVS and MOV<c> with registers have different
17317 encodings, so are not relevant here. */
17318 break;
17320 default:
17321 break;
17325 if (action != SKIP)
17327 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17328 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17329 rtvec vec;
17331 if (action == SWAP_CONV)
17333 src = copy_rtx (src);
17334 XEXP (src, 0) = op1;
17335 XEXP (src, 1) = op0;
17336 pat = gen_rtx_SET (VOIDmode, dst, src);
17337 vec = gen_rtvec (2, pat, clobber);
17339 else /* action == CONV */
17340 vec = gen_rtvec (2, pat, clobber);
17342 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17343 INSN_CODE (insn) = -1;
17347 if (NONDEBUG_INSN_P (insn))
17348 df_simulate_one_insn_backwards (bb, insn, &live);
17352 CLEAR_REG_SET (&live);
17355 /* GCC puts the pool in the wrong place for ARM, since we can only
17356 load addresses a limited distance around the pc. We do some
17357 special munging to move the constant pool values to the correct
17358 point in the code. */
17359 static void
17360 arm_reorg (void)
17362 rtx_insn *insn;
17363 HOST_WIDE_INT address = 0;
17364 Mfix * fix;
17366 if (TARGET_THUMB1)
17367 thumb1_reorg ();
17368 else if (TARGET_THUMB2)
17369 thumb2_reorg ();
17371 /* Ensure all insns that must be split have been split at this point.
17372 Otherwise, the pool placement code below may compute incorrect
17373 insn lengths. Note that when optimizing, all insns have already
17374 been split at this point. */
17375 if (!optimize)
17376 split_all_insns_noflow ();
17378 minipool_fix_head = minipool_fix_tail = NULL;
17380 /* The first insn must always be a note, or the code below won't
17381 scan it properly. */
17382 insn = get_insns ();
17383 gcc_assert (NOTE_P (insn));
17384 minipool_pad = 0;
17386 /* Scan all the insns and record the operands that will need fixing. */
17387 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17389 if (BARRIER_P (insn))
17390 push_minipool_barrier (insn, address);
17391 else if (INSN_P (insn))
17393 rtx_jump_table_data *table;
17395 note_invalid_constants (insn, address, true);
17396 address += get_attr_length (insn);
17398 /* If the insn is a vector jump, add the size of the table
17399 and skip the table. */
17400 if (tablejump_p (insn, NULL, &table))
17402 address += get_jump_table_size (table);
17403 insn = table;
17406 else if (LABEL_P (insn))
17407 /* Add the worst-case padding due to alignment. We don't add
17408 the _current_ padding because the minipool insertions
17409 themselves might change it. */
17410 address += get_label_padding (insn);
17413 fix = minipool_fix_head;
17415 /* Now scan the fixups and perform the required changes. */
17416 while (fix)
17418 Mfix * ftmp;
17419 Mfix * fdel;
17420 Mfix * last_added_fix;
17421 Mfix * last_barrier = NULL;
17422 Mfix * this_fix;
17424 /* Skip any further barriers before the next fix. */
17425 while (fix && BARRIER_P (fix->insn))
17426 fix = fix->next;
17428 /* No more fixes. */
17429 if (fix == NULL)
17430 break;
17432 last_added_fix = NULL;
17434 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17436 if (BARRIER_P (ftmp->insn))
17438 if (ftmp->address >= minipool_vector_head->max_address)
17439 break;
17441 last_barrier = ftmp;
17443 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17444 break;
17446 last_added_fix = ftmp; /* Keep track of the last fix added. */
17449 /* If we found a barrier, drop back to that; any fixes that we
17450 could have reached but come after the barrier will now go in
17451 the next mini-pool. */
17452 if (last_barrier != NULL)
17454 /* Reduce the refcount for those fixes that won't go into this
17455 pool after all. */
17456 for (fdel = last_barrier->next;
17457 fdel && fdel != ftmp;
17458 fdel = fdel->next)
17460 fdel->minipool->refcount--;
17461 fdel->minipool = NULL;
17464 ftmp = last_barrier;
17466 else
17468 /* ftmp is the first fix that we can't fit into this pool and
17469 there are no natural barriers that we could use. Insert a
17470 new barrier in the code somewhere between the previous
17471 fix and this one, and arrange to jump around it. */
17472 HOST_WIDE_INT max_address;
17474 /* The last item on the list of fixes must be a barrier, so
17475 we can never run off the end of the list of fixes without
17476 last_barrier being set. */
17477 gcc_assert (ftmp);
17479 max_address = minipool_vector_head->max_address;
17480 /* Check that there isn't another fix that is in range that
17481 we couldn't fit into this pool because the pool was
17482 already too large: we need to put the pool before such an
17483 instruction. The pool itself may come just after the
17484 fix because create_fix_barrier also allows space for a
17485 jump instruction. */
17486 if (ftmp->address < max_address)
17487 max_address = ftmp->address + 1;
17489 last_barrier = create_fix_barrier (last_added_fix, max_address);
17492 assign_minipool_offsets (last_barrier);
17494 while (ftmp)
17496 if (!BARRIER_P (ftmp->insn)
17497 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17498 == NULL))
17499 break;
17501 ftmp = ftmp->next;
17504 /* Scan over the fixes we have identified for this pool, fixing them
17505 up and adding the constants to the pool itself. */
17506 for (this_fix = fix; this_fix && ftmp != this_fix;
17507 this_fix = this_fix->next)
17508 if (!BARRIER_P (this_fix->insn))
17510 rtx addr
17511 = plus_constant (Pmode,
17512 gen_rtx_LABEL_REF (VOIDmode,
17513 minipool_vector_label),
17514 this_fix->minipool->offset);
17515 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17518 dump_minipool (last_barrier->insn);
17519 fix = ftmp;
17522 /* From now on we must synthesize any constants that we can't handle
17523 directly. This can happen if the RTL gets split during final
17524 instruction generation. */
17525 cfun->machine->after_arm_reorg = 1;
17527 /* Free the minipool memory. */
17528 obstack_free (&minipool_obstack, minipool_startobj);
17531 /* Routines to output assembly language. */
17533 /* Return string representation of passed in real value. */
17534 static const char *
17535 fp_const_from_val (REAL_VALUE_TYPE *r)
17537 if (!fp_consts_inited)
17538 init_fp_table ();
17540 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17541 return "0";
17544 /* OPERANDS[0] is the entire list of insns that constitute pop;
17545 OPERANDS[1] is the base register; RETURN_PC is true iff the return insn
17546 is in the list; UPDATE is true iff the list contains an explicit
17547 update of the base register. */
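/* As a hedged example of the strings built below: popping r4, r5 and pc
   through SP with writeback produces "pop {r4, r5, pc}" under unified
   syntax, or "ldmfd sp!, {r4, r5, pc}" otherwise; a return from an
   interrupt handler additionally gets a trailing "^".  */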
17548 void
17549 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17550 bool update)
17552 int i;
17553 char pattern[100];
17554 int offset;
17555 const char *conditional;
17556 int num_saves = XVECLEN (operands[0], 0);
17557 unsigned int regno;
17558 unsigned int regno_base = REGNO (operands[1]);
17560 offset = 0;
17561 offset += update ? 1 : 0;
17562 offset += return_pc ? 1 : 0;
17564 /* Is the base register in the list? */
17565 for (i = offset; i < num_saves; i++)
17567 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17568 /* If SP is in the list, then the base register must be SP. */
17569 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17570 /* If base register is in the list, there must be no explicit update. */
17571 if (regno == regno_base)
17572 gcc_assert (!update);
17575 conditional = reverse ? "%?%D0" : "%?%d0";
17576 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17578 /* Output pop (not stmfd) because it has a shorter encoding. */
17579 gcc_assert (update);
17580 sprintf (pattern, "pop%s\t{", conditional);
17582 else
17584 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17585 It's just a convention; their semantics are identical. */
17586 if (regno_base == SP_REGNUM)
17587 sprintf (pattern, "ldm%sfd\t", conditional);
17588 else if (TARGET_UNIFIED_ASM)
17589 sprintf (pattern, "ldmia%s\t", conditional);
17590 else
17591 sprintf (pattern, "ldm%sia\t", conditional);
17593 strcat (pattern, reg_names[regno_base]);
17594 if (update)
17595 strcat (pattern, "!, {");
17596 else
17597 strcat (pattern, ", {");
17600 /* Output the first destination register. */
17601 strcat (pattern,
17602 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17604 /* Output the rest of the destination registers. */
17605 for (i = offset + 1; i < num_saves; i++)
17607 strcat (pattern, ", ");
17608 strcat (pattern,
17609 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17612 strcat (pattern, "}");
17614 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17615 strcat (pattern, "^");
17617 output_asm_insn (pattern, &cond);
17621 /* Output the assembly for a store multiple. */
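/* For example (registers chosen arbitrarily): storing d8-d10 with SP as
   the base produces "vpush.64 {d8, d9, d10}", while any other base
   register produces "vstmdb.64 r5!, {d8, d9, d10}".  */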
17623 const char *
17624 vfp_output_vstmd (rtx * operands)
17626 char pattern[100];
17627 int p;
17628 int base;
17629 int i;
17630 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17631 ? XEXP (operands[0], 0)
17632 : XEXP (XEXP (operands[0], 0), 0);
17633 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17635 if (push_p)
17636 strcpy (pattern, "vpush%?.64\t{%P1");
17637 else
17638 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17640 p = strlen (pattern);
17642 gcc_assert (REG_P (operands[1]));
17644 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17645 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17647 p += sprintf (&pattern[p], ", d%d", base + i);
17649 strcpy (&pattern[p], "}");
17651 output_asm_insn (pattern, operands);
17652 return "";
17656 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17657 number of bytes pushed. */
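/* A rough sketch of the two quirks handled below: on pre-v6 cores a
   request to push exactly two D registers is widened to three (the ARM10
   VFPr1 erratum), and a request for more than 16 is split recursively,
   e.g. a count of 20 becomes one FSTMD of 4 registers followed by one
   of 16.  */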
17659 static int
17660 vfp_emit_fstmd (int base_reg, int count)
17662 rtx par;
17663 rtx dwarf;
17664 rtx tmp, reg;
17665 int i;
17667 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17668 register pairs are stored by a store multiple insn. We avoid this
17669 by pushing an extra pair. */
17670 if (count == 2 && !arm_arch6)
17672 if (base_reg == LAST_VFP_REGNUM - 3)
17673 base_reg -= 2;
17674 count++;
17677 /* FSTMD may not store more than 16 doubleword registers at once. Split
17678 larger stores into multiple parts (up to a maximum of two, in
17679 practice). */
17680 if (count > 16)
17682 int saved;
17683 /* NOTE: base_reg is an internal register number, so each D register
17684 counts as 2. */
17685 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17686 saved += vfp_emit_fstmd (base_reg, 16);
17687 return saved;
17690 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17691 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17693 reg = gen_rtx_REG (DFmode, base_reg);
17694 base_reg += 2;
17696 XVECEXP (par, 0, 0)
17697 = gen_rtx_SET (VOIDmode,
17698 gen_frame_mem
17699 (BLKmode,
17700 gen_rtx_PRE_MODIFY (Pmode,
17701 stack_pointer_rtx,
17702 plus_constant
17703 (Pmode, stack_pointer_rtx,
17704 - (count * 8)))
17706 gen_rtx_UNSPEC (BLKmode,
17707 gen_rtvec (1, reg),
17708 UNSPEC_PUSH_MULT));
17710 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17711 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17712 RTX_FRAME_RELATED_P (tmp) = 1;
17713 XVECEXP (dwarf, 0, 0) = tmp;
17715 tmp = gen_rtx_SET (VOIDmode,
17716 gen_frame_mem (DFmode, stack_pointer_rtx),
17717 reg);
17718 RTX_FRAME_RELATED_P (tmp) = 1;
17719 XVECEXP (dwarf, 0, 1) = tmp;
17721 for (i = 1; i < count; i++)
17723 reg = gen_rtx_REG (DFmode, base_reg);
17724 base_reg += 2;
17725 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17727 tmp = gen_rtx_SET (VOIDmode,
17728 gen_frame_mem (DFmode,
17729 plus_constant (Pmode,
17730 stack_pointer_rtx,
17731 i * 8)),
17732 reg);
17733 RTX_FRAME_RELATED_P (tmp) = 1;
17734 XVECEXP (dwarf, 0, i + 1) = tmp;
17737 par = emit_insn (par);
17738 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17739 RTX_FRAME_RELATED_P (par) = 1;
17741 return count * 8;
17744 /* Emit a call instruction with pattern PAT. ADDR is the address of
17745 the call target. */
17747 void
17748 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17750 rtx insn;
17752 insn = emit_call_insn (pat);
17754 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17755 If the call might use such an entry, add a use of the PIC register
17756 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17757 if (TARGET_VXWORKS_RTP
17758 && flag_pic
17759 && !sibcall
17760 && GET_CODE (addr) == SYMBOL_REF
17761 && (SYMBOL_REF_DECL (addr)
17762 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17763 : !SYMBOL_REF_LOCAL_P (addr)))
17765 require_pic_register ();
17766 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17769 if (TARGET_AAPCS_BASED)
17771 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17772 linker. We need to add an IP clobber to allow setting
17773 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17774 is not needed since it's a fixed register. */
17775 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17776 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17780 /* Output a 'call' insn. */
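/* For illustration, on a pre-v5 target a call through r3 comes out as
   "mov lr, pc" followed by "bx r3" (interworking/ARMv4T) or "mov pc, r3"
   otherwise; a call through lr is first redirected via ip so the target
   is not clobbered when lr is loaded with the return address.  */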
17781 const char *
17782 output_call (rtx *operands)
17784 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17786 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17787 if (REGNO (operands[0]) == LR_REGNUM)
17789 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17790 output_asm_insn ("mov%?\t%0, %|lr", operands);
17793 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17795 if (TARGET_INTERWORK || arm_arch4t)
17796 output_asm_insn ("bx%?\t%0", operands);
17797 else
17798 output_asm_insn ("mov%?\t%|pc, %0", operands);
17800 return "";
17803 /* Output a 'call' insn that is a reference in memory. This is
17804 disabled for ARMv5, where we prefer to use blx instead, because otherwise
17805 there's a significant performance overhead. */
17806 const char *
17807 output_call_mem (rtx *operands)
17809 gcc_assert (!arm_arch5);
17810 if (TARGET_INTERWORK)
17812 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17813 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17814 output_asm_insn ("bx%?\t%|ip", operands);
17816 else if (regno_use_in (LR_REGNUM, operands[0]))
17818 /* LR is used in the memory address. We load the address in the
17819 first instruction. It's safe to use IP as the target of the
17820 load since the call will kill it anyway. */
17821 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17822 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17823 if (arm_arch4t)
17824 output_asm_insn ("bx%?\t%|ip", operands);
17825 else
17826 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17828 else
17830 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17831 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17834 return "";
17838 /* Output a move from arm registers to arm registers of a long double
17839 OPERANDS[0] is the destination.
17840 OPERANDS[1] is the source. */
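/* A hedged note on the loop direction below: when the destination starts
   at a lower register number than the source (e.g. r0-r2 <- r1-r3) the
   words are copied low to high, otherwise high to low, so no overlapping
   register is overwritten before it has been read.  */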
17841 const char *
17842 output_mov_long_double_arm_from_arm (rtx *operands)
17844 /* We have to be careful here because the two might overlap. */
17845 int dest_start = REGNO (operands[0]);
17846 int src_start = REGNO (operands[1]);
17847 rtx ops[2];
17848 int i;
17850 if (dest_start < src_start)
17852 for (i = 0; i < 3; i++)
17854 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17855 ops[1] = gen_rtx_REG (SImode, src_start + i);
17856 output_asm_insn ("mov%?\t%0, %1", ops);
17859 else
17861 for (i = 2; i >= 0; i--)
17863 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17864 ops[1] = gen_rtx_REG (SImode, src_start + i);
17865 output_asm_insn ("mov%?\t%0, %1", ops);
17869 return "";
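/* arm_emit_movpair below emits DEST = SRC as a low/high pair of sets.
   As an illustrative example (constant chosen arbitrarily), loading
   0x12345678 becomes a SET of the low half (0x5678) followed by a
   ZERO_EXTRACT write of the upper 16 bits (0x1234); the second insn is
   omitted when those bits are zero.  Non-constant sources use a
   HIGH/LO_SUM pair instead.  */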
17872 void
17873 arm_emit_movpair (rtx dest, rtx src)
17875 /* If the src is an immediate, simplify it. */
17876 if (CONST_INT_P (src))
17878 HOST_WIDE_INT val = INTVAL (src);
17879 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17880 if ((val >> 16) & 0x0000ffff)
17881 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17882 GEN_INT (16)),
17883 GEN_INT ((val >> 16) & 0x0000ffff));
17884 return;
17886 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17887 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17890 /* Output a move between double words. It must be REG<-MEM
17891 or MEM<-REG. */
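/* Illustrative examples of the strings chosen below (registers
   arbitrary): a plain register address gives "ldrd r0, [r2]", or
   "ldmia r2, {r0-r1}" when LDRD is unavailable; the pre/post-modify
   cases fall back to a pair of single LDRs when the offset is outside
   the range LDRD can encode.  */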
17892 const char *
17893 output_move_double (rtx *operands, bool emit, int *count)
17895 enum rtx_code code0 = GET_CODE (operands[0]);
17896 enum rtx_code code1 = GET_CODE (operands[1]);
17897 rtx otherops[3];
17898 if (count)
17899 *count = 1;
17901 /* The only case when this might happen is when
17902 you are looking at the length of a DImode instruction
17903 that has an invalid constant in it. */
17904 if (code0 == REG && code1 != MEM)
17906 gcc_assert (!emit);
17907 *count = 2;
17908 return "";
17911 if (code0 == REG)
17913 unsigned int reg0 = REGNO (operands[0]);
17915 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17917 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17919 switch (GET_CODE (XEXP (operands[1], 0)))
17921 case REG:
17923 if (emit)
17925 if (TARGET_LDRD
17926 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17927 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17928 else
17929 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17931 break;
17933 case PRE_INC:
17934 gcc_assert (TARGET_LDRD);
17935 if (emit)
17936 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17937 break;
17939 case PRE_DEC:
17940 if (emit)
17942 if (TARGET_LDRD)
17943 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17944 else
17945 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17947 break;
17949 case POST_INC:
17950 if (emit)
17952 if (TARGET_LDRD)
17953 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17954 else
17955 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17957 break;
17959 case POST_DEC:
17960 gcc_assert (TARGET_LDRD);
17961 if (emit)
17962 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17963 break;
17965 case PRE_MODIFY:
17966 case POST_MODIFY:
17967 /* Autoincrement addressing modes should never have overlapping
17968 base and destination registers, and overlapping index registers
17969 are already prohibited, so this doesn't need to worry about
17970 fix_cm3_ldrd. */
17971 otherops[0] = operands[0];
17972 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17973 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17975 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17977 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17979 /* Registers overlap so split out the increment. */
17980 if (emit)
17982 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17983 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17985 if (count)
17986 *count = 2;
17988 else
17990 /* Use a single insn if we can.
17991 FIXME: IWMMXT allows offsets larger than ldrd can
17992 handle, fix these up with a pair of ldr. */
17993 if (TARGET_THUMB2
17994 || !CONST_INT_P (otherops[2])
17995 || (INTVAL (otherops[2]) > -256
17996 && INTVAL (otherops[2]) < 256))
17998 if (emit)
17999 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18001 else
18003 if (emit)
18005 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18006 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18008 if (count)
18009 *count = 2;
18014 else
18016 /* Use a single insn if we can.
18017 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18018 fix these up with a pair of ldr. */
18019 if (TARGET_THUMB2
18020 || !CONST_INT_P (otherops[2])
18021 || (INTVAL (otherops[2]) > -256
18022 && INTVAL (otherops[2]) < 256))
18024 if (emit)
18025 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18027 else
18029 if (emit)
18031 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18032 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18034 if (count)
18035 *count = 2;
18038 break;
18040 case LABEL_REF:
18041 case CONST:
18042 /* We might be able to use ldrd %0, %1 here. However the range is
18043 different to ldr/adr, and it is broken on some ARMv7-M
18044 implementations. */
18045 /* Use the second register of the pair to avoid problematic
18046 overlap. */
18047 otherops[1] = operands[1];
18048 if (emit)
18049 output_asm_insn ("adr%?\t%0, %1", otherops);
18050 operands[1] = otherops[0];
18051 if (emit)
18053 if (TARGET_LDRD)
18054 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18055 else
18056 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18059 if (count)
18060 *count = 2;
18061 break;
18063 /* ??? This needs checking for thumb2. */
18064 default:
18065 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18066 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18068 otherops[0] = operands[0];
18069 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18070 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18072 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18074 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18076 switch ((int) INTVAL (otherops[2]))
18078 case -8:
18079 if (emit)
18080 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18081 return "";
18082 case -4:
18083 if (TARGET_THUMB2)
18084 break;
18085 if (emit)
18086 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18087 return "";
18088 case 4:
18089 if (TARGET_THUMB2)
18090 break;
18091 if (emit)
18092 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18093 return "";
18096 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18097 operands[1] = otherops[0];
18098 if (TARGET_LDRD
18099 && (REG_P (otherops[2])
18100 || TARGET_THUMB2
18101 || (CONST_INT_P (otherops[2])
18102 && INTVAL (otherops[2]) > -256
18103 && INTVAL (otherops[2]) < 256)))
18105 if (reg_overlap_mentioned_p (operands[0],
18106 otherops[2]))
18108 /* Swap base and index registers over to
18109 avoid a conflict. */
18110 std::swap (otherops[1], otherops[2]);
18112 /* If both registers conflict, it will usually
18113 have been fixed by a splitter. */
18114 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18115 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18117 if (emit)
18119 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18120 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18122 if (count)
18123 *count = 2;
18125 else
18127 otherops[0] = operands[0];
18128 if (emit)
18129 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18131 return "";
18134 if (CONST_INT_P (otherops[2]))
18136 if (emit)
18138 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18139 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18140 else
18141 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 else
18146 if (emit)
18147 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18150 else
18152 if (emit)
18153 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18156 if (count)
18157 *count = 2;
18159 if (TARGET_LDRD)
18160 return "ldr%(d%)\t%0, [%1]";
18162 return "ldm%(ia%)\t%1, %M0";
18164 else
18166 otherops[1] = adjust_address (operands[1], SImode, 4);
18167 /* Take care of overlapping base/data reg. */
18168 if (reg_mentioned_p (operands[0], operands[1]))
18170 if (emit)
18172 output_asm_insn ("ldr%?\t%0, %1", otherops);
18173 output_asm_insn ("ldr%?\t%0, %1", operands);
18175 if (count)
18176 *count = 2;
18179 else
18181 if (emit)
18183 output_asm_insn ("ldr%?\t%0, %1", operands);
18184 output_asm_insn ("ldr%?\t%0, %1", otherops);
18186 if (count)
18187 *count = 2;
18192 else
18194 /* Constraints should ensure this. */
18195 gcc_assert (code0 == MEM && code1 == REG);
18196 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18197 || (TARGET_ARM && TARGET_LDRD));
18199 switch (GET_CODE (XEXP (operands[0], 0)))
18201 case REG:
18202 if (emit)
18204 if (TARGET_LDRD)
18205 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18206 else
18207 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18209 break;
18211 case PRE_INC:
18212 gcc_assert (TARGET_LDRD);
18213 if (emit)
18214 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18215 break;
18217 case PRE_DEC:
18218 if (emit)
18220 if (TARGET_LDRD)
18221 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18222 else
18223 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18225 break;
18227 case POST_INC:
18228 if (emit)
18230 if (TARGET_LDRD)
18231 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18232 else
18233 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18235 break;
18237 case POST_DEC:
18238 gcc_assert (TARGET_LDRD);
18239 if (emit)
18240 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18241 break;
18243 case PRE_MODIFY:
18244 case POST_MODIFY:
18245 otherops[0] = operands[1];
18246 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18247 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18249 /* IWMMXT allows offsets larger than ldrd can handle,
18250 fix these up with a pair of ldr. */
18251 if (!TARGET_THUMB2
18252 && CONST_INT_P (otherops[2])
18253 && (INTVAL(otherops[2]) <= -256
18254 || INTVAL(otherops[2]) >= 256))
18256 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18258 if (emit)
18260 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18261 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18263 if (count)
18264 *count = 2;
18266 else
18268 if (emit)
18270 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18271 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18273 if (count)
18274 *count = 2;
18277 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18279 if (emit)
18280 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18282 else
18284 if (emit)
18285 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18287 break;
18289 case PLUS:
18290 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18291 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18293 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18295 case -8:
18296 if (emit)
18297 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18298 return "";
18300 case -4:
18301 if (TARGET_THUMB2)
18302 break;
18303 if (emit)
18304 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18305 return "";
18307 case 4:
18308 if (TARGET_THUMB2)
18309 break;
18310 if (emit)
18311 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18312 return "";
18315 if (TARGET_LDRD
18316 && (REG_P (otherops[2])
18317 || TARGET_THUMB2
18318 || (CONST_INT_P (otherops[2])
18319 && INTVAL (otherops[2]) > -256
18320 && INTVAL (otherops[2]) < 256)))
18322 otherops[0] = operands[1];
18323 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18324 if (emit)
18325 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18326 return "";
18328 /* Fall through */
18330 default:
18331 otherops[0] = adjust_address (operands[0], SImode, 4);
18332 otherops[1] = operands[1];
18333 if (emit)
18335 output_asm_insn ("str%?\t%1, %0", operands);
18336 output_asm_insn ("str%?\t%H1, %0", otherops);
18338 if (count)
18339 *count = 2;
18343 return "";
18346 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18347 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
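/* For example (registers arbitrary), a quad-word load from a register
   address is emitted as "ldmia r4, {r0-r3}", and a register-to-register
   move expands into four MOVs ordered so that overlapping source and
   destination ranges are not corrupted.  */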
18349 const char *
18350 output_move_quad (rtx *operands)
18352 if (REG_P (operands[0]))
18354 /* Load, or reg->reg move. */
18356 if (MEM_P (operands[1]))
18358 switch (GET_CODE (XEXP (operands[1], 0)))
18360 case REG:
18361 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18362 break;
18364 case LABEL_REF:
18365 case CONST:
18366 output_asm_insn ("adr%?\t%0, %1", operands);
18367 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18368 break;
18370 default:
18371 gcc_unreachable ();
18374 else
18376 rtx ops[2];
18377 int dest, src, i;
18379 gcc_assert (REG_P (operands[1]));
18381 dest = REGNO (operands[0]);
18382 src = REGNO (operands[1]);
18384 /* This seems pretty dumb, but hopefully GCC won't try to do it
18385 very often. */
18386 if (dest < src)
18387 for (i = 0; i < 4; i++)
18389 ops[0] = gen_rtx_REG (SImode, dest + i);
18390 ops[1] = gen_rtx_REG (SImode, src + i);
18391 output_asm_insn ("mov%?\t%0, %1", ops);
18393 else
18394 for (i = 3; i >= 0; i--)
18396 ops[0] = gen_rtx_REG (SImode, dest + i);
18397 ops[1] = gen_rtx_REG (SImode, src + i);
18398 output_asm_insn ("mov%?\t%0, %1", ops);
18402 else
18404 gcc_assert (MEM_P (operands[0]));
18405 gcc_assert (REG_P (operands[1]));
18406 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18408 switch (GET_CODE (XEXP (operands[0], 0)))
18410 case REG:
18411 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18412 break;
18414 default:
18415 gcc_unreachable ();
18419 return "";
18422 /* Output a VFP load or store instruction. */
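/* A sketch of the template expansion below (registers arbitrary): a
   DFmode load from a plain address becomes "vldr.64 d8, [r0]", a store
   with post-increment becomes "vstmia.64 r3!, {d8}", and SFmode/SImode
   operands use the ".32" forms on an S register.  */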
18424 const char *
18425 output_move_vfp (rtx *operands)
18427 rtx reg, mem, addr, ops[2];
18428 int load = REG_P (operands[0]);
18429 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18430 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18431 const char *templ;
18432 char buff[50];
18433 machine_mode mode;
18435 reg = operands[!load];
18436 mem = operands[load];
18438 mode = GET_MODE (reg);
18440 gcc_assert (REG_P (reg));
18441 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18442 gcc_assert (mode == SFmode
18443 || mode == DFmode
18444 || mode == SImode
18445 || mode == DImode
18446 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18447 gcc_assert (MEM_P (mem));
18449 addr = XEXP (mem, 0);
18451 switch (GET_CODE (addr))
18453 case PRE_DEC:
18454 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18455 ops[0] = XEXP (addr, 0);
18456 ops[1] = reg;
18457 break;
18459 case POST_INC:
18460 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18461 ops[0] = XEXP (addr, 0);
18462 ops[1] = reg;
18463 break;
18465 default:
18466 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18467 ops[0] = reg;
18468 ops[1] = mem;
18469 break;
18472 sprintf (buff, templ,
18473 load ? "ld" : "st",
18474 dp ? "64" : "32",
18475 dp ? "P" : "",
18476 integer_p ? "\t%@ int" : "");
18477 output_asm_insn (buff, ops);
18479 return "";
18482 /* Output a Neon double-word or quad-word load or store, or a load
18483 or store for larger structure modes.
18485 WARNING: The ordering of elements is weird in big-endian mode,
18486 because the EABI requires that vectors stored in memory appear
18487 as though they were stored by a VSTM instruction.
18488 GCC RTL defines element ordering based on in-memory order.
18489 This can be different from the architectural ordering of elements
18490 within a NEON register. The intrinsics defined in arm_neon.h use the
18491 NEON register element ordering, not the GCC RTL element ordering.
18493 For example, the in-memory ordering of a big-endian quadword
18494 vector with 16-bit elements when stored from register pair {d0,d1}
18495 will be (lowest address first, d0[N] is NEON register element N):
18497 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18499 When necessary, quadword registers (dN, dN+1) are moved to ARM
18500 registers from rN in the order:
18502 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18504 So that STM/LDM can be used on vectors in ARM registers, and the
18505 same memory layout will result as if VSTM/VLDM were used.
18507 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18508 possible, which allows use of appropriate alignment tags.
18509 Note that the choice of "64" is independent of the actual vector
18510 element size; this size simply ensures that the behavior is
18511 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18513 Due to limitations of those instructions, use of VST1.64/VLD1.64
18514 is not possible if:
18515 - the address contains PRE_DEC, or
18516 - the mode refers to more than 4 double-word registers
18518 In those cases, it would be possible to replace VSTM/VLDM by a
18519 sequence of instructions; this is not currently implemented since
18520 this is not certain to actually improve performance. */
18522 const char *
18523 output_move_neon (rtx *operands)
18525 rtx reg, mem, addr, ops[2];
18526 int regno, nregs, load = REG_P (operands[0]);
18527 const char *templ;
18528 char buff[50];
18529 machine_mode mode;
18531 reg = operands[!load];
18532 mem = operands[load];
18534 mode = GET_MODE (reg);
18536 gcc_assert (REG_P (reg));
18537 regno = REGNO (reg);
18538 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18539 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18540 || NEON_REGNO_OK_FOR_QUAD (regno));
18541 gcc_assert (VALID_NEON_DREG_MODE (mode)
18542 || VALID_NEON_QREG_MODE (mode)
18543 || VALID_NEON_STRUCT_MODE (mode));
18544 gcc_assert (MEM_P (mem));
18546 addr = XEXP (mem, 0);
18548 /* Strip off const from addresses like (const (plus (...))). */
18549 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18550 addr = XEXP (addr, 0);
18552 switch (GET_CODE (addr))
18554 case POST_INC:
18555 /* We have to use vldm / vstm for too-large modes. */
18556 if (nregs > 4)
18558 templ = "v%smia%%?\t%%0!, %%h1";
18559 ops[0] = XEXP (addr, 0);
18561 else
18563 templ = "v%s1.64\t%%h1, %%A0";
18564 ops[0] = mem;
18566 ops[1] = reg;
18567 break;
18569 case PRE_DEC:
18570 /* We have to use vldm / vstm in this case, since there is no
18571 pre-decrement form of the vld1 / vst1 instructions. */
18572 templ = "v%smdb%%?\t%%0!, %%h1";
18573 ops[0] = XEXP (addr, 0);
18574 ops[1] = reg;
18575 break;
18577 case POST_MODIFY:
18578 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18579 gcc_unreachable ();
18581 case REG:
18582 /* We have to use vldm / vstm for too-large modes. */
18583 if (nregs > 1)
18585 if (nregs > 4)
18586 templ = "v%smia%%?\t%%m0, %%h1";
18587 else
18588 templ = "v%s1.64\t%%h1, %%A0";
18590 ops[0] = mem;
18591 ops[1] = reg;
18592 break;
18594 /* Fall through. */
18595 case LABEL_REF:
18596 case PLUS:
18598 int i;
18599 int overlap = -1;
18600 for (i = 0; i < nregs; i++)
18602 /* We're only using DImode here because it's a convenient size. */
18603 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18604 ops[1] = adjust_address (mem, DImode, 8 * i);
18605 if (reg_overlap_mentioned_p (ops[0], mem))
18607 gcc_assert (overlap == -1);
18608 overlap = i;
18610 else
18612 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18613 output_asm_insn (buff, ops);
18616 if (overlap != -1)
18618 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18619 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18620 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18621 output_asm_insn (buff, ops);
18624 return "";
18627 default:
18628 gcc_unreachable ();
18631 sprintf (buff, templ, load ? "ld" : "st");
18632 output_asm_insn (buff, ops);
18634 return "";
18637 /* Compute and return the length of neon_mov<mode>, where <mode> is
18638 one of VSTRUCT modes: EI, OI, CI or XI. */
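/* For instance, an OImode register-to-register move is counted as two
   instructions (8 bytes) and XImode as four (16 bytes); loads or stores
   whose address is a PLUS or LABEL_REF are counted at one 4-byte VLDR/VSTR
   per D register, and all other addresses as a single 4-byte insn.  */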
18640 arm_attr_length_move_neon (rtx_insn *insn)
18642 rtx reg, mem, addr;
18643 int load;
18644 machine_mode mode;
18646 extract_insn_cached (insn);
18648 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18650 mode = GET_MODE (recog_data.operand[0]);
18651 switch (mode)
18653 case EImode:
18654 case OImode:
18655 return 8;
18656 case CImode:
18657 return 12;
18658 case XImode:
18659 return 16;
18660 default:
18661 gcc_unreachable ();
18665 load = REG_P (recog_data.operand[0]);
18666 reg = recog_data.operand[!load];
18667 mem = recog_data.operand[load];
18669 gcc_assert (MEM_P (mem));
18671 mode = GET_MODE (reg);
18672 addr = XEXP (mem, 0);
18674 /* Strip off const from addresses like (const (plus (...))). */
18675 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18676 addr = XEXP (addr, 0);
18678 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18680 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18681 return insns * 4;
18683 else
18684 return 4;
18687 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18688 return zero. */
18691 arm_address_offset_is_imm (rtx_insn *insn)
18693 rtx mem, addr;
18695 extract_insn_cached (insn);
18697 if (REG_P (recog_data.operand[0]))
18698 return 0;
18700 mem = recog_data.operand[0];
18702 gcc_assert (MEM_P (mem));
18704 addr = XEXP (mem, 0);
18706 if (REG_P (addr)
18707 || (GET_CODE (addr) == PLUS
18708 && REG_P (XEXP (addr, 0))
18709 && CONST_INT_P (XEXP (addr, 1))))
18710 return 1;
18711 else
18712 return 0;
18715 /* Output an ADD r, s, #n where n may be too big for one instruction.
18716 If adding zero to one register, output nothing. */
18717 const char *
18718 output_add_immediate (rtx *operands)
18720 HOST_WIDE_INT n = INTVAL (operands[2]);
18722 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18724 if (n < 0)
18725 output_multi_immediate (operands,
18726 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18727 -n);
18728 else
18729 output_multi_immediate (operands,
18730 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18734 return "";
18737 /* Output a multiple immediate operation.
18738 OPERANDS is the vector of operands referred to in the output patterns.
18739 INSTR1 is the output pattern to use for the first constant.
18740 INSTR2 is the output pattern to use for subsequent constants.
18741 IMMED_OP is the index of the constant slot in OPERANDS.
18742 N is the constant value. */
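/* A worked example (register names arbitrary): adding the constant
   0x10004 via output_add_immediate comes out as "add r0, r1, #4"
   followed by "add r0, r0, #65536", since each emitted chunk must be an
   8-bit value positioned at an even bit offset, as the loop below
   enforces.  */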
18743 static const char *
18744 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18745 int immed_op, HOST_WIDE_INT n)
18747 #if HOST_BITS_PER_WIDE_INT > 32
18748 n &= 0xffffffff;
18749 #endif
18751 if (n == 0)
18753 /* Quick and easy output. */
18754 operands[immed_op] = const0_rtx;
18755 output_asm_insn (instr1, operands);
18757 else
18759 int i;
18760 const char * instr = instr1;
18762 /* Note that n is never zero here (which would give no output). */
18763 for (i = 0; i < 32; i += 2)
18765 if (n & (3 << i))
18767 operands[immed_op] = GEN_INT (n & (255 << i));
18768 output_asm_insn (instr, operands);
18769 instr = instr2;
18770 i += 6;
18775 return "";
18778 /* Return the name of a shifter operation. */
18779 static const char *
18780 arm_shift_nmem(enum rtx_code code)
18782 switch (code)
18784 case ASHIFT:
18785 return ARM_LSL_NAME;
18787 case ASHIFTRT:
18788 return "asr";
18790 case LSHIFTRT:
18791 return "lsr";
18793 case ROTATERT:
18794 return "ror";
18796 default:
18797 abort();
18801 /* Return the appropriate ARM instruction for the operation code.
18802 The returned result should not be overwritten. OP is the rtx of the
18803 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18804 was shifted. */
18805 const char *
18806 arithmetic_instr (rtx op, int shift_first_arg)
18808 switch (GET_CODE (op))
18810 case PLUS:
18811 return "add";
18813 case MINUS:
18814 return shift_first_arg ? "rsb" : "sub";
18816 case IOR:
18817 return "orr";
18819 case XOR:
18820 return "eor";
18822 case AND:
18823 return "and";
18825 case ASHIFT:
18826 case ASHIFTRT:
18827 case LSHIFTRT:
18828 case ROTATERT:
18829 return arm_shift_nmem(GET_CODE(op));
18831 default:
18832 gcc_unreachable ();
18836 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18837 for the operation code. The returned result should not be overwritten.
18838 OP is the rtx code of the shift.
18839 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18840 constant shift amount otherwise. */
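/* Hedged examples of the mapping below: (ashift reg (const_int 3)) yields
   "lsl" with *AMOUNTP set to 3, (mult reg (const_int 8)) is handled the
   same way via int_log2, and (rotate reg (const_int 8)) is rewritten as
   "ror" by 24.  */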
18841 static const char *
18842 shift_op (rtx op, HOST_WIDE_INT *amountp)
18844 const char * mnem;
18845 enum rtx_code code = GET_CODE (op);
18847 switch (code)
18849 case ROTATE:
18850 if (!CONST_INT_P (XEXP (op, 1)))
18852 output_operand_lossage ("invalid shift operand");
18853 return NULL;
18856 code = ROTATERT;
18857 *amountp = 32 - INTVAL (XEXP (op, 1));
18858 mnem = "ror";
18859 break;
18861 case ASHIFT:
18862 case ASHIFTRT:
18863 case LSHIFTRT:
18864 case ROTATERT:
18865 mnem = arm_shift_nmem(code);
18866 if (CONST_INT_P (XEXP (op, 1)))
18868 *amountp = INTVAL (XEXP (op, 1));
18870 else if (REG_P (XEXP (op, 1)))
18872 *amountp = -1;
18873 return mnem;
18875 else
18877 output_operand_lossage ("invalid shift operand");
18878 return NULL;
18880 break;
18882 case MULT:
18883 /* We never have to worry about the amount being other than a
18884 power of 2, since this case can never be reloaded from a reg. */
18885 if (!CONST_INT_P (XEXP (op, 1)))
18887 output_operand_lossage ("invalid shift operand");
18888 return NULL;
18891 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18893 /* Amount must be a power of two. */
18894 if (*amountp & (*amountp - 1))
18896 output_operand_lossage ("invalid shift operand");
18897 return NULL;
18900 *amountp = int_log2 (*amountp);
18901 return ARM_LSL_NAME;
18903 default:
18904 output_operand_lossage ("invalid shift operand");
18905 return NULL;
18908 /* This is not 100% correct, but follows from the desire to merge
18909 multiplication by a power of 2 with the recognizer for a
18910 shift. >=32 is not a valid shift for "lsl", so we must try and
18911 output a shift that produces the correct arithmetical result.
18912 Using lsr #32 is identical except for the fact that the carry bit
18913 is not set correctly if we set the flags; but we never use the
18914 carry bit from such an operation, so we can ignore that. */
18915 if (code == ROTATERT)
18916 /* Rotate is just modulo 32. */
18917 *amountp &= 31;
18918 else if (*amountp != (*amountp & 31))
18920 if (code == ASHIFT)
18921 mnem = "lsr";
18922 *amountp = 32;
18925 /* Shifts of 0 are no-ops. */
18926 if (*amountp == 0)
18927 return NULL;
18929 return mnem;
18932 /* Obtain the shift from the POWER of two. */
18934 static HOST_WIDE_INT
18935 int_log2 (HOST_WIDE_INT power)
18937 HOST_WIDE_INT shift = 0;
18939 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18941 gcc_assert (shift <= 31);
18942 shift++;
18945 return shift;
18948 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18949 because /bin/as is horribly restrictive. The judgement about
18950 whether or not each character is 'printable' (and can be output as
18951 is) or not (and must be printed with an octal escape) must be made
18952 with reference to the *host* character set -- the situation is
18953 similar to that discussed in the comments above pp_c_char in
18954 c-pretty-print.c. */
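/* For example, the bytes 'h', 'i', '"', 0x07 are emitted as
   .ascii "hi\"\007"
   and a fresh .ascii directive is started whenever a line reaches
   MAX_ASCII_LEN characters.  */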
18956 #define MAX_ASCII_LEN 51
18958 void
18959 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18961 int i;
18962 int len_so_far = 0;
18964 fputs ("\t.ascii\t\"", stream);
18966 for (i = 0; i < len; i++)
18968 int c = p[i];
18970 if (len_so_far >= MAX_ASCII_LEN)
18972 fputs ("\"\n\t.ascii\t\"", stream);
18973 len_so_far = 0;
18976 if (ISPRINT (c))
18978 if (c == '\\' || c == '\"')
18980 putc ('\\', stream);
18981 len_so_far++;
18983 putc (c, stream);
18984 len_so_far++;
18986 else
18988 fprintf (stream, "\\%03o", c);
18989 len_so_far += 4;
18993 fputs ("\"\n", stream);
18996 /* Compute the register save mask for registers 0 through 12
18997 inclusive. This code is used by arm_compute_save_reg_mask. */
18999 static unsigned long
19000 arm_compute_save_reg0_reg12_mask (void)
19002 unsigned long func_type = arm_current_func_type ();
19003 unsigned long save_reg_mask = 0;
19004 unsigned int reg;
19006 if (IS_INTERRUPT (func_type))
19008 unsigned int max_reg;
19009 /* Interrupt functions must not corrupt any registers,
19010 even call clobbered ones. If this is a leaf function
19011 we can just examine the registers used by the RTL, but
19012 otherwise we have to assume that whatever function is
19013 called might clobber anything, and so we have to save
19014 all the call-clobbered registers as well. */
19015 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19016 /* FIQ handlers have registers r8 - r12 banked, so
19017 we only need to check r0 - r7. Normal ISRs only
19018 bank r14 and r15, so we must check up to r12.
19019 r13 is the stack pointer which is always preserved,
19020 so we do not need to consider it here. */
19021 max_reg = 7;
19022 else
19023 max_reg = 12;
19025 for (reg = 0; reg <= max_reg; reg++)
19026 if (df_regs_ever_live_p (reg)
19027 || (! crtl->is_leaf && call_used_regs[reg]))
19028 save_reg_mask |= (1 << reg);
19030 /* Also save the pic base register if necessary. */
19031 if (flag_pic
19032 && !TARGET_SINGLE_PIC_BASE
19033 && arm_pic_register != INVALID_REGNUM
19034 && crtl->uses_pic_offset_table)
19035 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19037 else if (IS_VOLATILE(func_type))
19039 /* For noreturn functions we historically omitted register saves
19040 altogether. However this really messes up debugging. As a
19041 compromise save just the frame pointers. Combined with the link
19042 register saved elsewhere this should be sufficient to get
19043 a backtrace. */
19044 if (frame_pointer_needed)
19045 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19046 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19047 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19051 else
19053 /* In the normal case we only need to save those registers
19054 which are call saved and which are used by this function. */
19055 for (reg = 0; reg <= 11; reg++)
19056 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19057 save_reg_mask |= (1 << reg);
19059 /* Handle the frame pointer as a special case. */
19060 if (frame_pointer_needed)
19061 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19063 /* If we aren't loading the PIC register,
19064 don't stack it even though it may be live. */
19065 if (flag_pic
19066 && !TARGET_SINGLE_PIC_BASE
19067 && arm_pic_register != INVALID_REGNUM
19068 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19069 || crtl->uses_pic_offset_table))
19070 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19072 /* The prologue will copy SP into R0, so save it. */
19073 if (IS_STACKALIGN (func_type))
19074 save_reg_mask |= 1;
19077 /* Save registers so the exception handler can modify them. */
19078 if (crtl->calls_eh_return)
19080 unsigned int i;
19082 for (i = 0; ; i++)
19084 reg = EH_RETURN_DATA_REGNO (i);
19085 if (reg == INVALID_REGNUM)
19086 break;
19087 save_reg_mask |= 1 << reg;
19091 return save_reg_mask;
19094 /* Return true if r3 is live at the start of the function. */
19096 static bool
19097 arm_r3_live_at_start_p (void)
19099 /* Just look at cfg info, which is still close enough to correct at this
19100 point. This gives false positives for broken functions that might use
19101 uninitialized data that happens to be allocated in r3, but who cares? */
19102 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19105 /* Compute the number of bytes used to store the static chain register on the
19106 stack, above the stack frame. We need to know this accurately to get the
19107 alignment of the rest of the stack frame correct. */
19109 static int
19110 arm_compute_static_chain_stack_bytes (void)
19112 /* See the defining assertion in arm_expand_prologue. */
19113 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19114 && IS_NESTED (arm_current_func_type ())
19115 && arm_r3_live_at_start_p ()
19116 && crtl->args.pretend_args_size == 0)
19117 return 4;
19119 return 0;
19122 /* Compute a bit mask of which registers need to be
19123 saved on the stack for the current function.
19124 This is used by arm_get_frame_offsets, which may add extra registers. */
19126 static unsigned long
19127 arm_compute_save_reg_mask (void)
19129 unsigned int save_reg_mask = 0;
19130 unsigned long func_type = arm_current_func_type ();
19131 unsigned int reg;
19133 if (IS_NAKED (func_type))
19134 /* This should never really happen. */
19135 return 0;
19137 /* If we are creating a stack frame, then we must save the frame pointer,
19138 IP (which will hold the old stack pointer), LR and the PC. */
19139 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19140 save_reg_mask |=
19141 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19142 | (1 << IP_REGNUM)
19143 | (1 << LR_REGNUM)
19144 | (1 << PC_REGNUM);
19146 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19148 /* Decide if we need to save the link register.
19149 Interrupt routines have their own banked link register,
19150 so they never need to save it.
19151 Otherwise if we do not use the link register we do not need to save
19152 it. If we are pushing other registers onto the stack however, we
19153 can save an instruction in the epilogue by pushing the link register
19154 now and then popping it back into the PC. This incurs extra memory
19155 accesses though, so we only do it when optimizing for size, and only
19156 if we know that we will not need a fancy return sequence. */
19157 if (df_regs_ever_live_p (LR_REGNUM)
19158 || (save_reg_mask
19159 && optimize_size
19160 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19161 && !crtl->calls_eh_return))
19162 save_reg_mask |= 1 << LR_REGNUM;
19164 if (cfun->machine->lr_save_eliminated)
19165 save_reg_mask &= ~ (1 << LR_REGNUM);
19167 if (TARGET_REALLY_IWMMXT
19168 && ((bit_count (save_reg_mask)
19169 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19170 arm_compute_static_chain_stack_bytes())
19171 ) % 2) != 0)
19173 /* The total number of registers that are going to be pushed
19174 onto the stack is odd. We need to ensure that the stack
19175 is 64-bit aligned before we start to save iWMMXt registers,
19176 and also before we start to create locals. (A local variable
19177 might be a double or long long which we will load/store using
19178 an iWMMXt instruction). Therefore we need to push another
19179 ARM register, so that the stack will be 64-bit aligned. We
19180 try to avoid using the arg registers (r0 -r3) as they might be
19181 used to pass values in a tail call. */
19182 for (reg = 4; reg <= 12; reg++)
19183 if ((save_reg_mask & (1 << reg)) == 0)
19184 break;
19186 if (reg <= 12)
19187 save_reg_mask |= (1 << reg);
19188 else
19190 cfun->machine->sibcall_blocked = 1;
19191 save_reg_mask |= (1 << 3);
19195 /* We may need to push an additional register for use initializing the
19196 PIC base register. */
19197 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19198 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19200 reg = thumb_find_work_register (1 << 4);
19201 if (!call_used_regs[reg])
19202 save_reg_mask |= (1 << reg);
19205 return save_reg_mask;
19209 /* Compute a bit mask of which registers need to be
19210 saved on the stack for the current function. */
19211 static unsigned long
19212 thumb1_compute_save_reg_mask (void)
19214 unsigned long mask;
19215 unsigned reg;
19217 mask = 0;
19218 for (reg = 0; reg < 12; reg ++)
19219 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19220 mask |= 1 << reg;
19222 if (flag_pic
19223 && !TARGET_SINGLE_PIC_BASE
19224 && arm_pic_register != INVALID_REGNUM
19225 && crtl->uses_pic_offset_table)
19226 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19228 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19229 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19230 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19232 /* LR will also be pushed if any lo regs are pushed. */
19233 if (mask & 0xff || thumb_force_lr_save ())
19234 mask |= (1 << LR_REGNUM);
19236 /* Make sure we have a low work register if we need one.
19237 We will need one if we are going to push a high register,
19238 but we are not currently intending to push a low register. */
19239 if ((mask & 0xff) == 0
19240 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19242 /* Use thumb_find_work_register to choose which register
19243 we will use. If the register is live then we will
19244 have to push it. Use LAST_LO_REGNUM as our fallback
19245 choice for the register to select. */
19246 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19247 /* Make sure the register returned by thumb_find_work_register is
19248 not part of the return value. */
19249 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19250 reg = LAST_LO_REGNUM;
19252 if (! call_used_regs[reg])
19253 mask |= 1 << reg;
19256 /* The 504 below is 8 bytes less than 512 because there are two possible
19257 alignment words. We can't tell here if they will be present or not so we
19258 have to play it safe and assume that they are. */
19259 if ((CALLER_INTERWORKING_SLOT_SIZE +
19260 ROUND_UP_WORD (get_frame_size ()) +
19261 crtl->outgoing_args_size) >= 504)
19263 /* This is the same as the code in thumb1_expand_prologue() which
19264 determines which register to use for stack decrement. */
19265 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19266 if (mask & (1 << reg))
19267 break;
19269 if (reg > LAST_LO_REGNUM)
19271 /* Make sure we have a register available for stack decrement. */
19272 mask |= 1 << LAST_LO_REGNUM;
19276 return mask;
19280 /* Return the number of bytes required to save VFP registers. */
19281 static int
19282 arm_get_vfp_saved_size (void)
19284 unsigned int regno;
19285 int count;
19286 int saved;
19288 saved = 0;
19289 /* Space for saved VFP registers. */
19290 if (TARGET_HARD_FLOAT && TARGET_VFP)
19292 count = 0;
19293 for (regno = FIRST_VFP_REGNUM;
19294 regno < LAST_VFP_REGNUM;
19295 regno += 2)
19297 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19298 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19300 if (count > 0)
19302 /* Work around the ARM10 VFPr1 bug. */
19303 if (count == 2 && !arm_arch6)
19304 count++;
19305 saved += count * 8;
19307 count = 0;
19309 else
19310 count++;
19312 if (count > 0)
19314 if (count == 2 && !arm_arch6)
19315 count++;
19316 saved += count * 8;
19319 return saved;
19323 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19324 everything bar the final return instruction. If simple_return is true,
19325 then do not output the epilogue, because it has already been emitted in RTL. */
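/* Illustrative outputs from the code below (registers arbitrary): a
   normal function that saved r4 and lr returns with "ldmfd sp!, {r4, pc}"
   (or "pop {r4, pc}" under unified syntax), an interworked return uses
   "bx lr", and ISR/FIQ exits use "subs pc, lr, #4" so that the CPSR is
   restored as well.  */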
19326 const char *
19327 output_return_instruction (rtx operand, bool really_return, bool reverse,
19328 bool simple_return)
19330 char conditional[10];
19331 char instr[100];
19332 unsigned reg;
19333 unsigned long live_regs_mask;
19334 unsigned long func_type;
19335 arm_stack_offsets *offsets;
19337 func_type = arm_current_func_type ();
19339 if (IS_NAKED (func_type))
19340 return "";
19342 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19344 /* If this function was declared non-returning, and we have
19345 found a tail call, then we have to trust that the called
19346 function won't return. */
19347 if (really_return)
19349 rtx ops[2];
19351 /* Otherwise, trap an attempted return by aborting. */
19352 ops[0] = operand;
19353 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19354 : "abort");
19355 assemble_external_libcall (ops[1]);
19356 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19359 return "";
19362 gcc_assert (!cfun->calls_alloca || really_return);
19364 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19366 cfun->machine->return_used_this_function = 1;
19368 offsets = arm_get_frame_offsets ();
19369 live_regs_mask = offsets->saved_regs_mask;
19371 if (!simple_return && live_regs_mask)
19373 const char * return_reg;
19375 /* If we do not have any special requirements for function exit
19376 (e.g. interworking) then we can load the return address
19377 directly into the PC. Otherwise we must load it into LR. */
19378 if (really_return
19379 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19380 return_reg = reg_names[PC_REGNUM];
19381 else
19382 return_reg = reg_names[LR_REGNUM];
19384 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19386 /* There are three possible reasons for the IP register
19387 being saved. 1) a stack frame was created, in which case
19388 IP contains the old stack pointer, or 2) an ISR routine
19389 corrupted it, or 3) it was saved to align the stack on
19390 iWMMXt. In case 1, restore IP into SP, otherwise just
19391 restore IP. */
19392 if (frame_pointer_needed)
19394 live_regs_mask &= ~ (1 << IP_REGNUM);
19395 live_regs_mask |= (1 << SP_REGNUM);
19397 else
19398 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19401 /* On some ARM architectures it is faster to use LDR rather than
19402 LDM to load a single register. On other architectures, the
19403 cost is the same. In 26 bit mode, or for exception handlers,
19404 we have to use LDM to load the PC so that the CPSR is also
19405 restored. */
19406 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19407 if (live_regs_mask == (1U << reg))
19408 break;
19410 if (reg <= LAST_ARM_REGNUM
19411 && (reg != LR_REGNUM
19412 || ! really_return
19413 || ! IS_INTERRUPT (func_type)))
19415 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19416 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19418 else
19420 char *p;
19421 int first = 1;
19423 /* Generate the load multiple instruction to restore the
19424 registers. Note we can get here, even if
19425 frame_pointer_needed is true, but only if sp already
19426 points to the base of the saved core registers. */
19427 if (live_regs_mask & (1 << SP_REGNUM))
19429 unsigned HOST_WIDE_INT stack_adjust;
19431 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19432 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19434 if (stack_adjust && arm_arch5 && TARGET_ARM)
19435 if (TARGET_UNIFIED_ASM)
19436 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19437 else
19438 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19439 else
19441 /* If we can't use ldmib (SA110 bug),
19442 then try to pop r3 instead. */
19443 if (stack_adjust)
19444 live_regs_mask |= 1 << 3;
19446 if (TARGET_UNIFIED_ASM)
19447 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19448 else
19449 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19452 else
19453 if (TARGET_UNIFIED_ASM)
19454 sprintf (instr, "pop%s\t{", conditional);
19455 else
19456 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19458 p = instr + strlen (instr);
19460 for (reg = 0; reg <= SP_REGNUM; reg++)
19461 if (live_regs_mask & (1 << reg))
19463 int l = strlen (reg_names[reg]);
19465 if (first)
19466 first = 0;
19467 else
19469 memcpy (p, ", ", 2);
19470 p += 2;
19473 memcpy (p, "%|", 2);
19474 memcpy (p + 2, reg_names[reg], l);
19475 p += l + 2;
19478 if (live_regs_mask & (1 << LR_REGNUM))
19480 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19481 /* If returning from an interrupt, restore the CPSR. */
19482 if (IS_INTERRUPT (func_type))
19483 strcat (p, "^");
19485 else
19486 strcpy (p, "}");
19489 output_asm_insn (instr, & operand);
19491 /* See if we need to generate an extra instruction to
19492 perform the actual function return. */
19493 if (really_return
19494 && func_type != ARM_FT_INTERWORKED
19495 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19497 /* The return has already been handled
19498 by loading the LR into the PC. */
19499 return "";
19503 if (really_return)
19505 switch ((int) ARM_FUNC_TYPE (func_type))
19507 case ARM_FT_ISR:
19508 case ARM_FT_FIQ:
19509 /* ??? This is wrong for unified assembly syntax. */
19510 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19511 break;
19513 case ARM_FT_INTERWORKED:
19514 sprintf (instr, "bx%s\t%%|lr", conditional);
19515 break;
19517 case ARM_FT_EXCEPTION:
19518 /* ??? This is wrong for unified assembly syntax. */
19519 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19520 break;
19522 default:
19523 /* Use bx if it's available. */
19524 if (arm_arch5 || arm_arch4t)
19525 sprintf (instr, "bx%s\t%%|lr", conditional);
19526 else
19527 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19528 break;
19531 output_asm_insn (instr, & operand);
19534 return "";
19537 /* Write the function name into the code section, directly preceding
19538 the function prologue.
19540 Code will be output similar to this:
19542 .ascii "arm_poke_function_name", 0
19543 .align
19545 .word 0xff000000 + (t1 - t0)
19546 arm_poke_function_name
19547 mov ip, sp
19548 stmfd sp!, {fp, ip, lr, pc}
19549 sub fp, ip, #4
19551 When performing a stack backtrace, code can inspect the value
19552 of 'pc' stored at 'fp' + 0. If the trace function then looks
19553 at location pc - 12 and the top 8 bits are set, then we know
19554 that there is a function name embedded immediately preceding this
19555 location, whose length is ((pc[-3]) & ~0xff000000).
19557 We assume that pc is declared as a pointer to an unsigned long.
19559 It is of no benefit to output the function name if we are assembling
19560 a leaf function. These function types will not contain a stack
19561 backtrace structure; therefore, it is not possible to determine the
19562 function name. */
19563 void
19564 arm_poke_function_name (FILE *stream, const char *name)
19566 unsigned long alignlength;
19567 unsigned long length;
19568 rtx x;
19570 length = strlen (name) + 1;
19571 alignlength = ROUND_UP_WORD (length);
19573 ASM_OUTPUT_ASCII (stream, name, length);
19574 ASM_OUTPUT_ALIGN (stream, 2);
19575 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19576 assemble_aligned_integer (UNITS_PER_WORD, x);
19579 /* Place some comments into the assembler stream
19580 describing the current function. */
19581 static void
19582 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19584 unsigned long func_type;
19586 /* ??? Do we want to print some of the below anyway? */
19587 if (TARGET_THUMB1)
19588 return;
19590 /* Sanity check. */
19591 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19593 func_type = arm_current_func_type ();
19595 switch ((int) ARM_FUNC_TYPE (func_type))
19597 default:
19598 case ARM_FT_NORMAL:
19599 break;
19600 case ARM_FT_INTERWORKED:
19601 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19602 break;
19603 case ARM_FT_ISR:
19604 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19605 break;
19606 case ARM_FT_FIQ:
19607 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19608 break;
19609 case ARM_FT_EXCEPTION:
19610 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19611 break;
19614 if (IS_NAKED (func_type))
19615 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19617 if (IS_VOLATILE (func_type))
19618 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19620 if (IS_NESTED (func_type))
19621 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19622 if (IS_STACKALIGN (func_type))
19623 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19625 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19626 crtl->args.size,
19627 crtl->args.pretend_args_size, frame_size);
19629 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19630 frame_pointer_needed,
19631 cfun->machine->uses_anonymous_args);
19633 if (cfun->machine->lr_save_eliminated)
19634 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19636 if (crtl->calls_eh_return)
19637 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19641 static void
19642 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19643 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19645 arm_stack_offsets *offsets;
19647 if (TARGET_THUMB1)
19649 int regno;
19651 /* Emit any call-via-reg trampolines that are needed for v4t support
19652 of call_reg and call_value_reg type insns. */
19653 for (regno = 0; regno < LR_REGNUM; regno++)
19655 rtx label = cfun->machine->call_via[regno];
19657 if (label != NULL)
19659 switch_to_section (function_section (current_function_decl));
19660 targetm.asm_out.internal_label (asm_out_file, "L",
19661 CODE_LABEL_NUMBER (label));
19662 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19666 /* ??? Probably not safe to set this here, since it assumes that a
19667 function will be emitted as assembly immediately after we generate
19668 RTL for it. This does not happen for inline functions. */
19669 cfun->machine->return_used_this_function = 0;
19671 else /* TARGET_32BIT */
19673 /* We need to take into account any stack-frame rounding. */
19674 offsets = arm_get_frame_offsets ();
19676 gcc_assert (!use_return_insn (FALSE, NULL)
19677 || (cfun->machine->return_used_this_function != 0)
19678 || offsets->saved_regs == offsets->outgoing_args
19679 || frame_pointer_needed);
19683 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19684 STR and STRD. If an even number of registers is being pushed, an
19685 STRD pattern is created for each register pair. If an
19686 odd number of registers is pushed, emit an initial STR followed by
19687 as many STRD instructions as are needed. This works best when the
19688 stack is initially 64-bit aligned (the normal case), since it
19689 ensures that each STRD is also 64-bit aligned. */
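/* As an illustration (a sketch of the intended effect, not the literal
   RTL): with SAVED_REGS_MASK covering {r4, r5, r6, r7, lr}, i.e. five
   registers, the expansion corresponds to

        str     r4, [sp, #-20]!
        strd    r5, r6, [sp, #4]
        strd    r7, lr, [sp, #12]

   one STR with writeback for the odd register, then STRDs at fixed
   offsets, each of which is 64-bit aligned provided SP was 64-bit
   aligned on entry.  */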
19690 static void
19691 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19693 int num_regs = 0;
19694 int i;
19695 int regno;
19696 rtx par = NULL_RTX;
19697 rtx dwarf = NULL_RTX;
19698 rtx tmp;
19699 bool first = true;
19701 num_regs = bit_count (saved_regs_mask);
19703 /* Must be at least one register to save, and can't save SP or PC. */
19704 gcc_assert (num_regs > 0 && num_regs <= 14);
19705 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19706 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19708 /* Create sequence for DWARF info. All the frame-related data for
19709 debugging is held in this wrapper. */
19710 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19712 /* Describe the stack adjustment. */
19713 tmp = gen_rtx_SET (VOIDmode,
19714 stack_pointer_rtx,
19715 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19716 RTX_FRAME_RELATED_P (tmp) = 1;
19717 XVECEXP (dwarf, 0, 0) = tmp;
19719 /* Find the first register. */
19720 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19723 i = 0;
19725 /* If there's an odd number of registers to push, start off by
19726 pushing a single register. This ensures that subsequent strd
19727 operations are dword aligned (assuming that SP was originally
19728 64-bit aligned). */
19729 if ((num_regs & 1) != 0)
19731 rtx reg, mem, insn;
19733 reg = gen_rtx_REG (SImode, regno);
19734 if (num_regs == 1)
19735 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19736 stack_pointer_rtx));
19737 else
19738 mem = gen_frame_mem (Pmode,
19739 gen_rtx_PRE_MODIFY
19740 (Pmode, stack_pointer_rtx,
19741 plus_constant (Pmode, stack_pointer_rtx,
19742 -4 * num_regs)));
19744 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19745 RTX_FRAME_RELATED_P (tmp) = 1;
19746 insn = emit_insn (tmp);
19747 RTX_FRAME_RELATED_P (insn) = 1;
19748 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19749 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19750 reg);
19751 RTX_FRAME_RELATED_P (tmp) = 1;
19752 i++;
19753 regno++;
19754 XVECEXP (dwarf, 0, i) = tmp;
19755 first = false;
19758 while (i < num_regs)
19759 if (saved_regs_mask & (1 << regno))
19761 rtx reg1, reg2, mem1, mem2;
19762 rtx tmp0, tmp1, tmp2;
19763 int regno2;
19765 /* Find the register to pair with this one. */
19766 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19767 regno2++)
19770 reg1 = gen_rtx_REG (SImode, regno);
19771 reg2 = gen_rtx_REG (SImode, regno2);
19773 if (first)
19775 rtx insn;
19777 first = false;
19778 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19779 stack_pointer_rtx,
19780 -4 * num_regs));
19781 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19782 stack_pointer_rtx,
19783 -4 * (num_regs - 1)));
19784 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19785 plus_constant (Pmode, stack_pointer_rtx,
19786 -4 * (num_regs)));
19787 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19788 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19789 RTX_FRAME_RELATED_P (tmp0) = 1;
19790 RTX_FRAME_RELATED_P (tmp1) = 1;
19791 RTX_FRAME_RELATED_P (tmp2) = 1;
19792 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19793 XVECEXP (par, 0, 0) = tmp0;
19794 XVECEXP (par, 0, 1) = tmp1;
19795 XVECEXP (par, 0, 2) = tmp2;
19796 insn = emit_insn (par);
19797 RTX_FRAME_RELATED_P (insn) = 1;
19798 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19800 else
19802 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19803 stack_pointer_rtx,
19804 4 * i));
19805 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19806 stack_pointer_rtx,
19807 4 * (i + 1)));
19808 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19809 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19810 RTX_FRAME_RELATED_P (tmp1) = 1;
19811 RTX_FRAME_RELATED_P (tmp2) = 1;
19812 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19813 XVECEXP (par, 0, 0) = tmp1;
19814 XVECEXP (par, 0, 1) = tmp2;
19815 emit_insn (par);
19818 /* Create unwind information. This is an approximation. */
19819 tmp1 = gen_rtx_SET (VOIDmode,
19820 gen_frame_mem (Pmode,
19821 plus_constant (Pmode,
19822 stack_pointer_rtx,
19823 4 * i)),
19824 reg1);
19825 tmp2 = gen_rtx_SET (VOIDmode,
19826 gen_frame_mem (Pmode,
19827 plus_constant (Pmode,
19828 stack_pointer_rtx,
19829 4 * (i + 1))),
19830 reg2);
19832 RTX_FRAME_RELATED_P (tmp1) = 1;
19833 RTX_FRAME_RELATED_P (tmp2) = 1;
19834 XVECEXP (dwarf, 0, i + 1) = tmp1;
19835 XVECEXP (dwarf, 0, i + 2) = tmp2;
19836 i += 2;
19837 regno = regno2 + 1;
19839 else
19840 regno++;
19842 return;
19845 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19846 whenever possible, otherwise it emits single-word stores. The first store
19847 also allocates stack space for all saved registers, using writeback with
19848 pre-indexed addressing with writeback. All other stores use offset addressing. If no STRD
19849 can be emitted, this function emits a sequence of single-word stores,
19850 and not an STM as before, because single-word stores provide more freedom for
19851 scheduling and can be turned into an STM by peephole optimizations. */
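/* As an illustration (a sketch, not the literal RTL): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7, lr} the stores emitted here
   are roughly equivalent to

        strd    r4, r5, [sp, #-20]!
        strd    r6, r7, [sp, #8]
        str     lr, [sp, #16]

   The first store allocates the whole block with writeback; lr has no
   consecutive partner, so it falls back to a single STR.  */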
19852 static void
19853 arm_emit_strd_push (unsigned long saved_regs_mask)
19855 int num_regs = 0;
19856 int i, j, dwarf_index = 0;
19857 int offset = 0;
19858 rtx dwarf = NULL_RTX;
19859 rtx insn = NULL_RTX;
19860 rtx tmp, mem;
19862 /* TODO: More efficient code can be emitted by changing the
19863 layout, e.g., first push all pairs that can use STRD to keep the
19864 stack aligned, and then push all other registers. */
19865 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19866 if (saved_regs_mask & (1 << i))
19867 num_regs++;
19869 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19870 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19871 gcc_assert (num_regs > 0);
19873 /* Create sequence for DWARF info. */
19874 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19876 /* For dwarf info, we generate an explicit stack update. */
19877 tmp = gen_rtx_SET (VOIDmode,
19878 stack_pointer_rtx,
19879 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19880 RTX_FRAME_RELATED_P (tmp) = 1;
19881 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19883 /* Save registers. */
19884 offset = - 4 * num_regs;
19885 j = 0;
19886 while (j <= LAST_ARM_REGNUM)
19887 if (saved_regs_mask & (1 << j))
19889 if ((j % 2 == 0)
19890 && (saved_regs_mask & (1 << (j + 1))))
19892 /* The current register and the next register form a register pair for
19893 which STRD can be generated. */
19894 if (offset < 0)
19896 /* Allocate stack space for all saved registers. */
19897 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19898 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19899 mem = gen_frame_mem (DImode, tmp);
19900 offset = 0;
19902 else if (offset > 0)
19903 mem = gen_frame_mem (DImode,
19904 plus_constant (Pmode,
19905 stack_pointer_rtx,
19906 offset));
19907 else
19908 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19910 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19911 RTX_FRAME_RELATED_P (tmp) = 1;
19912 tmp = emit_insn (tmp);
19914 /* Record the first store insn. */
19915 if (dwarf_index == 1)
19916 insn = tmp;
19918 /* Generate dwarf info. */
19919 mem = gen_frame_mem (SImode,
19920 plus_constant (Pmode,
19921 stack_pointer_rtx,
19922 offset));
19923 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19924 RTX_FRAME_RELATED_P (tmp) = 1;
19925 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19927 mem = gen_frame_mem (SImode,
19928 plus_constant (Pmode,
19929 stack_pointer_rtx,
19930 offset + 4));
19931 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19932 RTX_FRAME_RELATED_P (tmp) = 1;
19933 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19935 offset += 8;
19936 j += 2;
19938 else
19940 /* Emit a single word store. */
19941 if (offset < 0)
19943 /* Allocate stack space for all saved registers. */
19944 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19945 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19946 mem = gen_frame_mem (SImode, tmp);
19947 offset = 0;
19949 else if (offset > 0)
19950 mem = gen_frame_mem (SImode,
19951 plus_constant (Pmode,
19952 stack_pointer_rtx,
19953 offset));
19954 else
19955 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19957 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19958 RTX_FRAME_RELATED_P (tmp) = 1;
19959 tmp = emit_insn (tmp);
19961 /* Record the first store insn. */
19962 if (dwarf_index == 1)
19963 insn = tmp;
19965 /* Generate dwarf info. */
19966 mem = gen_frame_mem (SImode,
19967 plus_constant(Pmode,
19968 stack_pointer_rtx,
19969 offset));
19970 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19971 RTX_FRAME_RELATED_P (tmp) = 1;
19972 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19974 offset += 4;
19975 j += 1;
19978 else
19979 j++;
19981 /* Attach dwarf info to the first insn we generate. */
19982 gcc_assert (insn != NULL_RTX);
19983 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19984 RTX_FRAME_RELATED_P (insn) = 1;
19987 /* Generate and emit an insn that we will recognize as a push_multi.
19988 Unfortunately, since this insn does not reflect very well the actual
19989 semantics of the operation, we need to annotate the insn for the benefit
19990 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19991 MASK for registers that should be annotated for DWARF2 frame unwind
19992 information. */
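/* As an illustration only: for MASK = {r4, r5, lr} the insn built here
   is ultimately printed as something like

        push    {r4, r5, lr}       @ or stmfd sp!, {r4, r5, lr}

   while the attached note describes it to the unwinder as one SP -= 12
   adjustment plus three individual stores, as laid out in the comment
   inside the function below.  */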
19993 static rtx
19994 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19996 int num_regs = 0;
19997 int num_dwarf_regs = 0;
19998 int i, j;
19999 rtx par;
20000 rtx dwarf;
20001 int dwarf_par_index;
20002 rtx tmp, reg;
20004 /* We don't record the PC in the dwarf frame information. */
20005 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20007 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20009 if (mask & (1 << i))
20010 num_regs++;
20011 if (dwarf_regs_mask & (1 << i))
20012 num_dwarf_regs++;
20015 gcc_assert (num_regs && num_regs <= 16);
20016 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20018 /* For the body of the insn we are going to generate an UNSPEC in
20019 parallel with several USEs. This allows the insn to be recognized
20020 by the push_multi pattern in the arm.md file.
20022 The body of the insn looks something like this:
20024 (parallel [
20025 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20026 (const_int:SI <num>)))
20027 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20028 (use (reg:SI XX))
20029 (use (reg:SI YY))
20033 For the frame note however, we try to be more explicit and actually
20034 show each register being stored into the stack frame, plus a (single)
20035 decrement of the stack pointer. We do it this way in order to be
20036 friendly to the stack unwinding code, which only wants to see a single
20037 stack decrement per instruction. The RTL we generate for the note looks
20038 something like this:
20040 (sequence [
20041 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20042 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20043 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20044 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20048 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20049 instead we'd have a parallel expression detailing all
20050 the stores to the various memory addresses so that debug
20051 information is more up-to-date. Remember however while writing
20052 this to take care of the constraints with the push instruction.
20054 Note also that this has to be taken care of for the VFP registers.
20056 For more see PR43399. */
20058 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20059 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20060 dwarf_par_index = 1;
20062 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20064 if (mask & (1 << i))
20066 reg = gen_rtx_REG (SImode, i);
20068 XVECEXP (par, 0, 0)
20069 = gen_rtx_SET (VOIDmode,
20070 gen_frame_mem
20071 (BLKmode,
20072 gen_rtx_PRE_MODIFY (Pmode,
20073 stack_pointer_rtx,
20074 plus_constant
20075 (Pmode, stack_pointer_rtx,
20076 -4 * num_regs))
20078 gen_rtx_UNSPEC (BLKmode,
20079 gen_rtvec (1, reg),
20080 UNSPEC_PUSH_MULT));
20082 if (dwarf_regs_mask & (1 << i))
20084 tmp = gen_rtx_SET (VOIDmode,
20085 gen_frame_mem (SImode, stack_pointer_rtx),
20086 reg);
20087 RTX_FRAME_RELATED_P (tmp) = 1;
20088 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20091 break;
20095 for (j = 1, i++; j < num_regs; i++)
20097 if (mask & (1 << i))
20099 reg = gen_rtx_REG (SImode, i);
20101 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20103 if (dwarf_regs_mask & (1 << i))
20106 = gen_rtx_SET (VOIDmode,
20107 gen_frame_mem
20108 (SImode,
20109 plus_constant (Pmode, stack_pointer_rtx,
20110 4 * j)),
20111 reg);
20112 RTX_FRAME_RELATED_P (tmp) = 1;
20113 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20116 j++;
20120 par = emit_insn (par);
20122 tmp = gen_rtx_SET (VOIDmode,
20123 stack_pointer_rtx,
20124 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20125 RTX_FRAME_RELATED_P (tmp) = 1;
20126 XVECEXP (dwarf, 0, 0) = tmp;
20128 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20130 return par;
20133 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20134 SIZE is the offset to be adjusted.
20135 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20136 static void
20137 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20139 rtx dwarf;
20141 RTX_FRAME_RELATED_P (insn) = 1;
20142 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20143 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20146 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20147 SAVED_REGS_MASK shows which registers need to be restored.
20149 Unfortunately, since this insn does not reflect very well the actual
20150 semantics of the operation, we need to annotate the insn for the benefit
20151 of DWARF2 frame unwind information. */
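/* As an illustration only: for SAVED_REGS_MASK = {r4, r5, r6, pc} the
   pattern built here is ultimately printed as something like

        pop     {r4, r5, r6, pc}   @ or ldmfd sp!, {r4, r5, r6, pc}

   a single load-multiple that restores the registers and, because PC is
   in the list, also performs the function return.  */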
20152 static void
20153 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20155 int num_regs = 0;
20156 int i, j;
20157 rtx par;
20158 rtx dwarf = NULL_RTX;
20159 rtx tmp, reg;
20160 bool return_in_pc;
20161 int offset_adj;
20162 int emit_update;
20164 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20165 offset_adj = return_in_pc ? 1 : 0;
20166 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20167 if (saved_regs_mask & (1 << i))
20168 num_regs++;
20170 gcc_assert (num_regs && num_regs <= 16);
20172 /* If SP is in reglist, then we don't emit SP update insn. */
20173 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20175 /* The parallel needs to hold num_regs SETs
20176 and one SET for the stack update. */
20177 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20179 if (return_in_pc)
20181 tmp = ret_rtx;
20182 XVECEXP (par, 0, 0) = tmp;
20185 if (emit_update)
20187 /* Increment the stack pointer, based on there being
20188 num_regs 4-byte registers to restore. */
20189 tmp = gen_rtx_SET (VOIDmode,
20190 stack_pointer_rtx,
20191 plus_constant (Pmode,
20192 stack_pointer_rtx,
20193 4 * num_regs));
20194 RTX_FRAME_RELATED_P (tmp) = 1;
20195 XVECEXP (par, 0, offset_adj) = tmp;
20198 /* Now restore every reg, which may include PC. */
20199 for (j = 0, i = 0; j < num_regs; i++)
20200 if (saved_regs_mask & (1 << i))
20202 reg = gen_rtx_REG (SImode, i);
20203 if ((num_regs == 1) && emit_update && !return_in_pc)
20205 /* Emit single load with writeback. */
20206 tmp = gen_frame_mem (SImode,
20207 gen_rtx_POST_INC (Pmode,
20208 stack_pointer_rtx));
20209 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20210 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20211 return;
20214 tmp = gen_rtx_SET (VOIDmode,
20215 reg,
20216 gen_frame_mem
20217 (SImode,
20218 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20219 RTX_FRAME_RELATED_P (tmp) = 1;
20220 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20222 /* We need to maintain a sequence for DWARF info too. As dwarf info
20223 should not have PC, skip PC. */
20224 if (i != PC_REGNUM)
20225 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20227 j++;
20230 if (return_in_pc)
20231 par = emit_jump_insn (par);
20232 else
20233 par = emit_insn (par);
20235 REG_NOTES (par) = dwarf;
20236 if (!return_in_pc)
20237 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20238 stack_pointer_rtx, stack_pointer_rtx);
20241 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20242 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20244 Unfortunately, since this insn does not reflect very well the actual
20245 semantics of the operation, we need to annotate the insn for the benefit
20246 of DWARF2 frame unwind information. */
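/* As an illustration only: restoring d8-d11 from the stack pointer
   corresponds to something like

        vldm    sp!, {d8-d11}

   with a REG_CFA_RESTORE note per D register, plus either a CFA
   adjustment of 8 * num_regs bytes or, when the base register is IP,
   a REG_CFA_DEF_CFA note (see below).  */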
20247 static void
20248 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20250 int i, j;
20251 rtx par;
20252 rtx dwarf = NULL_RTX;
20253 rtx tmp, reg;
20255 gcc_assert (num_regs && num_regs <= 32);
20257 /* Workaround ARM10 VFPr1 bug. */
20258 if (num_regs == 2 && !arm_arch6)
20260 if (first_reg == 15)
20261 first_reg--;
20263 num_regs++;
20266 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20267 there could be up to 32 D-registers to restore.
20268 If there are more than 16 D-registers, make two recursive calls,
20269 each of which emits one pop_multi instruction. */
20270 if (num_regs > 16)
20272 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20273 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20274 return;
20277 /* The parallel needs to hold num_regs SETs
20278 and one SET for the stack update. */
20279 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20281 /* Increment the stack pointer, based on there being
20282 num_regs 8-byte registers to restore. */
20283 tmp = gen_rtx_SET (VOIDmode,
20284 base_reg,
20285 plus_constant (Pmode, base_reg, 8 * num_regs));
20286 RTX_FRAME_RELATED_P (tmp) = 1;
20287 XVECEXP (par, 0, 0) = tmp;
20289 /* Now show every reg that will be restored, using a SET for each. */
20290 for (j = 0, i=first_reg; j < num_regs; i += 2)
20292 reg = gen_rtx_REG (DFmode, i);
20294 tmp = gen_rtx_SET (VOIDmode,
20295 reg,
20296 gen_frame_mem
20297 (DFmode,
20298 plus_constant (Pmode, base_reg, 8 * j)));
20299 RTX_FRAME_RELATED_P (tmp) = 1;
20300 XVECEXP (par, 0, j + 1) = tmp;
20302 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20304 j++;
20307 par = emit_insn (par);
20308 REG_NOTES (par) = dwarf;
20310 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
20311 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20313 RTX_FRAME_RELATED_P (par) = 1;
20314 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20316 else
20317 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20318 base_reg, base_reg);
20321 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20322 even number of registers is being popped, multiple LDRD patterns are created
20323 for all register pairs. If an odd number of registers is popped, the last
20324 register is loaded using an LDR pattern. */
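/* As an illustration only: with SAVED_REGS_MASK = {r4, r5, r6, r7, pc}
   the emitted sequence corresponds roughly to

        ldrd    r4, r5, [sp]
        ldrd    r6, r7, [sp, #8]
        add     sp, sp, #16
        ldr     pc, [sp], #4

   LDRD pairs at fixed offsets, one stack adjustment, and a final
   post-incrementing LDR that also returns because it loads PC.  */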
20325 static void
20326 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20328 int num_regs = 0;
20329 int i, j;
20330 rtx par = NULL_RTX;
20331 rtx dwarf = NULL_RTX;
20332 rtx tmp, reg, tmp1;
20333 bool return_in_pc;
20335 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20336 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20337 if (saved_regs_mask & (1 << i))
20338 num_regs++;
20340 gcc_assert (num_regs && num_regs <= 16);
20342 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20343 to be popped. So, if num_regs is even, now it will become odd,
20344 and we can generate pop with PC. If num_regs is odd, it will be
20345 even now, and ldr with return can be generated for PC. */
20346 if (return_in_pc)
20347 num_regs--;
20349 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20351 /* Var j iterates over all the registers to gather all the registers in
20352 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
20353 A PARALLEL RTX of register-pair is created here, so that pattern for
20354 LDRD can be matched. As PC is always last register to be popped, and
20355 we have already decremented num_regs if PC, we don't have to worry
20356 about PC in this loop. */
20357 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20358 if (saved_regs_mask & (1 << j))
20360 /* Create RTX for memory load. */
20361 reg = gen_rtx_REG (SImode, j);
20362 tmp = gen_rtx_SET (SImode,
20363 reg,
20364 gen_frame_mem (SImode,
20365 plus_constant (Pmode,
20366 stack_pointer_rtx, 4 * i)));
20367 RTX_FRAME_RELATED_P (tmp) = 1;
20369 if (i % 2 == 0)
20371 /* When saved-register index (i) is even, the RTX to be emitted is
20372 yet to be created. Hence create it first. The LDRD pattern we
20373 are generating is :
20374 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20375 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20376 where target registers need not be consecutive. */
20377 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20378 dwarf = NULL_RTX;
20381 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20382 added as 0th element and if i is odd, reg_i is added as 1st element
20383 of LDRD pattern shown above. */
20384 XVECEXP (par, 0, (i % 2)) = tmp;
20385 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20387 if ((i % 2) == 1)
20389 /* When saved-register index (i) is odd, RTXs for both the registers
20390 to be loaded are generated in above given LDRD pattern, and the
20391 pattern can be emitted now. */
20392 par = emit_insn (par);
20393 REG_NOTES (par) = dwarf;
20394 RTX_FRAME_RELATED_P (par) = 1;
20397 i++;
20400 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20401 number of registers is even AND return_in_pc is true, the last register is
20402 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20403 then LDR with post increment. */
20405 /* Increment the stack pointer, based on there being
20406 num_regs 4-byte registers to restore. */
20407 tmp = gen_rtx_SET (VOIDmode,
20408 stack_pointer_rtx,
20409 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20410 RTX_FRAME_RELATED_P (tmp) = 1;
20411 tmp = emit_insn (tmp);
20412 if (!return_in_pc)
20414 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20415 stack_pointer_rtx, stack_pointer_rtx);
20418 dwarf = NULL_RTX;
20420 if (((num_regs % 2) == 1 && !return_in_pc)
20421 || ((num_regs % 2) == 0 && return_in_pc))
20423 /* Scan for the single register to be popped. Skip until the saved
20424 register is found. */
20425 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20427 /* Gen LDR with post increment here. */
20428 tmp1 = gen_rtx_MEM (SImode,
20429 gen_rtx_POST_INC (SImode,
20430 stack_pointer_rtx));
20431 set_mem_alias_set (tmp1, get_frame_alias_set ());
20433 reg = gen_rtx_REG (SImode, j);
20434 tmp = gen_rtx_SET (SImode, reg, tmp1);
20435 RTX_FRAME_RELATED_P (tmp) = 1;
20436 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20438 if (return_in_pc)
20440 /* If return_in_pc, j must be PC_REGNUM. */
20441 gcc_assert (j == PC_REGNUM);
20442 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20443 XVECEXP (par, 0, 0) = ret_rtx;
20444 XVECEXP (par, 0, 1) = tmp;
20445 par = emit_jump_insn (par);
20447 else
20449 par = emit_insn (tmp);
20450 REG_NOTES (par) = dwarf;
20451 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20452 stack_pointer_rtx, stack_pointer_rtx);
20456 else if ((num_regs % 2) == 1 && return_in_pc)
20458 /* There are 2 registers to be popped. So, generate the pattern
20459 pop_multiple_with_stack_update_and_return to pop in PC. */
20460 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20463 return;
20466 /* LDRD in ARM mode needs consecutive registers as operands. This function
20467 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20468 offset addressing and then generates one separate stack update. This provides
20469 more scheduling freedom, compared to writeback on every load. However,
20470 if the function returns using load into PC directly
20471 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20472 before the last load. TODO: Add a peephole optimization to recognize
20473 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20474 peephole optimization to merge the load at stack-offset zero
20475 with the stack update instruction using load with writeback
20476 in post-index addressing mode. */
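/* As an illustration only: with SAVED_REGS_MASK = {r4, r5, r6, lr} the
   emitted sequence corresponds roughly to

        ldrd    r4, r5, [sp]
        ldr     r6, [sp, #8]
        ldr     lr, [sp, #12]
        add     sp, sp, #16

   LDRD for the consecutive even/odd pair, single-word loads for the
   registers without a partner, and one separate stack update.  */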
20477 static void
20478 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20480 int j = 0;
20481 int offset = 0;
20482 rtx par = NULL_RTX;
20483 rtx dwarf = NULL_RTX;
20484 rtx tmp, mem;
20486 /* Restore saved registers. */
20487 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20488 j = 0;
20489 while (j <= LAST_ARM_REGNUM)
20490 if (saved_regs_mask & (1 << j))
20492 if ((j % 2) == 0
20493 && (saved_regs_mask & (1 << (j + 1)))
20494 && (j + 1) != PC_REGNUM)
20496 /* The current register and the next register form a register pair for which
20497 LDRD can be generated. PC is always the last register popped, and
20498 we handle it separately. */
20499 if (offset > 0)
20500 mem = gen_frame_mem (DImode,
20501 plus_constant (Pmode,
20502 stack_pointer_rtx,
20503 offset));
20504 else
20505 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20507 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20508 tmp = emit_insn (tmp);
20509 RTX_FRAME_RELATED_P (tmp) = 1;
20511 /* Generate dwarf info. */
20513 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20514 gen_rtx_REG (SImode, j),
20515 NULL_RTX);
20516 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20517 gen_rtx_REG (SImode, j + 1),
20518 dwarf);
20520 REG_NOTES (tmp) = dwarf;
20522 offset += 8;
20523 j += 2;
20525 else if (j != PC_REGNUM)
20527 /* Emit a single word load. */
20528 if (offset > 0)
20529 mem = gen_frame_mem (SImode,
20530 plus_constant (Pmode,
20531 stack_pointer_rtx,
20532 offset));
20533 else
20534 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20536 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20537 tmp = emit_insn (tmp);
20538 RTX_FRAME_RELATED_P (tmp) = 1;
20540 /* Generate dwarf info. */
20541 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20542 gen_rtx_REG (SImode, j),
20543 NULL_RTX);
20545 offset += 4;
20546 j += 1;
20548 else /* j == PC_REGNUM */
20549 j++;
20551 else
20552 j++;
20554 /* Update the stack. */
20555 if (offset > 0)
20557 tmp = gen_rtx_SET (Pmode,
20558 stack_pointer_rtx,
20559 plus_constant (Pmode,
20560 stack_pointer_rtx,
20561 offset));
20562 tmp = emit_insn (tmp);
20563 arm_add_cfa_adjust_cfa_note (tmp, offset,
20564 stack_pointer_rtx, stack_pointer_rtx);
20565 offset = 0;
20568 if (saved_regs_mask & (1 << PC_REGNUM))
20570 /* Only PC is to be popped. */
20571 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20572 XVECEXP (par, 0, 0) = ret_rtx;
20573 tmp = gen_rtx_SET (SImode,
20574 gen_rtx_REG (SImode, PC_REGNUM),
20575 gen_frame_mem (SImode,
20576 gen_rtx_POST_INC (SImode,
20577 stack_pointer_rtx)));
20578 RTX_FRAME_RELATED_P (tmp) = 1;
20579 XVECEXP (par, 0, 1) = tmp;
20580 par = emit_jump_insn (par);
20582 /* Generate dwarf info. */
20583 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20584 gen_rtx_REG (SImode, PC_REGNUM),
20585 NULL_RTX);
20586 REG_NOTES (par) = dwarf;
20587 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20588 stack_pointer_rtx, stack_pointer_rtx);
20592 /* Calculate the size of the return value that is passed in registers. */
20593 static unsigned
20594 arm_size_return_regs (void)
20596 machine_mode mode;
20598 if (crtl->return_rtx != 0)
20599 mode = GET_MODE (crtl->return_rtx);
20600 else
20601 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20603 return GET_MODE_SIZE (mode);
20606 /* Return true if the current function needs to save/restore LR. */
20607 static bool
20608 thumb_force_lr_save (void)
20610 return !cfun->machine->lr_save_eliminated
20611 && (!leaf_function_p ()
20612 || thumb_far_jump_used_p ()
20613 || df_regs_ever_live_p (LR_REGNUM));
20616 /* We do not know whether r3 will be available, because
20617 there is an indirect tailcall happening in this
20618 particular case. */
20619 static bool
20620 is_indirect_tailcall_p (rtx call)
20622 rtx pat = PATTERN (call);
20624 /* Indirect tail call. */
20625 pat = XVECEXP (pat, 0, 0);
20626 if (GET_CODE (pat) == SET)
20627 pat = SET_SRC (pat);
20629 pat = XEXP (XEXP (pat, 0), 0);
20630 return REG_P (pat);
20633 /* Return true if r3 is used by any of the tail call insns in the
20634 current function. */
20635 static bool
20636 any_sibcall_could_use_r3 (void)
20638 edge_iterator ei;
20639 edge e;
20641 if (!crtl->tail_call_emit)
20642 return false;
20643 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20644 if (e->flags & EDGE_SIBCALL)
20646 rtx call = BB_END (e->src);
20647 if (!CALL_P (call))
20648 call = prev_nonnote_nondebug_insn (call);
20649 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20650 if (find_regno_fusage (call, USE, 3)
20651 || is_indirect_tailcall_p (call))
20652 return true;
20654 return false;
20658 /* Compute the distance from register FROM to register TO.
20659 These can be the arg pointer (26), the soft frame pointer (25),
20660 the stack pointer (13) or the hard frame pointer (11).
20661 In thumb mode r7 is used as the soft frame pointer, if needed.
20662 Typical stack layout looks like this:
20664 old stack pointer -> | |
20665 ----
20666 | | \
20667 | | saved arguments for
20668 | | vararg functions
20669 | | /
20671 hard FP & arg pointer -> | | \
20672 | | stack
20673 | | frame
20674 | | /
20676 | | \
20677 | | call saved
20678 | | registers
20679 soft frame pointer -> | | /
20681 | | \
20682 | | local
20683 | | variables
20684 locals base pointer -> | | /
20686 | | \
20687 | | outgoing
20688 | | arguments
20689 current stack pointer -> | | /
20692 For a given function some or all of these stack components
20693 may not be needed, giving rise to the possibility of
20694 eliminating some of the registers.
20696 The values returned by this function must reflect the behavior
20697 of arm_expand_prologue() and arm_compute_save_reg_mask().
20699 The sign of the number returned reflects the direction of stack
20700 growth, so the values are positive for all eliminations except
20701 from the soft frame pointer to the hard frame pointer.
20703 SFP may point just inside the local variables block to ensure correct
20704 alignment. */
20707 /* Calculate stack offsets. These are used to calculate register elimination
20708 offsets and in prologue/epilogue code. Also calculates which registers
20709 should be saved. */
20711 static arm_stack_offsets *
20712 arm_get_frame_offsets (void)
20714 struct arm_stack_offsets *offsets;
20715 unsigned long func_type;
20716 int leaf;
20717 int saved;
20718 int core_saved;
20719 HOST_WIDE_INT frame_size;
20720 int i;
20722 offsets = &cfun->machine->stack_offsets;
20724 /* We need to know if we are a leaf function. Unfortunately, it
20725 is possible to be called after start_sequence has been called,
20726 which causes get_insns to return the insns for the sequence,
20727 not the function, which will cause leaf_function_p to return
20728 the incorrect result. Fortunately, we only really need
20730 to know about leaf functions once reload has completed, and the
20731 frame size cannot be changed after that time, so we can safely
20732 use the cached value. */
20734 if (reload_completed)
20735 return offsets;
20737 /* Initially this is the size of the local variables. It will be translated
20738 into an offset once we have determined the size of preceding data. */
20739 frame_size = ROUND_UP_WORD (get_frame_size ());
20741 leaf = leaf_function_p ();
20743 /* Space for variadic functions. */
20744 offsets->saved_args = crtl->args.pretend_args_size;
20746 /* In Thumb mode this is incorrect, but never used. */
20747 offsets->frame
20748 = (offsets->saved_args
20749 + arm_compute_static_chain_stack_bytes ()
20750 + (frame_pointer_needed ? 4 : 0));
20752 if (TARGET_32BIT)
20754 unsigned int regno;
20756 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20757 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20758 saved = core_saved;
20760 /* We know that SP will be doubleword aligned on entry, and we must
20761 preserve that condition at any subroutine call. We also require the
20762 soft frame pointer to be doubleword aligned. */
20764 if (TARGET_REALLY_IWMMXT)
20766 /* Check for the call-saved iWMMXt registers. */
20767 for (regno = FIRST_IWMMXT_REGNUM;
20768 regno <= LAST_IWMMXT_REGNUM;
20769 regno++)
20770 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20771 saved += 8;
20774 func_type = arm_current_func_type ();
20775 /* Space for saved VFP registers. */
20776 if (! IS_VOLATILE (func_type)
20777 && TARGET_HARD_FLOAT && TARGET_VFP)
20778 saved += arm_get_vfp_saved_size ();
20780 else /* TARGET_THUMB1 */
20782 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20783 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20784 saved = core_saved;
20785 if (TARGET_BACKTRACE)
20786 saved += 16;
20789 /* Saved registers include the stack frame. */
20790 offsets->saved_regs
20791 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20792 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20794 /* A leaf function does not need any stack alignment if it has nothing
20795 on the stack. */
20796 if (leaf && frame_size == 0
20797 /* However if it calls alloca(), we have a dynamically allocated
20798 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20799 && ! cfun->calls_alloca)
20801 offsets->outgoing_args = offsets->soft_frame;
20802 offsets->locals_base = offsets->soft_frame;
20803 return offsets;
20806 /* Ensure SFP has the correct alignment. */
20807 if (ARM_DOUBLEWORD_ALIGN
20808 && (offsets->soft_frame & 7))
20810 offsets->soft_frame += 4;
20811 /* Try to align stack by pushing an extra reg. Don't bother doing this
20812 when there is a stack frame as the alignment will be rolled into
20813 the normal stack adjustment. */
20814 if (frame_size + crtl->outgoing_args_size == 0)
20816 int reg = -1;
20818 /* Register r3 is caller-saved. Normally it does not need to be
20819 saved on entry by the prologue. However if we choose to save
20820 it for padding then we may confuse the compiler into thinking
20821 a prologue sequence is required when in fact it is not. This
20822 will occur when shrink-wrapping if r3 is used as a scratch
20823 register and there are no other callee-saved writes.
20825 This situation can be avoided when other callee-saved registers
20826 are available and r3 is not mandatory if we choose a callee-saved
20827 register for padding. */
20828 bool prefer_callee_reg_p = false;
20830 /* If it is safe to use r3, then do so. This sometimes
20831 generates better code on Thumb-2 by avoiding the need to
20832 use 32-bit push/pop instructions. */
20833 if (! any_sibcall_could_use_r3 ()
20834 && arm_size_return_regs () <= 12
20835 && (offsets->saved_regs_mask & (1 << 3)) == 0
20836 && (TARGET_THUMB2
20837 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20839 reg = 3;
20840 if (!TARGET_THUMB2)
20841 prefer_callee_reg_p = true;
20843 if (reg == -1
20844 || prefer_callee_reg_p)
20846 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20848 /* Avoid fixed registers; they may be changed at
20849 arbitrary times so it's unsafe to restore them
20850 during the epilogue. */
20851 if (!fixed_regs[i]
20852 && (offsets->saved_regs_mask & (1 << i)) == 0)
20854 reg = i;
20855 break;
20860 if (reg != -1)
20862 offsets->saved_regs += 4;
20863 offsets->saved_regs_mask |= (1 << reg);
20868 offsets->locals_base = offsets->soft_frame + frame_size;
20869 offsets->outgoing_args = (offsets->locals_base
20870 + crtl->outgoing_args_size);
20872 if (ARM_DOUBLEWORD_ALIGN)
20874 /* Ensure SP remains doubleword aligned. */
20875 if (offsets->outgoing_args & 7)
20876 offsets->outgoing_args += 4;
20877 gcc_assert (!(offsets->outgoing_args & 7));
20880 return offsets;
20884 /* Calculate the relative offsets for the different stack pointers. Positive
20885 offsets are in the direction of stack growth. */
20887 HOST_WIDE_INT
20888 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20890 arm_stack_offsets *offsets;
20892 offsets = arm_get_frame_offsets ();
20894 /* OK, now we have enough information to compute the distances.
20895 There must be an entry in these switch tables for each pair
20896 of registers in ELIMINABLE_REGS, even if some of the entries
20897 seem to be redundant or useless. */
20898 switch (from)
20900 case ARG_POINTER_REGNUM:
20901 switch (to)
20903 case THUMB_HARD_FRAME_POINTER_REGNUM:
20904 return 0;
20906 case FRAME_POINTER_REGNUM:
20907 /* This is the reverse of the soft frame pointer
20908 to hard frame pointer elimination below. */
20909 return offsets->soft_frame - offsets->saved_args;
20911 case ARM_HARD_FRAME_POINTER_REGNUM:
20912 /* This is only non-zero in the case where the static chain register
20913 is stored above the frame. */
20914 return offsets->frame - offsets->saved_args - 4;
20916 case STACK_POINTER_REGNUM:
20917 /* If nothing has been pushed on the stack at all
20918 then this will return -4. This *is* correct! */
20919 return offsets->outgoing_args - (offsets->saved_args + 4);
20921 default:
20922 gcc_unreachable ();
20924 gcc_unreachable ();
20926 case FRAME_POINTER_REGNUM:
20927 switch (to)
20929 case THUMB_HARD_FRAME_POINTER_REGNUM:
20930 return 0;
20932 case ARM_HARD_FRAME_POINTER_REGNUM:
20933 /* The hard frame pointer points to the top entry in the
20934 stack frame. The soft frame pointer to the bottom entry
20935 in the stack frame. If there is no stack frame at all,
20936 then they are identical. */
20938 return offsets->frame - offsets->soft_frame;
20940 case STACK_POINTER_REGNUM:
20941 return offsets->outgoing_args - offsets->soft_frame;
20943 default:
20944 gcc_unreachable ();
20946 gcc_unreachable ();
20948 default:
20949 /* You cannot eliminate from the stack pointer.
20950 In theory you could eliminate from the hard frame
20951 pointer to the stack pointer, but this will never
20952 happen, since if a stack frame is not needed the
20953 hard frame pointer will never be used. */
20954 gcc_unreachable ();
20958 /* Given FROM and TO register numbers, say whether this elimination is
20959 allowed. Frame pointer elimination is automatically handled.
20961 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20962 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20963 pointer, we must eliminate FRAME_POINTER_REGNUM into
20964 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20965 ARG_POINTER_REGNUM. */
20967 bool
20968 arm_can_eliminate (const int from, const int to)
20970 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20971 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20972 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20973 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20974 true);
20977 /* Emit RTL to save coprocessor registers on function entry. Returns the
20978 number of bytes pushed. */
20980 static int
20981 arm_save_coproc_regs(void)
20983 int saved_size = 0;
20984 unsigned reg;
20985 unsigned start_reg;
20986 rtx insn;
20988 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20989 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20991 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20992 insn = gen_rtx_MEM (V2SImode, insn);
20993 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20994 RTX_FRAME_RELATED_P (insn) = 1;
20995 saved_size += 8;
20998 if (TARGET_HARD_FLOAT && TARGET_VFP)
21000 start_reg = FIRST_VFP_REGNUM;
21002 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21004 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21005 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21007 if (start_reg != reg)
21008 saved_size += vfp_emit_fstmd (start_reg,
21009 (reg - start_reg) / 2);
21010 start_reg = reg + 2;
21013 if (start_reg != reg)
21014 saved_size += vfp_emit_fstmd (start_reg,
21015 (reg - start_reg) / 2);
21017 return saved_size;
21021 /* Set the Thumb frame pointer from the stack pointer. */
21023 static void
21024 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21026 HOST_WIDE_INT amount;
21027 rtx insn, dwarf;
21029 amount = offsets->outgoing_args - offsets->locals_base;
21030 if (amount < 1024)
21031 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21032 stack_pointer_rtx, GEN_INT (amount)));
21033 else
21035 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21036 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21037 expects the first two operands to be the same. */
21038 if (TARGET_THUMB2)
21040 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21041 stack_pointer_rtx,
21042 hard_frame_pointer_rtx));
21044 else
21046 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21047 hard_frame_pointer_rtx,
21048 stack_pointer_rtx));
21050 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21051 plus_constant (Pmode, stack_pointer_rtx, amount));
21052 RTX_FRAME_RELATED_P (dwarf) = 1;
21053 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21056 RTX_FRAME_RELATED_P (insn) = 1;
21059 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21060 function. */
21061 void
21062 arm_expand_prologue (void)
21064 rtx amount;
21065 rtx insn;
21066 rtx ip_rtx;
21067 unsigned long live_regs_mask;
21068 unsigned long func_type;
21069 int fp_offset = 0;
21070 int saved_pretend_args = 0;
21071 int saved_regs = 0;
21072 unsigned HOST_WIDE_INT args_to_push;
21073 arm_stack_offsets *offsets;
21075 func_type = arm_current_func_type ();
21077 /* Naked functions don't have prologues. */
21078 if (IS_NAKED (func_type))
21079 return;
21081 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21082 args_to_push = crtl->args.pretend_args_size;
21084 /* Compute which register we will have to save onto the stack. */
21085 offsets = arm_get_frame_offsets ();
21086 live_regs_mask = offsets->saved_regs_mask;
21088 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21090 if (IS_STACKALIGN (func_type))
21092 rtx r0, r1;
21094 /* Handle a word-aligned stack pointer. We generate the following:
21096 mov r0, sp
21097 bic r1, r0, #7
21098 mov sp, r1
21099 <save and restore r0 in normal prologue/epilogue>
21100 mov sp, r0
21101 bx lr
21103 The unwinder doesn't need to know about the stack realignment.
21104 Just tell it we saved SP in r0. */
21105 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21107 r0 = gen_rtx_REG (SImode, 0);
21108 r1 = gen_rtx_REG (SImode, 1);
21110 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21111 RTX_FRAME_RELATED_P (insn) = 1;
21112 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21114 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21116 /* ??? The CFA changes here, which may cause GDB to conclude that it
21117 has entered a different function. That said, the unwind info is
21118 correct, individually, before and after this instruction because
21119 we've described the save of SP, which will override the default
21120 handling of SP as restoring from the CFA. */
21121 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21124 /* For APCS frames, if the IP register is clobbered
21125 when creating the frame, save that register in a special
21126 way. */
21127 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21129 if (IS_INTERRUPT (func_type))
21131 /* Interrupt functions must not corrupt any registers.
21132 Creating a frame pointer however, corrupts the IP
21133 register, so we must push it first. */
21134 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21136 /* Do not set RTX_FRAME_RELATED_P on this insn.
21137 The dwarf stack unwinding code only wants to see one
21138 stack decrement per function, and this is not it. If
21139 this instruction is labeled as being part of the frame
21140 creation sequence then dwarf2out_frame_debug_expr will
21141 die when it encounters the assignment of IP to FP
21142 later on, since the use of SP here establishes SP as
21143 the CFA register and not IP.
21145 Anyway this instruction is not really part of the stack
21146 frame creation although it is part of the prologue. */
21148 else if (IS_NESTED (func_type))
21150 /* The static chain register is the same as the IP register
21151 used as a scratch register during stack frame creation.
21152 To get around this we need to find somewhere to store IP
21153 whilst the frame is being created. We try the following
21154 places in order:
21156 1. The last argument register r3 if it is available.
21157 2. A slot on the stack above the frame if there are no
21158 arguments to push onto the stack.
21159 3. Register r3 again, after pushing the argument registers
21160 onto the stack, if this is a varargs function.
21161 4. The last slot on the stack created for the arguments to
21162 push, if this isn't a varargs function.
21164 Note - we only need to tell the dwarf2 backend about the SP
21165 adjustment in the second variant; the static chain register
21166 doesn't need to be unwound, as it doesn't contain a value
21167 inherited from the caller. */
21169 if (!arm_r3_live_at_start_p ())
21170 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21171 else if (args_to_push == 0)
21173 rtx addr, dwarf;
21175 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21176 saved_regs += 4;
21178 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21179 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21180 fp_offset = 4;
21182 /* Just tell the dwarf backend that we adjusted SP. */
21183 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21184 plus_constant (Pmode, stack_pointer_rtx,
21185 -fp_offset));
21186 RTX_FRAME_RELATED_P (insn) = 1;
21187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21189 else
21191 /* Store the args on the stack. */
21192 if (cfun->machine->uses_anonymous_args)
21194 insn
21195 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21196 (0xf0 >> (args_to_push / 4)) & 0xf);
21197 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21198 saved_pretend_args = 1;
21200 else
21202 rtx addr, dwarf;
21204 if (args_to_push == 4)
21205 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21206 else
21207 addr
21208 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21209 plus_constant (Pmode,
21210 stack_pointer_rtx,
21211 -args_to_push));
21213 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21215 /* Just tell the dwarf backend that we adjusted SP. */
21216 dwarf
21217 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21218 plus_constant (Pmode, stack_pointer_rtx,
21219 -args_to_push));
21220 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21223 RTX_FRAME_RELATED_P (insn) = 1;
21224 fp_offset = args_to_push;
21225 args_to_push = 0;
21229 insn = emit_set_insn (ip_rtx,
21230 plus_constant (Pmode, stack_pointer_rtx,
21231 fp_offset));
21232 RTX_FRAME_RELATED_P (insn) = 1;
21235 if (args_to_push)
21237 /* Push the argument registers, or reserve space for them. */
21238 if (cfun->machine->uses_anonymous_args)
21239 insn = emit_multi_reg_push
21240 ((0xf0 >> (args_to_push / 4)) & 0xf,
21241 (0xf0 >> (args_to_push / 4)) & 0xf);
21242 else
21243 insn = emit_insn
21244 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21245 GEN_INT (- args_to_push)));
21246 RTX_FRAME_RELATED_P (insn) = 1;
21249 /* If this is an interrupt service routine, and the link register
21250 is going to be pushed, and we're not generating an extra
21251 push of IP (needed when a frame is needed and the frame layout is APCS),
21252 subtracting four from LR now will mean that the function return
21253 can be done with a single instruction. */
21254 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21255 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21256 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21257 && TARGET_ARM)
21259 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21261 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21264 if (live_regs_mask)
21266 unsigned long dwarf_regs_mask = live_regs_mask;
21268 saved_regs += bit_count (live_regs_mask) * 4;
21269 if (optimize_size && !frame_pointer_needed
21270 && saved_regs == offsets->saved_regs - offsets->saved_args)
21272 /* If no coprocessor registers are being pushed and we don't have
21273 to worry about a frame pointer then push extra registers to
21274 create the stack frame. This is done in a way that does not
21275 alter the frame layout, so is independent of the epilogue. */
21276 int n;
21277 int frame;
21278 n = 0;
21279 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21280 n++;
21281 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21282 if (frame && n * 4 >= frame)
21284 n = frame / 4;
21285 live_regs_mask |= (1 << n) - 1;
21286 saved_regs += frame;
21290 if (TARGET_LDRD
21291 && current_tune->prefer_ldrd_strd
21292 && !optimize_function_for_size_p (cfun))
21294 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21295 if (TARGET_THUMB2)
21296 thumb2_emit_strd_push (live_regs_mask);
21297 else if (TARGET_ARM
21298 && !TARGET_APCS_FRAME
21299 && !IS_INTERRUPT (func_type))
21300 arm_emit_strd_push (live_regs_mask);
21301 else
21303 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21304 RTX_FRAME_RELATED_P (insn) = 1;
21307 else
21309 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21310 RTX_FRAME_RELATED_P (insn) = 1;
21314 if (! IS_VOLATILE (func_type))
21315 saved_regs += arm_save_coproc_regs ();
21317 if (frame_pointer_needed && TARGET_ARM)
21319 /* Create the new frame pointer. */
21320 if (TARGET_APCS_FRAME)
21322 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21323 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21324 RTX_FRAME_RELATED_P (insn) = 1;
21326 if (IS_NESTED (func_type))
21328 /* Recover the static chain register. */
21329 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21330 insn = gen_rtx_REG (SImode, 3);
21331 else
21333 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21334 insn = gen_frame_mem (SImode, insn);
21336 emit_set_insn (ip_rtx, insn);
21337 /* Add a USE to stop propagate_one_insn() from barfing. */
21338 emit_insn (gen_force_register_use (ip_rtx));
21341 else
21343 insn = GEN_INT (saved_regs - 4);
21344 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21345 stack_pointer_rtx, insn));
21346 RTX_FRAME_RELATED_P (insn) = 1;
21350 if (flag_stack_usage_info)
21351 current_function_static_stack_size
21352 = offsets->outgoing_args - offsets->saved_args;
21354 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21356 /* This add can produce multiple insns for a large constant, so we
21357 need to get tricky. */
21358 rtx_insn *last = get_last_insn ();
21360 amount = GEN_INT (offsets->saved_args + saved_regs
21361 - offsets->outgoing_args);
21363 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21364 amount));
21367 last = last ? NEXT_INSN (last) : get_insns ();
21368 RTX_FRAME_RELATED_P (last) = 1;
21370 while (last != insn);
21372 /* If the frame pointer is needed, emit a special barrier that
21373 will prevent the scheduler from moving stores to the frame
21374 before the stack adjustment. */
21375 if (frame_pointer_needed)
21376 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21377 hard_frame_pointer_rtx));
21381 if (frame_pointer_needed && TARGET_THUMB2)
21382 thumb_set_frame_pointer (offsets);
21384 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21386 unsigned long mask;
21388 mask = live_regs_mask;
21389 mask &= THUMB2_WORK_REGS;
21390 if (!IS_NESTED (func_type))
21391 mask |= (1 << IP_REGNUM);
21392 arm_load_pic_register (mask);
21395 /* If we are profiling, make sure no instructions are scheduled before
21396 the call to mcount. Similarly if the user has requested no
21397 scheduling in the prolog. Similarly if we want non-call exceptions
21398 using the EABI unwinder, to prevent faulting instructions from being
21399 swapped with a stack adjustment. */
21400 if (crtl->profile || !TARGET_SCHED_PROLOG
21401 || (arm_except_unwind_info (&global_options) == UI_TARGET
21402 && cfun->can_throw_non_call_exceptions))
21403 emit_insn (gen_blockage ());
21405 /* If the link register is being kept alive, with the return address in it,
21406 then make sure that it does not get reused by the ce2 pass. */
21407 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21408 cfun->machine->lr_save_eliminated = 1;
21411 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21412 static void
21413 arm_print_condition (FILE *stream)
21415 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21417 /* Branch conversion is not implemented for Thumb-2. */
21418 if (TARGET_THUMB)
21420 output_operand_lossage ("predicated Thumb instruction");
21421 return;
21423 if (current_insn_predicate != NULL)
21425 output_operand_lossage
21426 ("predicated instruction in conditional sequence");
21427 return;
21430 fputs (arm_condition_codes[arm_current_cc], stream);
21432 else if (current_insn_predicate)
21434 enum arm_cond_code code;
21436 if (TARGET_THUMB1)
21438 output_operand_lossage ("predicated Thumb instruction");
21439 return;
21442 code = get_arm_condition_code (current_insn_predicate);
21443 fputs (arm_condition_codes[code], stream);
21448 /* Globally reserved letters: acln
21449 Punctuation letters currently used: @_|?().!#
21450 Lower case letters currently used: bcdefhimpqtvwxyz
21451 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21452 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21454 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21456 If CODE is 'd', then the X is a condition operand and the instruction
21457 should only be executed if the condition is true.
21458 If CODE is 'D', then the X is a condition operand and the instruction
21459 should only be executed if the condition is false: however, if the mode
21460 of the comparison is CCFPEmode, then always execute the instruction -- we
21461 do this because in these circumstances !GE does not necessarily imply LT;
21462 in these cases the instruction pattern will take care to make sure that
21463 an instruction containing %d will follow, thereby undoing the effects of
21464 doing this instruction unconditionally.
21465 If CODE is 'N' then X is a floating point operand that must be negated
21466 before output.
21467 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21468 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
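/* Illustrative examples (added for clarity, not part of the original
   source), based on the cases below: "%B" applied to (const_int 5)
   prints -6 (the bitwise inverse), and "%M" applied to a DImode value
   held in r4 prints the ldm/stm style list "{r4-r5}".  */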
21469 static void
21470 arm_print_operand (FILE *stream, rtx x, int code)
21472 switch (code)
21474 case '@':
21475 fputs (ASM_COMMENT_START, stream);
21476 return;
21478 case '_':
21479 fputs (user_label_prefix, stream);
21480 return;
21482 case '|':
21483 fputs (REGISTER_PREFIX, stream);
21484 return;
21486 case '?':
21487 arm_print_condition (stream);
21488 return;
21490 case '(':
21491 /* Nothing in unified syntax, otherwise the current condition code. */
21492 if (!TARGET_UNIFIED_ASM)
21493 arm_print_condition (stream);
21494 break;
21496 case ')':
21497 /* The current condition code in unified syntax, otherwise nothing. */
21498 if (TARGET_UNIFIED_ASM)
21499 arm_print_condition (stream);
21500 break;
21502 case '.':
21503 /* The current condition code for a condition code setting instruction.
21504 Preceded by 's' in unified syntax, otherwise followed by 's'. */
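/* Sketch (assuming the current condition is EQ): with the code below, a
   template such as "add%.\t..." would come out as "addseq ..." under
   unified syntax and as "addeqs ..." under divided syntax.  */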
21505 if (TARGET_UNIFIED_ASM)
21507 fputc('s', stream);
21508 arm_print_condition (stream);
21510 else
21512 arm_print_condition (stream);
21513 fputc('s', stream);
21515 return;
21517 case '!':
21518 /* If the instruction is conditionally executed then print
21519 the current condition code, otherwise print 's'. */
21520 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21521 if (current_insn_predicate)
21522 arm_print_condition (stream);
21523 else
21524 fputc('s', stream);
21525 break;
21527 /* %# is a "break" sequence. It doesn't output anything, but is used to
21528 separate e.g. operand numbers from following text, if that text consists
21529 of further digits which we don't want to be part of the operand
21530 number. */
21531 case '#':
21532 return;
21534 case 'N':
21536 REAL_VALUE_TYPE r;
21537 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21538 r = real_value_negate (&r);
21539 fprintf (stream, "%s", fp_const_from_val (&r));
21541 return;
21543 /* An integer or symbol address without a preceding # sign. */
21544 case 'c':
21545 switch (GET_CODE (x))
21547 case CONST_INT:
21548 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21549 break;
21551 case SYMBOL_REF:
21552 output_addr_const (stream, x);
21553 break;
21555 case CONST:
21556 if (GET_CODE (XEXP (x, 0)) == PLUS
21557 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21559 output_addr_const (stream, x);
21560 break;
21562 /* Fall through. */
21564 default:
21565 output_operand_lossage ("Unsupported operand for code '%c'", code);
21567 return;
21569 /* An integer that we want to print in HEX. */
21570 case 'x':
21571 switch (GET_CODE (x))
21573 case CONST_INT:
21574 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21575 break;
21577 default:
21578 output_operand_lossage ("Unsupported operand for code '%c'", code);
21580 return;
21582 case 'B':
21583 if (CONST_INT_P (x))
21585 HOST_WIDE_INT val;
21586 val = ARM_SIGN_EXTEND (~INTVAL (x));
21587 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21589 else
21591 putc ('~', stream);
21592 output_addr_const (stream, x);
21594 return;
21596 case 'b':
21597 /* Print the log2 of a CONST_INT. */
21599 HOST_WIDE_INT val;
21601 if (!CONST_INT_P (x)
21602 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21603 output_operand_lossage ("Unsupported operand for code '%c'", code);
21604 else
21605 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21607 return;
21609 case 'L':
21610 /* The low 16 bits of an immediate constant. */
21611 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21612 return;
21614 case 'i':
21615 fprintf (stream, "%s", arithmetic_instr (x, 1));
21616 return;
21618 case 'I':
21619 fprintf (stream, "%s", arithmetic_instr (x, 0));
21620 return;
21622 case 'S':
21624 HOST_WIDE_INT val;
21625 const char *shift;
21627 shift = shift_op (x, &val);
21629 if (shift)
21631 fprintf (stream, ", %s ", shift);
21632 if (val == -1)
21633 arm_print_operand (stream, XEXP (x, 1), 0);
21634 else
21635 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21638 return;
21640 /* An explanation of the 'Q', 'R' and 'H' register operands:
21642 In a pair of registers containing a DI or DF value the 'Q'
21643 operand returns the register number of the register containing
21644 the least significant part of the value. The 'R' operand returns
21645 the register number of the register containing the most
21646 significant part of the value.
21648 The 'H' operand returns the higher of the two register numbers.
21649 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21650 same as the 'Q' operand, since the most significant part of the
21651 value is held in the lower number register. The reverse is true
21652 on systems where WORDS_BIG_ENDIAN is false.
21654 The purpose of these operands is to distinguish between cases
21655 where the endian-ness of the values is important (for example
21656 when they are added together), and cases where the endian-ness
21657 is irrelevant, but the order of register operations is important.
21658 For example when loading a value from memory into a register
21659 pair, the endian-ness does not matter. Provided that the value
21660 from the lower memory address is put into the lower numbered
21661 register, and the value from the higher address is put into the
21662 higher numbered register, the load will work regardless of whether
21663 the value being loaded is big-wordian or little-wordian. The
21664 order of the two register loads can matter however, if the address
21665 of the memory location is actually held in one of the registers
21666 being overwritten by the load.
21668 The 'Q' and 'R' constraints are also available for 64-bit
21669 constants. */
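/* Illustrative example (not from the original source): for a DImode
   value held in the pair r4/r5, '%Q' prints r4 and '%R' prints r5 when
   WORDS_BIG_ENDIAN is false, and the other way round when it is true;
   '%H' prints r5 in either case.  */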
21670 case 'Q':
21671 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21673 rtx part = gen_lowpart (SImode, x);
21674 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21675 return;
21678 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21680 output_operand_lossage ("invalid operand for code '%c'", code);
21681 return;
21684 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21685 return;
21687 case 'R':
21688 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21690 machine_mode mode = GET_MODE (x);
21691 rtx part;
21693 if (mode == VOIDmode)
21694 mode = DImode;
21695 part = gen_highpart_mode (SImode, mode, x);
21696 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21697 return;
21700 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21702 output_operand_lossage ("invalid operand for code '%c'", code);
21703 return;
21706 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21707 return;
21709 case 'H':
21710 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21712 output_operand_lossage ("invalid operand for code '%c'", code);
21713 return;
21716 asm_fprintf (stream, "%r", REGNO (x) + 1);
21717 return;
21719 case 'J':
21720 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21722 output_operand_lossage ("invalid operand for code '%c'", code);
21723 return;
21726 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21727 return;
21729 case 'K':
21730 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21732 output_operand_lossage ("invalid operand for code '%c'", code);
21733 return;
21736 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21737 return;
21739 case 'm':
21740 asm_fprintf (stream, "%r",
21741 REG_P (XEXP (x, 0))
21742 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21743 return;
21745 case 'M':
21746 asm_fprintf (stream, "{%r-%r}",
21747 REGNO (x),
21748 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21749 return;
21751 /* Like 'M', but writing doubleword vector registers, for use by Neon
21752 insns. */
21753 case 'h':
21755 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21756 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21757 if (numregs == 1)
21758 asm_fprintf (stream, "{d%d}", regno);
21759 else
21760 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21762 return;
21764 case 'd':
21765 /* CONST_TRUE_RTX means always -- that's the default. */
21766 if (x == const_true_rtx)
21767 return;
21769 if (!COMPARISON_P (x))
21771 output_operand_lossage ("invalid operand for code '%c'", code);
21772 return;
21775 fputs (arm_condition_codes[get_arm_condition_code (x)],
21776 stream);
21777 return;
21779 case 'D':
21780 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21781 want to do that. */
21782 if (x == const_true_rtx)
21784 output_operand_lossage ("instruction never executed");
21785 return;
21787 if (!COMPARISON_P (x))
21789 output_operand_lossage ("invalid operand for code '%c'", code);
21790 return;
21793 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21794 (get_arm_condition_code (x))],
21795 stream);
21796 return;
21798 case 's':
21799 case 'V':
21800 case 'W':
21801 case 'X':
21802 case 'Y':
21803 case 'Z':
21804 /* Former Maverick support, removed after GCC-4.7. */
21805 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21806 return;
21808 case 'U':
21809 if (!REG_P (x)
21810 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21811 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21812 /* Bad value for wCG register number. */
21814 output_operand_lossage ("invalid operand for code '%c'", code);
21815 return;
21818 else
21819 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21820 return;
21822 /* Print an iWMMXt control register name. */
21823 case 'w':
21824 if (!CONST_INT_P (x)
21825 || INTVAL (x) < 0
21826 || INTVAL (x) >= 16)
21827 /* Bad value for wC register number. */
21829 output_operand_lossage ("invalid operand for code '%c'", code);
21830 return;
21833 else
21835 static const char * wc_reg_names [16] =
21837 "wCID", "wCon", "wCSSF", "wCASF",
21838 "wC4", "wC5", "wC6", "wC7",
21839 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21840 "wC12", "wC13", "wC14", "wC15"
21843 fputs (wc_reg_names [INTVAL (x)], stream);
21845 return;
21847 /* Print the high single-precision register of a VFP double-precision
21848 register. */
21849 case 'p':
21851 machine_mode mode = GET_MODE (x);
21852 int regno;
21854 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21856 output_operand_lossage ("invalid operand for code '%c'", code);
21857 return;
21860 regno = REGNO (x);
21861 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21863 output_operand_lossage ("invalid operand for code '%c'", code);
21864 return;
21867 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21869 return;
21871 /* Print a VFP/Neon double precision or quad precision register name. */
21872 case 'P':
21873 case 'q':
21875 machine_mode mode = GET_MODE (x);
21876 int is_quad = (code == 'q');
21877 int regno;
21879 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21881 output_operand_lossage ("invalid operand for code '%c'", code);
21882 return;
21885 if (!REG_P (x)
21886 || !IS_VFP_REGNUM (REGNO (x)))
21888 output_operand_lossage ("invalid operand for code '%c'", code);
21889 return;
21892 regno = REGNO (x);
21893 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21894 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21896 output_operand_lossage ("invalid operand for code '%c'", code);
21897 return;
21900 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21901 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21903 return;
21905 /* These two codes print the low/high doubleword register of a Neon quad
21906 register, respectively. For pair-structure types, can also print
21907 low/high quadword registers. */
21908 case 'e':
21909 case 'f':
21911 machine_mode mode = GET_MODE (x);
21912 int regno;
21914 if ((GET_MODE_SIZE (mode) != 16
21915 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21917 output_operand_lossage ("invalid operand for code '%c'", code);
21918 return;
21921 regno = REGNO (x);
21922 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21924 output_operand_lossage ("invalid operand for code '%c'", code);
21925 return;
21928 if (GET_MODE_SIZE (mode) == 16)
21929 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21930 + (code == 'f' ? 1 : 0));
21931 else
21932 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21933 + (code == 'f' ? 1 : 0));
21935 return;
21937 /* Print a VFPv3 floating-point constant, represented as an integer
21938 index. */
21939 case 'G':
21941 int index = vfp3_const_double_index (x);
21942 gcc_assert (index != -1);
21943 fprintf (stream, "%d", index);
21945 return;
21947 /* Print bits representing opcode features for Neon.
21949 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21950 and polynomials as unsigned.
21952 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21954 Bit 2 is 1 for rounding functions, 0 otherwise. */
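/* Worked example (illustrative): an operand value of 5 (binary 101) has
   bit 0 set (signed), bit 1 clear (integer) and bit 2 set (rounding),
   so '%T' prints 's', '%F' prints 'i', '%t' prints 's' and '%O' prints
   "r".  */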
21956 /* Identify the type as 's', 'u', 'p' or 'f'. */
21957 case 'T':
21959 HOST_WIDE_INT bits = INTVAL (x);
21960 fputc ("uspf"[bits & 3], stream);
21962 return;
21964 /* Likewise, but signed and unsigned integers are both 'i'. */
21965 case 'F':
21967 HOST_WIDE_INT bits = INTVAL (x);
21968 fputc ("iipf"[bits & 3], stream);
21970 return;
21972 /* As for 'T', but emit 'u' instead of 'p'. */
21973 case 't':
21975 HOST_WIDE_INT bits = INTVAL (x);
21976 fputc ("usuf"[bits & 3], stream);
21978 return;
21980 /* Bit 2: rounding (vs none). */
21981 case 'O':
21983 HOST_WIDE_INT bits = INTVAL (x);
21984 fputs ((bits & 4) != 0 ? "r" : "", stream);
21986 return;
21988 /* Memory operand for vld1/vst1 instruction. */
21989 case 'A':
21991 rtx addr;
21992 bool postinc = FALSE;
21993 rtx postinc_reg = NULL;
21994 unsigned align, memsize, align_bits;
21996 gcc_assert (MEM_P (x));
21997 addr = XEXP (x, 0);
21998 if (GET_CODE (addr) == POST_INC)
22000 postinc = 1;
22001 addr = XEXP (addr, 0);
22003 if (GET_CODE (addr) == POST_MODIFY)
22005 postinc_reg = XEXP (XEXP (addr, 1), 1);
22006 addr = XEXP (addr, 0);
22008 asm_fprintf (stream, "[%r", REGNO (addr));
22010 /* We know the alignment of this access, so we can emit a hint in the
22011 instruction (for some alignments) as an aid to the memory subsystem
22012 of the target. */
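/* For instance (illustrative, assuming r0 is the base register): a
   16-byte access known to be 16-byte aligned is printed as "[r0:128]",
   while an access with no usable alignment guarantee gets no ":<bits>"
   hint and is printed as plain "[r0]".  */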
22013 align = MEM_ALIGN (x) >> 3;
22014 memsize = MEM_SIZE (x);
22016 /* Only certain alignment specifiers are supported by the hardware. */
22017 if (memsize == 32 && (align % 32) == 0)
22018 align_bits = 256;
22019 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22020 align_bits = 128;
22021 else if (memsize >= 8 && (align % 8) == 0)
22022 align_bits = 64;
22023 else
22024 align_bits = 0;
22026 if (align_bits != 0)
22027 asm_fprintf (stream, ":%d", align_bits);
22029 asm_fprintf (stream, "]");
22031 if (postinc)
22032 fputs("!", stream);
22033 if (postinc_reg)
22034 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22036 return;
22038 case 'C':
22040 rtx addr;
22042 gcc_assert (MEM_P (x));
22043 addr = XEXP (x, 0);
22044 gcc_assert (REG_P (addr));
22045 asm_fprintf (stream, "[%r]", REGNO (addr));
22047 return;
22049 /* Translate an S register number into a D register number and element index. */
22050 case 'y':
22052 machine_mode mode = GET_MODE (x);
22053 int regno;
22055 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22057 output_operand_lossage ("invalid operand for code '%c'", code);
22058 return;
22061 regno = REGNO (x);
22062 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22064 output_operand_lossage ("invalid operand for code '%c'", code);
22065 return;
22068 regno = regno - FIRST_VFP_REGNUM;
22069 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22071 return;
22073 case 'v':
22074 gcc_assert (CONST_DOUBLE_P (x));
22075 int result;
22076 result = vfp3_const_double_for_fract_bits (x);
22077 if (result == 0)
22078 result = vfp3_const_double_for_bits (x);
22079 fprintf (stream, "#%d", result);
22080 return;
22082 /* Register specifier for vld1.16/vst1.16. Translate the S register
22083 number into a D register number and element index. */
22084 case 'z':
22086 machine_mode mode = GET_MODE (x);
22087 int regno;
22089 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22095 regno = REGNO (x);
22096 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22098 output_operand_lossage ("invalid operand for code '%c'", code);
22099 return;
22102 regno = regno - FIRST_VFP_REGNUM;
22103 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22105 return;
22107 default:
22108 if (x == 0)
22110 output_operand_lossage ("missing operand");
22111 return;
22114 switch (GET_CODE (x))
22116 case REG:
22117 asm_fprintf (stream, "%r", REGNO (x));
22118 break;
22120 case MEM:
22121 output_memory_reference_mode = GET_MODE (x);
22122 output_address (XEXP (x, 0));
22123 break;
22125 case CONST_DOUBLE:
22127 char fpstr[20];
22128 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22129 sizeof (fpstr), 0, 1);
22130 fprintf (stream, "#%s", fpstr);
22132 break;
22134 default:
22135 gcc_assert (GET_CODE (x) != NEG);
22136 fputc ('#', stream);
22137 if (GET_CODE (x) == HIGH)
22139 fputs (":lower16:", stream);
22140 x = XEXP (x, 0);
22143 output_addr_const (stream, x);
22144 break;
22149 /* Target hook for printing a memory address. */
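/* Illustrative renderings (added for clarity, not part of the original
   source), for a 32-bit target and SImode accesses: a plain register
   base prints as "[r0]", base plus constant as "[r0, #4]", base plus a
   scaled index as "[r0, r1, lsl #2]" (lsl under unified syntax),
   pre-decrement as "[r0, #-4]!" and post-increment as "[r0], #4".  */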
22150 static void
22151 arm_print_operand_address (FILE *stream, rtx x)
22153 if (TARGET_32BIT)
22155 int is_minus = GET_CODE (x) == MINUS;
22157 if (REG_P (x))
22158 asm_fprintf (stream, "[%r]", REGNO (x));
22159 else if (GET_CODE (x) == PLUS || is_minus)
22161 rtx base = XEXP (x, 0);
22162 rtx index = XEXP (x, 1);
22163 HOST_WIDE_INT offset = 0;
22164 if (!REG_P (base)
22165 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22167 /* Ensure that BASE is a register. */
22168 /* (one of them must be). */
22169 /* Also ensure the SP is not used as an index register. */
22170 std::swap (base, index);
22172 switch (GET_CODE (index))
22174 case CONST_INT:
22175 offset = INTVAL (index);
22176 if (is_minus)
22177 offset = -offset;
22178 asm_fprintf (stream, "[%r, #%wd]",
22179 REGNO (base), offset);
22180 break;
22182 case REG:
22183 asm_fprintf (stream, "[%r, %s%r]",
22184 REGNO (base), is_minus ? "-" : "",
22185 REGNO (index));
22186 break;
22188 case MULT:
22189 case ASHIFTRT:
22190 case LSHIFTRT:
22191 case ASHIFT:
22192 case ROTATERT:
22194 asm_fprintf (stream, "[%r, %s%r",
22195 REGNO (base), is_minus ? "-" : "",
22196 REGNO (XEXP (index, 0)));
22197 arm_print_operand (stream, index, 'S');
22198 fputs ("]", stream);
22199 break;
22202 default:
22203 gcc_unreachable ();
22206 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22207 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22209 extern machine_mode output_memory_reference_mode;
22211 gcc_assert (REG_P (XEXP (x, 0)));
22213 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22214 asm_fprintf (stream, "[%r, #%s%d]!",
22215 REGNO (XEXP (x, 0)),
22216 GET_CODE (x) == PRE_DEC ? "-" : "",
22217 GET_MODE_SIZE (output_memory_reference_mode));
22218 else
22219 asm_fprintf (stream, "[%r], #%s%d",
22220 REGNO (XEXP (x, 0)),
22221 GET_CODE (x) == POST_DEC ? "-" : "",
22222 GET_MODE_SIZE (output_memory_reference_mode));
22224 else if (GET_CODE (x) == PRE_MODIFY)
22226 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22227 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22228 asm_fprintf (stream, "#%wd]!",
22229 INTVAL (XEXP (XEXP (x, 1), 1)));
22230 else
22231 asm_fprintf (stream, "%r]!",
22232 REGNO (XEXP (XEXP (x, 1), 1)));
22234 else if (GET_CODE (x) == POST_MODIFY)
22236 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22237 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22238 asm_fprintf (stream, "#%wd",
22239 INTVAL (XEXP (XEXP (x, 1), 1)));
22240 else
22241 asm_fprintf (stream, "%r",
22242 REGNO (XEXP (XEXP (x, 1), 1)));
22244 else output_addr_const (stream, x);
22246 else
22248 if (REG_P (x))
22249 asm_fprintf (stream, "[%r]", REGNO (x));
22250 else if (GET_CODE (x) == POST_INC)
22251 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22252 else if (GET_CODE (x) == PLUS)
22254 gcc_assert (REG_P (XEXP (x, 0)));
22255 if (CONST_INT_P (XEXP (x, 1)))
22256 asm_fprintf (stream, "[%r, #%wd]",
22257 REGNO (XEXP (x, 0)),
22258 INTVAL (XEXP (x, 1)));
22259 else
22260 asm_fprintf (stream, "[%r, %r]",
22261 REGNO (XEXP (x, 0)),
22262 REGNO (XEXP (x, 1)));
22264 else
22265 output_addr_const (stream, x);
22269 /* Target hook for indicating whether a punctuation character for
22270 TARGET_PRINT_OPERAND is valid. */
22271 static bool
22272 arm_print_operand_punct_valid_p (unsigned char code)
22274 return (code == '@' || code == '|' || code == '.'
22275 || code == '(' || code == ')' || code == '#'
22276 || (TARGET_32BIT && (code == '?'))
22277 || (TARGET_THUMB2 && (code == '!'))
22278 || (TARGET_THUMB && (code == '_')));
22281 /* Target hook for assembling integer objects. The ARM version needs to
22282 handle word-sized values specially. */
22283 static bool
22284 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22286 machine_mode mode;
22288 if (size == UNITS_PER_WORD && aligned_p)
22290 fputs ("\t.word\t", asm_out_file);
22291 output_addr_const (asm_out_file, x);
22293 /* Mark symbols as position independent. We only do this in the
22294 .text segment, not in the .data segment. */
22295 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22296 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22298 /* See legitimize_pic_address for an explanation of the
22299 TARGET_VXWORKS_RTP check. */
22300 if (!arm_pic_data_is_text_relative
22301 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22302 fputs ("(GOT)", asm_out_file);
22303 else
22304 fputs ("(GOTOFF)", asm_out_file);
22306 fputc ('\n', asm_out_file);
22307 return true;
22310 mode = GET_MODE (x);
22312 if (arm_vector_mode_supported_p (mode))
22314 int i, units;
22316 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22318 units = CONST_VECTOR_NUNITS (x);
22319 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22321 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22322 for (i = 0; i < units; i++)
22324 rtx elt = CONST_VECTOR_ELT (x, i);
22325 assemble_integer
22326 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22328 else
22329 for (i = 0; i < units; i++)
22331 rtx elt = CONST_VECTOR_ELT (x, i);
22332 REAL_VALUE_TYPE rval;
22334 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22336 assemble_real
22337 (rval, GET_MODE_INNER (mode),
22338 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22341 return true;
22344 return default_assemble_integer (x, size, aligned_p);
22347 static void
22348 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22350 section *s;
22352 if (!TARGET_AAPCS_BASED)
22354 (is_ctor ?
22355 default_named_section_asm_out_constructor
22356 : default_named_section_asm_out_destructor) (symbol, priority);
22357 return;
22360 /* Put these in the .init_array section, using a special relocation. */
22361 if (priority != DEFAULT_INIT_PRIORITY)
22363 char buf[18];
22364 sprintf (buf, "%s.%.5u",
22365 is_ctor ? ".init_array" : ".fini_array",
22366 priority);
22367 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22369 else if (is_ctor)
22370 s = ctors_section;
22371 else
22372 s = dtors_section;
22374 switch_to_section (s);
22375 assemble_align (POINTER_SIZE);
22376 fputs ("\t.word\t", asm_out_file);
22377 output_addr_const (asm_out_file, symbol);
22378 fputs ("(target1)\n", asm_out_file);
22381 /* Add a function to the list of static constructors. */
22383 static void
22384 arm_elf_asm_constructor (rtx symbol, int priority)
22386 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22389 /* Add a function to the list of static destructors. */
22391 static void
22392 arm_elf_asm_destructor (rtx symbol, int priority)
22394 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22397 /* A finite state machine takes care of noticing whether or not instructions
22398 can be conditionally executed, thus decreasing execution time and code
22399 size by deleting branch instructions. The fsm is controlled by
22400 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22402 /* The states of the fsm controlling condition codes are:
22403 0: normal, do nothing special
22404 1: make ASM_OUTPUT_OPCODE not output this instruction
22405 2: make ASM_OUTPUT_OPCODE not output this instruction
22406 3: make instructions conditional
22407 4: make instructions conditional
22409 State transitions (state->state by whom under condition):
22410 0 -> 1 final_prescan_insn if the `target' is a label
22411 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22412 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22413 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22414 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22415 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22416 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22417 (the target insn is arm_target_insn).
22419 If the jump clobbers the conditions then we use states 2 and 4.
22421 A similar thing can be done with conditional return insns.
22423 XXX In case the `target' is an unconditional branch, this conditionalising
22424 of the instructions always reduces code size, but not always execution
22425 time. But then, I want to reduce the code size to somewhere near what
22426 /bin/cc produces. */
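/* Illustrative sketch (not from the original source) of what the fsm
   achieves: a skipped-over sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the branch is suppressed and the skipped instruction is
   predicated with the inverse condition.  */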
22428 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22429 instructions. When a COND_EXEC instruction is seen the subsequent
22430 instructions are scanned so that multiple conditional instructions can be
22431 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22432 specify the length and true/false mask for the IT block. These will be
22433 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
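/* For example (illustrative, with placeholder mnemonics): three
   consecutive COND_EXEC insns predicated EQ, EQ and NE are emitted as a
   single block

	itte	eq
	addeq	...
	moveq	...
	subne	...

   with arm_condexec_mask holding the then/else pattern and
   arm_condexec_count the number of insns still to be output.  */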
22435 /* Returns the index of the ARM condition code string in
22436 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22437 COMPARISON should be an rtx like `(eq (...) (...))'. */
22439 enum arm_cond_code
22440 maybe_get_arm_condition_code (rtx comparison)
22442 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22443 enum arm_cond_code code;
22444 enum rtx_code comp_code = GET_CODE (comparison);
22446 if (GET_MODE_CLASS (mode) != MODE_CC)
22447 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22448 XEXP (comparison, 1));
22450 switch (mode)
22452 case CC_DNEmode: code = ARM_NE; goto dominance;
22453 case CC_DEQmode: code = ARM_EQ; goto dominance;
22454 case CC_DGEmode: code = ARM_GE; goto dominance;
22455 case CC_DGTmode: code = ARM_GT; goto dominance;
22456 case CC_DLEmode: code = ARM_LE; goto dominance;
22457 case CC_DLTmode: code = ARM_LT; goto dominance;
22458 case CC_DGEUmode: code = ARM_CS; goto dominance;
22459 case CC_DGTUmode: code = ARM_HI; goto dominance;
22460 case CC_DLEUmode: code = ARM_LS; goto dominance;
22461 case CC_DLTUmode: code = ARM_CC;
22463 dominance:
22464 if (comp_code == EQ)
22465 return ARM_INVERSE_CONDITION_CODE (code);
22466 if (comp_code == NE)
22467 return code;
22468 return ARM_NV;
22470 case CC_NOOVmode:
22471 switch (comp_code)
22473 case NE: return ARM_NE;
22474 case EQ: return ARM_EQ;
22475 case GE: return ARM_PL;
22476 case LT: return ARM_MI;
22477 default: return ARM_NV;
22480 case CC_Zmode:
22481 switch (comp_code)
22483 case NE: return ARM_NE;
22484 case EQ: return ARM_EQ;
22485 default: return ARM_NV;
22488 case CC_Nmode:
22489 switch (comp_code)
22491 case NE: return ARM_MI;
22492 case EQ: return ARM_PL;
22493 default: return ARM_NV;
22496 case CCFPEmode:
22497 case CCFPmode:
22498 /* We can handle all cases except UNEQ and LTGT. */
22499 switch (comp_code)
22501 case GE: return ARM_GE;
22502 case GT: return ARM_GT;
22503 case LE: return ARM_LS;
22504 case LT: return ARM_MI;
22505 case NE: return ARM_NE;
22506 case EQ: return ARM_EQ;
22507 case ORDERED: return ARM_VC;
22508 case UNORDERED: return ARM_VS;
22509 case UNLT: return ARM_LT;
22510 case UNLE: return ARM_LE;
22511 case UNGT: return ARM_HI;
22512 case UNGE: return ARM_PL;
22513 /* UNEQ and LTGT do not have a representation. */
22514 case UNEQ: /* Fall through. */
22515 case LTGT: /* Fall through. */
22516 default: return ARM_NV;
22519 case CC_SWPmode:
22520 switch (comp_code)
22522 case NE: return ARM_NE;
22523 case EQ: return ARM_EQ;
22524 case GE: return ARM_LE;
22525 case GT: return ARM_LT;
22526 case LE: return ARM_GE;
22527 case LT: return ARM_GT;
22528 case GEU: return ARM_LS;
22529 case GTU: return ARM_CC;
22530 case LEU: return ARM_CS;
22531 case LTU: return ARM_HI;
22532 default: return ARM_NV;
22535 case CC_Cmode:
22536 switch (comp_code)
22538 case LTU: return ARM_CS;
22539 case GEU: return ARM_CC;
22540 default: return ARM_NV;
22543 case CC_CZmode:
22544 switch (comp_code)
22546 case NE: return ARM_NE;
22547 case EQ: return ARM_EQ;
22548 case GEU: return ARM_CS;
22549 case GTU: return ARM_HI;
22550 case LEU: return ARM_LS;
22551 case LTU: return ARM_CC;
22552 default: return ARM_NV;
22555 case CC_NCVmode:
22556 switch (comp_code)
22558 case GE: return ARM_GE;
22559 case LT: return ARM_LT;
22560 case GEU: return ARM_CS;
22561 case LTU: return ARM_CC;
22562 default: return ARM_NV;
22565 case CCmode:
22566 switch (comp_code)
22568 case NE: return ARM_NE;
22569 case EQ: return ARM_EQ;
22570 case GE: return ARM_GE;
22571 case GT: return ARM_GT;
22572 case LE: return ARM_LE;
22573 case LT: return ARM_LT;
22574 case GEU: return ARM_CS;
22575 case GTU: return ARM_HI;
22576 case LEU: return ARM_LS;
22577 case LTU: return ARM_CC;
22578 default: return ARM_NV;
22581 default: gcc_unreachable ();
22585 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22586 static enum arm_cond_code
22587 get_arm_condition_code (rtx comparison)
22589 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22590 gcc_assert (code != ARM_NV);
22591 return code;
22594 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22595 instructions. */
22596 void
22597 thumb2_final_prescan_insn (rtx_insn *insn)
22599 rtx_insn *first_insn = insn;
22600 rtx body = PATTERN (insn);
22601 rtx predicate;
22602 enum arm_cond_code code;
22603 int n;
22604 int mask;
22605 int max;
22607 /* max_insns_skipped in the tune was already taken into account in the
22608 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22609 just emit the IT blocks as large as we can. It does not make sense to split
22610 the IT blocks. */
22611 max = MAX_INSN_PER_IT_BLOCK;
22613 /* Remove the previous insn from the count of insns to be output. */
22614 if (arm_condexec_count)
22615 arm_condexec_count--;
22617 /* Nothing to do if we are already inside a conditional block. */
22618 if (arm_condexec_count)
22619 return;
22621 if (GET_CODE (body) != COND_EXEC)
22622 return;
22624 /* Conditional jumps are implemented directly. */
22625 if (JUMP_P (insn))
22626 return;
22628 predicate = COND_EXEC_TEST (body);
22629 arm_current_cc = get_arm_condition_code (predicate);
22631 n = get_attr_ce_count (insn);
22632 arm_condexec_count = 1;
22633 arm_condexec_mask = (1 << n) - 1;
22634 arm_condexec_masklen = n;
22635 /* See if subsequent instructions can be combined into the same block. */
22636 for (;;)
22638 insn = next_nonnote_insn (insn);
22640 /* Jumping into the middle of an IT block is illegal, so a label or
22641 barrier terminates the block. */
22642 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22643 break;
22645 body = PATTERN (insn);
22646 /* USE and CLOBBER aren't really insns, so just skip them. */
22647 if (GET_CODE (body) == USE
22648 || GET_CODE (body) == CLOBBER)
22649 continue;
22651 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22652 if (GET_CODE (body) != COND_EXEC)
22653 break;
22654 /* Maximum number of conditionally executed instructions in a block. */
22655 n = get_attr_ce_count (insn);
22656 if (arm_condexec_masklen + n > max)
22657 break;
22659 predicate = COND_EXEC_TEST (body);
22660 code = get_arm_condition_code (predicate);
22661 mask = (1 << n) - 1;
22662 if (arm_current_cc == code)
22663 arm_condexec_mask |= (mask << arm_condexec_masklen);
22664 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22665 break;
22667 arm_condexec_count++;
22668 arm_condexec_masklen += n;
22670 /* A jump must be the last instruction in a conditional block. */
22671 if (JUMP_P (insn))
22672 break;
22674 /* Restore recog_data (getting the attributes of other insns can
22675 destroy this array, but final.c assumes that it remains intact
22676 across this call). */
22677 extract_constrain_insn_cached (first_insn);
22680 void
22681 arm_final_prescan_insn (rtx_insn *insn)
22683 /* BODY will hold the body of INSN. */
22684 rtx body = PATTERN (insn);
22686 /* This will be 1 if trying to repeat the trick, and things need to be
22687 reversed if it appears to fail. */
22688 int reverse = 0;
22690 /* If we start with a return insn, we only succeed if we find another one. */
22691 int seeking_return = 0;
22692 enum rtx_code return_code = UNKNOWN;
22694 /* START_INSN will hold the insn from where we start looking. This is the
22695 first insn after the following code_label if REVERSE is true. */
22696 rtx_insn *start_insn = insn;
22698 /* If in state 4, check if the target branch is reached, in order to
22699 change back to state 0. */
22700 if (arm_ccfsm_state == 4)
22702 if (insn == arm_target_insn)
22704 arm_target_insn = NULL;
22705 arm_ccfsm_state = 0;
22707 return;
22710 /* If in state 3, it is possible to repeat the trick, if this insn is an
22711 unconditional branch to a label, and immediately following this branch
22712 is the previous target label which is only used once, and the label this
22713 branch jumps to is not too far off. */
22714 if (arm_ccfsm_state == 3)
22716 if (simplejump_p (insn))
22718 start_insn = next_nonnote_insn (start_insn);
22719 if (BARRIER_P (start_insn))
22721 /* XXX Isn't this always a barrier? */
22722 start_insn = next_nonnote_insn (start_insn);
22724 if (LABEL_P (start_insn)
22725 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22726 && LABEL_NUSES (start_insn) == 1)
22727 reverse = TRUE;
22728 else
22729 return;
22731 else if (ANY_RETURN_P (body))
22733 start_insn = next_nonnote_insn (start_insn);
22734 if (BARRIER_P (start_insn))
22735 start_insn = next_nonnote_insn (start_insn);
22736 if (LABEL_P (start_insn)
22737 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22738 && LABEL_NUSES (start_insn) == 1)
22740 reverse = TRUE;
22741 seeking_return = 1;
22742 return_code = GET_CODE (body);
22744 else
22745 return;
22747 else
22748 return;
22751 gcc_assert (!arm_ccfsm_state || reverse);
22752 if (!JUMP_P (insn))
22753 return;
22755 /* This jump might be paralleled with a clobber of the condition codes;
22756 the jump should always come first. */
22757 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22758 body = XVECEXP (body, 0, 0);
22760 if (reverse
22761 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22762 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22764 int insns_skipped;
22765 int fail = FALSE, succeed = FALSE;
22766 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22767 int then_not_else = TRUE;
22768 rtx_insn *this_insn = start_insn;
22769 rtx label = 0;
22771 /* Register the insn jumped to. */
22772 if (reverse)
22774 if (!seeking_return)
22775 label = XEXP (SET_SRC (body), 0);
22777 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22778 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22779 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22781 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22782 then_not_else = FALSE;
22784 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22786 seeking_return = 1;
22787 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22789 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22791 seeking_return = 1;
22792 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22793 then_not_else = FALSE;
22795 else
22796 gcc_unreachable ();
22798 /* See how many insns this branch skips, and what kind of insns. If all
22799 insns are okay, and the label or unconditional branch to the same
22800 label is not too far away, succeed. */
22801 for (insns_skipped = 0;
22802 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22804 rtx scanbody;
22806 this_insn = next_nonnote_insn (this_insn);
22807 if (!this_insn)
22808 break;
22810 switch (GET_CODE (this_insn))
22812 case CODE_LABEL:
22813 /* Succeed if it is the target label, otherwise fail since
22814 control falls in from somewhere else. */
22815 if (this_insn == label)
22817 arm_ccfsm_state = 1;
22818 succeed = TRUE;
22820 else
22821 fail = TRUE;
22822 break;
22824 case BARRIER:
22825 /* Succeed if the following insn is the target label.
22826 Otherwise fail.
22827 If return insns are used then the last insn in a function
22828 will be a barrier. */
22829 this_insn = next_nonnote_insn (this_insn);
22830 if (this_insn && this_insn == label)
22832 arm_ccfsm_state = 1;
22833 succeed = TRUE;
22835 else
22836 fail = TRUE;
22837 break;
22839 case CALL_INSN:
22840 /* The AAPCS says that conditional calls should not be
22841 used since they make interworking inefficient (the
22842 linker can't transform BL<cond> into BLX). That's
22843 only a problem if the machine has BLX. */
22844 if (arm_arch5)
22846 fail = TRUE;
22847 break;
22850 /* Succeed if the following insn is the target label, or
22851 if the following two insns are a barrier and the
22852 target label. */
22853 this_insn = next_nonnote_insn (this_insn);
22854 if (this_insn && BARRIER_P (this_insn))
22855 this_insn = next_nonnote_insn (this_insn);
22857 if (this_insn && this_insn == label
22858 && insns_skipped < max_insns_skipped)
22860 arm_ccfsm_state = 1;
22861 succeed = TRUE;
22863 else
22864 fail = TRUE;
22865 break;
22867 case JUMP_INSN:
22868 /* If this is an unconditional branch to the same label, succeed.
22869 If it is to another label, do nothing. If it is conditional,
22870 fail. */
22871 /* XXX Probably, the tests for SET and the PC are
22872 unnecessary. */
22874 scanbody = PATTERN (this_insn);
22875 if (GET_CODE (scanbody) == SET
22876 && GET_CODE (SET_DEST (scanbody)) == PC)
22878 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22879 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22881 arm_ccfsm_state = 2;
22882 succeed = TRUE;
22884 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22885 fail = TRUE;
22887 /* Fail if a conditional return is undesirable (e.g. on a
22888 StrongARM), but still allow this if optimizing for size. */
22889 else if (GET_CODE (scanbody) == return_code
22890 && !use_return_insn (TRUE, NULL)
22891 && !optimize_size)
22892 fail = TRUE;
22893 else if (GET_CODE (scanbody) == return_code)
22895 arm_ccfsm_state = 2;
22896 succeed = TRUE;
22898 else if (GET_CODE (scanbody) == PARALLEL)
22900 switch (get_attr_conds (this_insn))
22902 case CONDS_NOCOND:
22903 break;
22904 default:
22905 fail = TRUE;
22906 break;
22909 else
22910 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22912 break;
22914 case INSN:
22915 /* Instructions using or affecting the condition codes make it
22916 fail. */
22917 scanbody = PATTERN (this_insn);
22918 if (!(GET_CODE (scanbody) == SET
22919 || GET_CODE (scanbody) == PARALLEL)
22920 || get_attr_conds (this_insn) != CONDS_NOCOND)
22921 fail = TRUE;
22922 break;
22924 default:
22925 break;
22928 if (succeed)
22930 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22931 arm_target_label = CODE_LABEL_NUMBER (label);
22932 else
22934 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22936 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22938 this_insn = next_nonnote_insn (this_insn);
22939 gcc_assert (!this_insn
22940 || (!BARRIER_P (this_insn)
22941 && !LABEL_P (this_insn)));
22943 if (!this_insn)
22945 /* Oh, dear! We ran off the end... give up. */
22946 extract_constrain_insn_cached (insn);
22947 arm_ccfsm_state = 0;
22948 arm_target_insn = NULL;
22949 return;
22951 arm_target_insn = this_insn;
22954 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22955 what it was. */
22956 if (!reverse)
22957 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22959 if (reverse || then_not_else)
22960 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22963 /* Restore recog_data (getting the attributes of other insns can
22964 destroy this array, but final.c assumes that it remains intact
22965 across this call). */
22966 extract_constrain_insn_cached (insn);
22970 /* Output IT instructions. */
22971 void
22972 thumb2_asm_output_opcode (FILE * stream)
22974 char buff[5];
22975 int n;
22977 if (arm_condexec_mask)
22979 for (n = 0; n < arm_condexec_masklen; n++)
22980 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22981 buff[n] = 0;
22982 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22983 arm_condition_codes[arm_current_cc]);
22984 arm_condexec_mask = 0;
22988 /* Returns true if REGNO is a valid register
22989 for holding a quantity of type MODE. */
22991 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22993 if (GET_MODE_CLASS (mode) == MODE_CC)
22994 return (regno == CC_REGNUM
22995 || (TARGET_HARD_FLOAT && TARGET_VFP
22996 && regno == VFPCC_REGNUM));
22998 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22999 return false;
23001 if (TARGET_THUMB1)
23002 /* For the Thumb we only allow values bigger than SImode in
23003 registers 0 - 6, so that there is always a second low
23004 register available to hold the upper part of the value.
23005 We probably ought to ensure that the register is the
23006 start of an even numbered register pair. */
23007 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23009 if (TARGET_HARD_FLOAT && TARGET_VFP
23010 && IS_VFP_REGNUM (regno))
23012 if (mode == SFmode || mode == SImode)
23013 return VFP_REGNO_OK_FOR_SINGLE (regno);
23015 if (mode == DFmode)
23016 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23018 /* VFP registers can hold HFmode values, but there is no point in
23019 putting them there unless we have hardware conversion insns. */
23020 if (mode == HFmode)
23021 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23023 if (TARGET_NEON)
23024 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23025 || (VALID_NEON_QREG_MODE (mode)
23026 && NEON_REGNO_OK_FOR_QUAD (regno))
23027 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23028 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23029 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23030 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23031 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23033 return FALSE;
23036 if (TARGET_REALLY_IWMMXT)
23038 if (IS_IWMMXT_GR_REGNUM (regno))
23039 return mode == SImode;
23041 if (IS_IWMMXT_REGNUM (regno))
23042 return VALID_IWMMXT_REG_MODE (mode);
23045 /* We allow almost any value to be stored in the general registers.
23046 Restrict doubleword quantities to even register pairs in ARM state
23047 so that we can use ldrd. Do not allow very large Neon structure
23048 opaque modes in general registers; they would use too many. */
23049 if (regno <= LAST_ARM_REGNUM)
23051 if (ARM_NUM_REGS (mode) > 4)
23052 return FALSE;
23054 if (TARGET_THUMB2)
23055 return TRUE;
23057 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23060 if (regno == FRAME_POINTER_REGNUM
23061 || regno == ARG_POINTER_REGNUM)
23062 /* We only allow integers in the fake hard registers. */
23063 return GET_MODE_CLASS (mode) == MODE_INT;
23065 return FALSE;
23068 /* Implement MODES_TIEABLE_P. */
23070 bool
23071 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23073 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23074 return true;
23076 /* We specifically want to allow elements of "structure" modes to
23077 be tieable to the structure. This more general condition allows
23078 other rarer situations too. */
23079 if (TARGET_NEON
23080 && (VALID_NEON_DREG_MODE (mode1)
23081 || VALID_NEON_QREG_MODE (mode1)
23082 || VALID_NEON_STRUCT_MODE (mode1))
23083 && (VALID_NEON_DREG_MODE (mode2)
23084 || VALID_NEON_QREG_MODE (mode2)
23085 || VALID_NEON_STRUCT_MODE (mode2)))
23086 return true;
23088 return false;
23091 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23092 not used in arm mode. */
23094 enum reg_class
23095 arm_regno_class (int regno)
23097 if (regno == PC_REGNUM)
23098 return NO_REGS;
23100 if (TARGET_THUMB1)
23102 if (regno == STACK_POINTER_REGNUM)
23103 return STACK_REG;
23104 if (regno == CC_REGNUM)
23105 return CC_REG;
23106 if (regno < 8)
23107 return LO_REGS;
23108 return HI_REGS;
23111 if (TARGET_THUMB2 && regno < 8)
23112 return LO_REGS;
23114 if ( regno <= LAST_ARM_REGNUM
23115 || regno == FRAME_POINTER_REGNUM
23116 || regno == ARG_POINTER_REGNUM)
23117 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23119 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23120 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23122 if (IS_VFP_REGNUM (regno))
23124 if (regno <= D7_VFP_REGNUM)
23125 return VFP_D0_D7_REGS;
23126 else if (regno <= LAST_LO_VFP_REGNUM)
23127 return VFP_LO_REGS;
23128 else
23129 return VFP_HI_REGS;
23132 if (IS_IWMMXT_REGNUM (regno))
23133 return IWMMXT_REGS;
23135 if (IS_IWMMXT_GR_REGNUM (regno))
23136 return IWMMXT_GR_REGS;
23138 return NO_REGS;
23141 /* Handle a special case when computing the offset
23142 of an argument from the frame pointer. */
23144 arm_debugger_arg_offset (int value, rtx addr)
23146 rtx_insn *insn;
23148 /* We are only interested if dbxout_parms() failed to compute the offset. */
23149 if (value != 0)
23150 return 0;
23152 /* We can only cope with the case where the address is held in a register. */
23153 if (!REG_P (addr))
23154 return 0;
23156 /* If we are using the frame pointer to point at the argument, then
23157 an offset of 0 is correct. */
23158 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23159 return 0;
23161 /* If we are using the stack pointer to point at the
23162 argument, then an offset of 0 is correct. */
23163 /* ??? Check this is consistent with thumb2 frame layout. */
23164 if ((TARGET_THUMB || !frame_pointer_needed)
23165 && REGNO (addr) == SP_REGNUM)
23166 return 0;
23168 /* Oh dear. The argument is pointed to by a register rather
23169 than being held in a register, or being stored at a known
23170 offset from the frame pointer. Since GDB only understands
23171 those two kinds of argument we must translate the address
23172 held in the register into an offset from the frame pointer.
23173 We do this by searching through the insns for the function
23174 looking to see where this register gets its value. If the
23175 register is initialized from the frame pointer plus an offset
23176 then we are in luck and we can continue, otherwise we give up.
23178 This code is exercised by producing debugging information
23179 for a function with arguments like this:
23181 double func (double a, double b, int c, double d) {return d;}
23183 Without this code the stab for parameter 'd' will be set to
23184 an offset of 0 from the frame pointer, rather than 8. */
23186 /* The if() statement says:
23188 If the insn is a normal instruction
23189 and if the insn is setting the value in a register
23190 and if the register being set is the register holding the address of the argument
23191 and if the address is computed by an addition
23192 that involves adding to a register
23193 which is the frame pointer
23194 a constant integer
23196 then... */
23198 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23200 if ( NONJUMP_INSN_P (insn)
23201 && GET_CODE (PATTERN (insn)) == SET
23202 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23203 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23204 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23205 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23206 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23209 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23211 break;
23215 if (value == 0)
23217 debug_rtx (addr);
23218 warning (0, "unable to compute real location of stacked parameter");
23219 value = 8; /* XXX magic hack */
23222 return value;
23225 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23227 static const char *
23228 arm_invalid_parameter_type (const_tree t)
23230 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23231 return N_("function parameters cannot have __fp16 type");
23232 return NULL;
23235 /* Implement TARGET_INVALID_RETURN_TYPE. */
23237 static const char *
23238 arm_invalid_return_type (const_tree t)
23240 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23241 return N_("functions cannot return __fp16 type");
23242 return NULL;
23245 /* Implement TARGET_PROMOTED_TYPE. */
23247 static tree
23248 arm_promoted_type (const_tree t)
23250 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23251 return float_type_node;
23252 return NULL_TREE;
23255 /* Implement TARGET_CONVERT_TO_TYPE.
23256 Specifically, this hook implements the peculiarity of the ARM
23257 half-precision floating-point C semantics that requires conversions between
23258 __fp16 and double to go via an intermediate conversion to float. */
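/* Worked example (illustrative): with this hook in place a conversion
   such as

     __fp16 h = ...;
     double d = h;

   is expanded as (double) (float) h, i.e. the value is widened to float
   first and only then to double, as required by the semantics described
   above.  The same applies in the narrowing direction.  */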
23260 static tree
23261 arm_convert_to_type (tree type, tree expr)
23263 tree fromtype = TREE_TYPE (expr);
23264 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23265 return NULL_TREE;
23266 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23267 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23268 return convert (type, convert (float_type_node, expr));
23269 return NULL_TREE;
23272 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23273 This simply adds HFmode as a supported mode; even though we don't
23274 implement arithmetic on this type directly, it's supported by
23275 optabs conversions, much the way the double-word arithmetic is
23276 special-cased in the default hook. */
23278 static bool
23279 arm_scalar_mode_supported_p (machine_mode mode)
23281 if (mode == HFmode)
23282 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23283 else if (ALL_FIXED_POINT_MODE_P (mode))
23284 return true;
23285 else
23286 return default_scalar_mode_supported_p (mode);
23289 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23290 void
23291 neon_reinterpret (rtx dest, rtx src)
23293 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23296 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23297 not to early-clobber SRC registers in the process.
23299 We assume that the operands described by SRC and DEST represent a
23300 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23301 number of components into which the copy has been decomposed. */
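/* Illustrative example (not from the original source): copying a
   two-component value from {d2, d3} to {d3, d4} must move d3 into d4
   before moving d2 into d3; because the destination overlaps the source
   and has the higher register number, the code below emits the
   components in reverse order so that no source register is clobbered
   before it has been read.  */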
23302 void
23303 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23305 unsigned int i;
23307 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23308 || REGNO (operands[0]) < REGNO (operands[1]))
23310 for (i = 0; i < count; i++)
23312 operands[2 * i] = dest[i];
23313 operands[2 * i + 1] = src[i];
23316 else
23318 for (i = 0; i < count; i++)
23320 operands[2 * i] = dest[count - i - 1];
23321 operands[2 * i + 1] = src[count - i - 1];
23326 /* Split operands into moves from op[1] + op[2] into op[0]. */
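/* For instance (illustrative): combining d0 and d1, in that order, into
   q0 needs no move at all, since q0 already consists of d0 and d1;
   combining d1 and d0 into q0 reduces to a single VSWP of d0 and d1;
   the general case emits the two halves in whichever order avoids
   clobbering a source register that is still needed.  */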
23328 void
23329 neon_split_vcombine (rtx operands[3])
23331 unsigned int dest = REGNO (operands[0]);
23332 unsigned int src1 = REGNO (operands[1]);
23333 unsigned int src2 = REGNO (operands[2]);
23334 machine_mode halfmode = GET_MODE (operands[1]);
23335 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23336 rtx destlo, desthi;
23338 if (src1 == dest && src2 == dest + halfregs)
23340 /* No-op move. Can't split to nothing; emit something. */
23341 emit_note (NOTE_INSN_DELETED);
23342 return;
23345 /* Preserve register attributes for variable tracking. */
23346 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23347 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23348 GET_MODE_SIZE (halfmode));
23350 /* Special case of reversed high/low parts. Use VSWP. */
23351 if (src2 == dest && src1 == dest + halfregs)
23353 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23354 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23355 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23356 return;
23359 if (!reg_overlap_mentioned_p (operands[2], destlo))
23361 /* Try to avoid unnecessary moves if part of the result
23362 is in the right place already. */
23363 if (src1 != dest)
23364 emit_move_insn (destlo, operands[1]);
23365 if (src2 != dest + halfregs)
23366 emit_move_insn (desthi, operands[2]);
23368 else
23370 if (src2 != dest + halfregs)
23371 emit_move_insn (desthi, operands[2]);
23372 if (src1 != dest)
23373 emit_move_insn (destlo, operands[1]);
23377 /* Return the number (counting from 0) of
23378 the least significant set bit in MASK. */
23380 inline static int
23381 number_of_first_bit_set (unsigned mask)
23383 return ctz_hwi (mask);
23386 /* Like emit_multi_reg_push, but allowing for a different set of
23387 registers to be described as saved. MASK is the set of registers
23388 to be saved; REAL_REGS is the set of registers to be described as
23389 saved. If REAL_REGS is 0, only describe the stack adjustment. */
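/* Sketch of the intended use (an assumption for illustration): the
   Thumb-1 push instruction can only store low registers, so a high
   register such as r8 may first be copied into a spare low register and
   that low register pushed; MASK then names the low register actually
   stored while REAL_REGS names r8, so the unwind information still
   describes a save of r8.  */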
23391 static rtx_insn *
23392 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23394 unsigned long regno;
23395 rtx par[10], tmp, reg;
23396 rtx_insn *insn;
23397 int i, j;
23399 /* Build the parallel of the registers actually being stored. */
23400 for (i = 0; mask; ++i, mask &= mask - 1)
23402 regno = ctz_hwi (mask);
23403 reg = gen_rtx_REG (SImode, regno);
23405 if (i == 0)
23406 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23407 else
23408 tmp = gen_rtx_USE (VOIDmode, reg);
23410 par[i] = tmp;
23413 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23414 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23415 tmp = gen_frame_mem (BLKmode, tmp);
23416 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23417 par[0] = tmp;
23419 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23420 insn = emit_insn (tmp);
23422 /* Always build the stack adjustment note for unwind info. */
23423 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23424 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23425 par[0] = tmp;
23427 /* Build the parallel of the registers recorded as saved for unwind. */
23428 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23430 regno = ctz_hwi (real_regs);
23431 reg = gen_rtx_REG (SImode, regno);
23433 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23434 tmp = gen_frame_mem (SImode, tmp);
23435 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23436 RTX_FRAME_RELATED_P (tmp) = 1;
23437 par[j + 1] = tmp;
23440 if (j == 0)
23441 tmp = par[0];
23442 else
23444 RTX_FRAME_RELATED_P (par[0]) = 1;
23445 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23448 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23450 return insn;
23453 /* Emit code to push or pop registers to or from the stack. F is the
23454 assembly file. MASK is the registers to pop. */
23455 static void
23456 thumb_pop (FILE *f, unsigned long mask)
23458 int regno;
23459 int lo_mask = mask & 0xFF;
23460 int pushed_words = 0;
23462 gcc_assert (mask);
23464 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23466 /* Special case. Do not generate a POP PC statement here, do it in
23467 thumb_exit() */
23468 thumb_exit (f, -1);
23469 return;
23472 fprintf (f, "\tpop\t{");
23474 /* Look at the low registers first. */
23475 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23477 if (lo_mask & 1)
23479 asm_fprintf (f, "%r", regno);
23481 if ((lo_mask & ~1) != 0)
23482 fprintf (f, ", ");
23484 pushed_words++;
23488 if (mask & (1 << PC_REGNUM))
23490 /* Catch popping the PC. */
23491 if (TARGET_INTERWORK || TARGET_BACKTRACE
23492 || crtl->calls_eh_return)
23494 /* The PC is never popped directly; instead
23495 it is popped into r3 and then BX is used. */
23496 fprintf (f, "}\n");
23498 thumb_exit (f, -1);
23500 return;
23502 else
23504 if (mask & 0xFF)
23505 fprintf (f, ", ");
23507 asm_fprintf (f, "%r", PC_REGNUM);
23511 fprintf (f, "}\n");
23514 /* Generate code to return from a thumb function.
23515 If 'reg_containing_return_addr' is -1, then the return address is
23516 actually on the stack, at the stack pointer. */
23517 static void
23518 thumb_exit (FILE *f, int reg_containing_return_addr)
23520 unsigned regs_available_for_popping;
23521 unsigned regs_to_pop;
23522 int pops_needed;
23523 unsigned available;
23524 unsigned required;
23525 machine_mode mode;
23526 int size;
23527 int restore_a4 = FALSE;
23529 /* Compute the registers we need to pop. */
23530 regs_to_pop = 0;
23531 pops_needed = 0;
23533 if (reg_containing_return_addr == -1)
23535 regs_to_pop |= 1 << LR_REGNUM;
23536 ++pops_needed;
23539 if (TARGET_BACKTRACE)
23541 /* Restore the (ARM) frame pointer and stack pointer. */
23542 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23543 pops_needed += 2;
23546 /* If there is nothing to pop then just emit the BX instruction and
23547 return. */
23548 if (pops_needed == 0)
23550 if (crtl->calls_eh_return)
23551 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23553 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23554 return;
23556 /* Otherwise if we are not supporting interworking and we have not created
23557 a backtrace structure and the function was not entered in ARM mode then
23558 just pop the return address straight into the PC. */
23559 else if (!TARGET_INTERWORK
23560 && !TARGET_BACKTRACE
23561 && !is_called_in_ARM_mode (current_function_decl)
23562 && !crtl->calls_eh_return)
23564 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23565 return;
23568 /* Find out how many of the (return) argument registers we can corrupt. */
23569 regs_available_for_popping = 0;
23571 /* If returning via __builtin_eh_return, the bottom three registers
23572 all contain information needed for the return. */
23573 if (crtl->calls_eh_return)
23574 size = 12;
23575 else
23577 /* We can deduce the registers used from the function's
23578 return value. This is more reliable than examining
23579 df_regs_ever_live_p () because that will be set if the register is
23580 ever used in the function, not just if the register is used
23581 to hold a return value. */
23583 if (crtl->return_rtx != 0)
23584 mode = GET_MODE (crtl->return_rtx);
23585 else
23586 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23588 size = GET_MODE_SIZE (mode);
23590 if (size == 0)
23592 /* In a void function we can use any argument register.
23593 In a function that returns a structure on the stack
23594 we can use the second and third argument registers. */
23595 if (mode == VOIDmode)
23596 regs_available_for_popping =
23597 (1 << ARG_REGISTER (1))
23598 | (1 << ARG_REGISTER (2))
23599 | (1 << ARG_REGISTER (3));
23600 else
23601 regs_available_for_popping =
23602 (1 << ARG_REGISTER (2))
23603 | (1 << ARG_REGISTER (3));
23605 else if (size <= 4)
23606 regs_available_for_popping =
23607 (1 << ARG_REGISTER (2))
23608 | (1 << ARG_REGISTER (3));
23609 else if (size <= 8)
23610 regs_available_for_popping =
23611 (1 << ARG_REGISTER (3));
23614 /* Match registers to be popped with registers into which we pop them. */
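/* Each iteration strips the lowest set bit from both masks (X & -X
   isolates it), so POPS_NEEDED is decremented once for every register
   to pop that can be matched with an available popping register.  */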
23615 for (available = regs_available_for_popping,
23616 required = regs_to_pop;
23617 required != 0 && available != 0;
23618 available &= ~(available & - available),
23619 required &= ~(required & - required))
23620 -- pops_needed;
23622 /* If we have any popping registers left over, remove them. */
23623 if (available > 0)
23624 regs_available_for_popping &= ~available;
23626 /* Otherwise if we need another popping register we can use
23627 the fourth argument register. */
23628 else if (pops_needed)
23630 /* If we have not found any free argument registers and
23631 reg a4 contains the return address, we must move it. */
23632 if (regs_available_for_popping == 0
23633 && reg_containing_return_addr == LAST_ARG_REGNUM)
23635 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23636 reg_containing_return_addr = LR_REGNUM;
23638 else if (size > 12)
23640 /* Register a4 is being used to hold part of the return value,
23641 but we have dire need of a free, low register. */
23642 restore_a4 = TRUE;
23644 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23647 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23649 /* The fourth argument register is available. */
23650 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23652 --pops_needed;
23656 /* Pop as many registers as we can. */
23657 thumb_pop (f, regs_available_for_popping);
23659 /* Process the registers we popped. */
23660 if (reg_containing_return_addr == -1)
23662 /* The return address was popped into the lowest numbered register. */
23663 regs_to_pop &= ~(1 << LR_REGNUM);
23665 reg_containing_return_addr =
23666 number_of_first_bit_set (regs_available_for_popping);
23668 /* Remove this register from the mask of available registers, so that
23669 the return address will not be corrupted by further pops. */
23670 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23673 /* If we popped other registers then handle them here. */
23674 if (regs_available_for_popping)
23676 int frame_pointer;
23678 /* Work out which register currently contains the frame pointer. */
23679 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23681 /* Move it into the correct place. */
23682 asm_fprintf (f, "\tmov\t%r, %r\n",
23683 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23685 /* (Temporarily) remove it from the mask of popped registers. */
23686 regs_available_for_popping &= ~(1 << frame_pointer);
23687 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23689 if (regs_available_for_popping)
23691 int stack_pointer;
23693 /* We popped the stack pointer as well,
23694 find the register that contains it. */
23695 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23697 /* Move it into the stack register. */
23698 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23700 /* At this point we have popped all necessary registers, so
23701 do not worry about restoring regs_available_for_popping
23702 to its correct value:
23704 assert (pops_needed == 0)
23705 assert (regs_available_for_popping == (1 << frame_pointer))
23706 assert (regs_to_pop == (1 << STACK_POINTER)) */
23708 else
23710 /* Since we have just moved the popped value into the frame
23711 pointer, the popping register is available for reuse, and
23712 we know that we still have the stack pointer left to pop. */
23713 regs_available_for_popping |= (1 << frame_pointer);
23717 /* If we still have registers left on the stack, but we no longer have
23718 any registers into which we can pop them, then we must move the return
23719 address into the link register and make available the register that
23720 contained it. */
23721 if (regs_available_for_popping == 0 && pops_needed > 0)
23723 regs_available_for_popping |= 1 << reg_containing_return_addr;
23725 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23726 reg_containing_return_addr);
23728 reg_containing_return_addr = LR_REGNUM;
23731 /* If we have registers left on the stack then pop some more.
23732 We know that at most we will want to pop FP and SP. */
23733 if (pops_needed > 0)
23735 int popped_into;
23736 int move_to;
23738 thumb_pop (f, regs_available_for_popping);
23740 /* We have popped either FP or SP.
23741 Move whichever one it is into the correct register. */
23742 popped_into = number_of_first_bit_set (regs_available_for_popping);
23743 move_to = number_of_first_bit_set (regs_to_pop);
23745 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23747 regs_to_pop &= ~(1 << move_to);
23749 --pops_needed;
23752 /* If we still have not popped everything then we must have only
23753 had one register available to us and we are now popping the SP. */
23754 if (pops_needed > 0)
23756 int popped_into;
23758 thumb_pop (f, regs_available_for_popping);
23760 popped_into = number_of_first_bit_set (regs_available_for_popping);
23762 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23764 assert (regs_to_pop == (1 << STACK_POINTER))
23765 assert (pops_needed == 1)
23769 /* If necessary restore the a4 register. */
23770 if (restore_a4)
23772 if (reg_containing_return_addr != LR_REGNUM)
23774 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23775 reg_containing_return_addr = LR_REGNUM;
23778 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23781 if (crtl->calls_eh_return)
23782 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23784 /* Return to caller. */
23785 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23788 /* Scan INSN just before assembler is output for it.
23789 For Thumb-1, we track the status of the condition codes; this
23790 information is used in the cbranchsi4_insn pattern. */
23791 void
23792 thumb1_final_prescan_insn (rtx_insn *insn)
23794 if (flag_print_asm_name)
23795 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23796 INSN_ADDRESSES (INSN_UID (insn)));
23797 /* Don't overwrite the previous setter when we get to a cbranch. */
23798 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23800 enum attr_conds conds;
23802 if (cfun->machine->thumb1_cc_insn)
23804 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23805 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23806 CC_STATUS_INIT;
23808 conds = get_attr_conds (insn);
23809 if (conds == CONDS_SET)
23811 rtx set = single_set (insn);
23812 cfun->machine->thumb1_cc_insn = insn;
23813 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23814 cfun->machine->thumb1_cc_op1 = const0_rtx;
23815 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23816 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23818 rtx src1 = XEXP (SET_SRC (set), 1);
23819 if (src1 == const0_rtx)
23820 cfun->machine->thumb1_cc_mode = CCmode;
23822 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23824 /* Record the src register operand instead of dest because
23825 cprop_hardreg pass propagates src. */
23826 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23829 else if (conds != CONDS_NOCOND)
23830 cfun->machine->thumb1_cc_insn = NULL_RTX;
23833 /* Check whether an unexpected far jump is used. */
23834 if (cfun->machine->lr_save_eliminated
23835 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23836 internal_error ("Unexpected thumb1 far jump");
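/* Return 1 if VAL (truncated to 32 bits) is a non-zero 8-bit constant
   shifted left by 0 to 24 bits, e.g. 0x1fe00 (0xff << 9); return 0
   otherwise.  */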
23840 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23842 unsigned HOST_WIDE_INT mask = 0xff;
23843 int i;
23845 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23846 if (val == 0) /* XXX */
23847 return 0;
23849 for (i = 0; i < 25; i++)
23850 if ((val & (mask << i)) == val)
23851 return 1;
23853 return 0;
23856 /* Returns nonzero if the current function contains,
23857 or might contain, a far jump. */
23858 static int
23859 thumb_far_jump_used_p (void)
23861 rtx_insn *insn;
23862 bool far_jump = false;
23863 unsigned int func_size = 0;
23865 /* This test is only important for leaf functions. */
23866 /* assert (!leaf_function_p ()); */
23868 /* If we have already decided that far jumps may be used,
23869 do not bother checking again, and always return true even if
23870 it turns out that they are not being used. Once we have made
23871 the decision that far jumps are present (and that hence the link
23872 register will be pushed onto the stack) we cannot go back on it. */
23873 if (cfun->machine->far_jump_used)
23874 return 1;
23876 /* If this function is not being called from the prologue/epilogue
23877 generation code then it must be being called from the
23878 INITIAL_ELIMINATION_OFFSET macro. */
23879 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23881 /* In this case we know that we are being asked about the elimination
23882 of the arg pointer register. If that register is not being used,
23883 then there are no arguments on the stack, and we do not have to
23884 worry that a far jump might force the prologue to push the link
23885 register, changing the stack offsets. In this case we can just
23886 return false, since the presence of far jumps in the function will
23887 not affect stack offsets.
23889 If the arg pointer is live (or if it was live, but has now been
23890 eliminated and so set to dead) then we do have to test to see if
23891 the function might contain a far jump. This test can lead to some
23892 false negatives, since before reload is completed the length of
23893 branch instructions is not known, so GCC defaults to returning their
23894 longest length, which in turn sets the far jump attribute to true.
23896 A false negative will not result in bad code being generated, but it
23897 will result in a needless push and pop of the link register. We
23898 hope that this does not occur too often.
23900 If we need doubleword stack alignment this could affect the other
23901 elimination offsets so we can't risk getting it wrong. */
23902 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23903 cfun->machine->arg_pointer_live = 1;
23904 else if (!cfun->machine->arg_pointer_live)
23905 return 0;
23908 /* We should not change far_jump_used during or after reload, as there is
23909 no chance to change stack frame layout. */
23910 if (reload_in_progress || reload_completed)
23911 return 0;
23913 /* Check to see if the function contains a branch
23914 insn with the far jump attribute set. */
23915 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23917 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23919 far_jump = true;
23921 func_size += get_attr_length (insn);
23924 /* The far_jump attribute will always be true for thumb1 before the
23925 shorten_branch pass, so checking the far_jump attribute before
23926 shorten_branch isn't very useful.
23928 Following heuristic tries to estimate more accurately if a far jump
23929 may finally be used. The heuristic is very conservative as there is
23930 no chance to roll back the decision not to use a far jump.
23932 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23933 2-byte insn is associated with a 4-byte constant pool entry. Using
23934 function size 2048/3 as the threshold is conservative enough. */
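/* Concretely: if every 2-byte insn drags in a 4-byte literal-pool entry,
   a function with FUNC_SIZE bytes of insns can occupy up to
   FUNC_SIZE + (FUNC_SIZE / 2) * 4 = FUNC_SIZE * 3 bytes of address space,
   so FUNC_SIZE * 3 >= 2048 means some branch may be out of short-branch
   range.  */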
23935 if (far_jump)
23937 if ((func_size * 3) >= 2048)
23939 /* Record the fact that we have decided that
23940 the function does use far jumps. */
23941 cfun->machine->far_jump_used = 1;
23942 return 1;
23946 return 0;
23949 /* Return nonzero if FUNC must be entered in ARM mode. */
23951 is_called_in_ARM_mode (tree func)
23953 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23955 /* Ignore the problem about functions whose address is taken. */
23956 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23957 return TRUE;
23959 #ifdef ARM_PE
23960 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23961 #else
23962 return FALSE;
23963 #endif
23966 /* Given the stack offsets and register mask in OFFSETS, decide how
23967 many additional registers to push instead of subtracting a constant
23968 from SP. For epilogues the principle is the same except we use pop.
23969 FOR_PROLOGUE indicates which we're generating. */
23970 static int
23971 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23973 HOST_WIDE_INT amount;
23974 unsigned long live_regs_mask = offsets->saved_regs_mask;
23975 /* Extract a mask of the ones we can give to the Thumb's push/pop
23976 instruction. */
23977 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23978 /* Then count how many other high registers will need to be pushed. */
23979 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23980 int n_free, reg_base, size;
23982 if (!for_prologue && frame_pointer_needed)
23983 amount = offsets->locals_base - offsets->saved_regs;
23984 else
23985 amount = offsets->outgoing_args - offsets->saved_regs;
23987 /* If the stack frame size is 512 exactly, we can save one load
23988 instruction, which should make this a win even when optimizing
23989 for speed. */
23990 if (!optimize_size && amount != 512)
23991 return 0;
23993 /* Can't do this if there are high registers to push. */
23994 if (high_regs_pushed != 0)
23995 return 0;
23997 /* Shouldn't do it in the prologue if no registers would normally
23998 be pushed at all. In the epilogue, also allow it if we'll have
23999 a pop insn for the PC. */
24000 if (l_mask == 0
24001 && (for_prologue
24002 || TARGET_BACKTRACE
24003 || (live_regs_mask & 1 << LR_REGNUM) == 0
24004 || TARGET_INTERWORK
24005 || crtl->args.pretend_args_size != 0))
24006 return 0;
24008 /* Don't do this if thumb_expand_prologue wants to emit instructions
24009 between the push and the stack frame allocation. */
24010 if (for_prologue
24011 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24012 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24013 return 0;
24015 reg_base = 0;
24016 n_free = 0;
24017 if (!for_prologue)
24019 size = arm_size_return_regs ();
24020 reg_base = ARM_NUM_INTS (size);
24021 live_regs_mask >>= reg_base;
24024 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24025 && (for_prologue || call_used_regs[reg_base + n_free]))
24027 live_regs_mask >>= 1;
24028 n_free++;
24031 if (n_free == 0)
24032 return 0;
24033 gcc_assert (amount / 4 * 4 == amount);
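/* A single Thumb-1 SP-adjusting insn can add or subtract at most 508
   (0x1fc), so when AMOUNT is just over that limit, pushing
   (AMOUNT - 508) / 4 extra registers brings the remaining adjustment
   back under the limit; e.g. AMOUNT == 512 with one free register
   returns 1.  */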
24035 if (amount >= 512 && (amount - n_free * 4) < 512)
24036 return (amount - 508) / 4;
24037 if (amount <= n_free * 4)
24038 return amount / 4;
24039 return 0;
24042 /* The bits which aren't usefully expanded as rtl. */
24043 const char *
24044 thumb1_unexpanded_epilogue (void)
24046 arm_stack_offsets *offsets;
24047 int regno;
24048 unsigned long live_regs_mask = 0;
24049 int high_regs_pushed = 0;
24050 int extra_pop;
24051 int had_to_push_lr;
24052 int size;
24054 if (cfun->machine->return_used_this_function != 0)
24055 return "";
24057 if (IS_NAKED (arm_current_func_type ()))
24058 return "";
24060 offsets = arm_get_frame_offsets ();
24061 live_regs_mask = offsets->saved_regs_mask;
24062 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24064 /* We can deduce the registers used from the function's return value.
24065 This is more reliable than examining df_regs_ever_live_p () because that
24066 will be set if the register is ever used in the function, not just if
24067 the register is used to hold a return value. */
24068 size = arm_size_return_regs ();
24070 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24071 if (extra_pop > 0)
24073 unsigned long extra_mask = (1 << extra_pop) - 1;
24074 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24077 /* The prologue may have pushed some high registers to use as
24078 work registers. e.g. the testsuite file:
24079 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24080 compiles to produce:
24081 push {r4, r5, r6, r7, lr}
24082 mov r7, r9
24083 mov r6, r8
24084 push {r6, r7}
24085 as part of the prologue. We have to undo that pushing here. */
24087 if (high_regs_pushed)
24089 unsigned long mask = live_regs_mask & 0xff;
24090 int next_hi_reg;
24092 /* The available low registers depend on the size of the value we are
24093 returning. */
24094 if (size <= 12)
24095 mask |= 1 << 3;
24096 if (size <= 8)
24097 mask |= 1 << 2;
24099 if (mask == 0)
24100 /* Oh dear! We have no low registers into which we can pop
24101 high registers! */
24102 internal_error
24103 ("no low registers available for popping high registers");
24105 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24106 if (live_regs_mask & (1 << next_hi_reg))
24107 break;
24109 while (high_regs_pushed)
24111 /* Find lo register(s) into which the high register(s) can
24112 be popped. */
24113 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24115 if (mask & (1 << regno))
24116 high_regs_pushed--;
24117 if (high_regs_pushed == 0)
24118 break;
24121 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24123 /* Pop the values into the low register(s). */
24124 thumb_pop (asm_out_file, mask);
24126 /* Move the value(s) into the high registers. */
24127 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24129 if (mask & (1 << regno))
24131 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24132 regno);
24134 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24135 if (live_regs_mask & (1 << next_hi_reg))
24136 break;
24140 live_regs_mask &= ~0x0f00;
24143 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24144 live_regs_mask &= 0xff;
24146 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24148 /* Pop the return address into the PC. */
24149 if (had_to_push_lr)
24150 live_regs_mask |= 1 << PC_REGNUM;
24152 /* Either no argument registers were pushed or a backtrace
24153 structure was created which includes an adjusted stack
24154 pointer, so just pop everything. */
24155 if (live_regs_mask)
24156 thumb_pop (asm_out_file, live_regs_mask);
24158 /* We have either just popped the return address into the
24159 PC or it was kept in LR for the entire function.
24160 Note that thumb_pop has already called thumb_exit if the
24161 PC was in the list. */
24162 if (!had_to_push_lr)
24163 thumb_exit (asm_out_file, LR_REGNUM);
24165 else
24167 /* Pop everything but the return address. */
24168 if (live_regs_mask)
24169 thumb_pop (asm_out_file, live_regs_mask);
24171 if (had_to_push_lr)
24173 if (size > 12)
24175 /* We have no free low regs, so save one. */
24176 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24177 LAST_ARG_REGNUM);
24180 /* Get the return address into a temporary register. */
24181 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24183 if (size > 12)
24185 /* Move the return address to lr. */
24186 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24187 LAST_ARG_REGNUM);
24188 /* Restore the low register. */
24189 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24190 IP_REGNUM);
24191 regno = LR_REGNUM;
24193 else
24194 regno = LAST_ARG_REGNUM;
24196 else
24197 regno = LR_REGNUM;
24199 /* Remove the argument registers that were pushed onto the stack. */
24200 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24201 SP_REGNUM, SP_REGNUM,
24202 crtl->args.pretend_args_size);
24204 thumb_exit (asm_out_file, regno);
24207 return "";
24210 /* Functions to save and restore machine-specific function data. */
24211 static struct machine_function *
24212 arm_init_machine_status (void)
24214 struct machine_function *machine;
24215 machine = ggc_cleared_alloc<machine_function> ();
24217 #if ARM_FT_UNKNOWN != 0
24218 machine->func_type = ARM_FT_UNKNOWN;
24219 #endif
24220 return machine;
24223 /* Return an RTX indicating where the return address to the
24224 calling function can be found. */
24226 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24228 if (count != 0)
24229 return NULL_RTX;
24231 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24234 /* Do anything needed before RTL is emitted for each function. */
24235 void
24236 arm_init_expanders (void)
24238 /* Arrange to initialize and mark the machine per-function status. */
24239 init_machine_status = arm_init_machine_status;
24241 /* This is to stop the combine pass optimizing away the alignment
24242 adjustment of va_arg. */
24243 /* ??? It is claimed that this should not be necessary. */
24244 if (cfun)
24245 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24249 /* Like arm_compute_initial_elimination_offset. Simpler because there
24250 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24251 to point at the base of the local variables after static stack
24252 space for a function has been allocated. */
24254 HOST_WIDE_INT
24255 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24257 arm_stack_offsets *offsets;
24259 offsets = arm_get_frame_offsets ();
24261 switch (from)
24263 case ARG_POINTER_REGNUM:
24264 switch (to)
24266 case STACK_POINTER_REGNUM:
24267 return offsets->outgoing_args - offsets->saved_args;
24269 case FRAME_POINTER_REGNUM:
24270 return offsets->soft_frame - offsets->saved_args;
24272 case ARM_HARD_FRAME_POINTER_REGNUM:
24273 return offsets->saved_regs - offsets->saved_args;
24275 case THUMB_HARD_FRAME_POINTER_REGNUM:
24276 return offsets->locals_base - offsets->saved_args;
24278 default:
24279 gcc_unreachable ();
24281 break;
24283 case FRAME_POINTER_REGNUM:
24284 switch (to)
24286 case STACK_POINTER_REGNUM:
24287 return offsets->outgoing_args - offsets->soft_frame;
24289 case ARM_HARD_FRAME_POINTER_REGNUM:
24290 return offsets->saved_regs - offsets->soft_frame;
24292 case THUMB_HARD_FRAME_POINTER_REGNUM:
24293 return offsets->locals_base - offsets->soft_frame;
24295 default:
24296 gcc_unreachable ();
24298 break;
24300 default:
24301 gcc_unreachable ();
24305 /* Generate the function's prologue. */
24307 void
24308 thumb1_expand_prologue (void)
24310 rtx_insn *insn;
24312 HOST_WIDE_INT amount;
24313 arm_stack_offsets *offsets;
24314 unsigned long func_type;
24315 int regno;
24316 unsigned long live_regs_mask;
24317 unsigned long l_mask;
24318 unsigned high_regs_pushed = 0;
24320 func_type = arm_current_func_type ();
24322 /* Naked functions don't have prologues. */
24323 if (IS_NAKED (func_type))
24324 return;
24326 if (IS_INTERRUPT (func_type))
24328 error ("interrupt Service Routines cannot be coded in Thumb mode");
24329 return;
24332 if (is_called_in_ARM_mode (current_function_decl))
24333 emit_insn (gen_prologue_thumb1_interwork ());
24335 offsets = arm_get_frame_offsets ();
24336 live_regs_mask = offsets->saved_regs_mask;
24338 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24339 l_mask = live_regs_mask & 0x40ff;
24340 /* Then count how many other high registers will need to be pushed. */
24341 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24343 if (crtl->args.pretend_args_size)
24345 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24347 if (cfun->machine->uses_anonymous_args)
24349 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24350 unsigned long mask;
24352 mask = 1ul << (LAST_ARG_REGNUM + 1);
24353 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
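/* E.g. pretend_args_size == 8 gives num_pushes == 2 and
   mask == 0xc, i.e. push {r2, r3}.  */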
24355 insn = thumb1_emit_multi_reg_push (mask, 0);
24357 else
24359 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24360 stack_pointer_rtx, x));
24362 RTX_FRAME_RELATED_P (insn) = 1;
24365 if (TARGET_BACKTRACE)
24367 HOST_WIDE_INT offset = 0;
24368 unsigned work_register;
24369 rtx work_reg, x, arm_hfp_rtx;
24371 /* We have been asked to create a stack backtrace structure.
24372 The code looks like this:
24374 0 .align 2
24375 0 func:
24376 0 sub SP, #16 Reserve space for 4 registers.
24377 2 push {R7} Push low registers.
24378 4 add R7, SP, #20 Get the stack pointer before the push.
24379 6 str R7, [SP, #8] Store the stack pointer
24380 (before reserving the space).
24381 8 mov R7, PC Get hold of the start of this code + 12.
24382 10 str R7, [SP, #16] Store it.
24383 12 mov R7, FP Get hold of the current frame pointer.
24384 14 str R7, [SP, #4] Store it.
24385 16 mov R7, LR Get hold of the current return address.
24386 18 str R7, [SP, #12] Store it.
24387 20 add R7, SP, #16 Point at the start of the
24388 backtrace structure.
24389 22 mov FP, R7 Put this value into the frame pointer. */
24391 work_register = thumb_find_work_register (live_regs_mask);
24392 work_reg = gen_rtx_REG (SImode, work_register);
24393 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24395 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24396 stack_pointer_rtx, GEN_INT (-16)));
24397 RTX_FRAME_RELATED_P (insn) = 1;
24399 if (l_mask)
24401 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24402 RTX_FRAME_RELATED_P (insn) = 1;
24404 offset = bit_count (l_mask) * UNITS_PER_WORD;
24407 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24408 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24410 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24411 x = gen_frame_mem (SImode, x);
24412 emit_move_insn (x, work_reg);
24414 /* Make sure that the instruction fetching the PC is in the right place
24415 to calculate "start of backtrace creation code + 12". */
24416 /* ??? The stores using the common WORK_REG ought to be enough to
24417 prevent the scheduler from doing anything weird. Failing that
24418 we could always move all of the following into an UNSPEC_VOLATILE. */
24419 if (l_mask)
24421 x = gen_rtx_REG (SImode, PC_REGNUM);
24422 emit_move_insn (work_reg, x);
24424 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24425 x = gen_frame_mem (SImode, x);
24426 emit_move_insn (x, work_reg);
24428 emit_move_insn (work_reg, arm_hfp_rtx);
24430 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24431 x = gen_frame_mem (SImode, x);
24432 emit_move_insn (x, work_reg);
24434 else
24436 emit_move_insn (work_reg, arm_hfp_rtx);
24438 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24439 x = gen_frame_mem (SImode, x);
24440 emit_move_insn (x, work_reg);
24442 x = gen_rtx_REG (SImode, PC_REGNUM);
24443 emit_move_insn (work_reg, x);
24445 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24446 x = gen_frame_mem (SImode, x);
24447 emit_move_insn (x, work_reg);
24450 x = gen_rtx_REG (SImode, LR_REGNUM);
24451 emit_move_insn (work_reg, x);
24453 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24454 x = gen_frame_mem (SImode, x);
24455 emit_move_insn (x, work_reg);
24457 x = GEN_INT (offset + 12);
24458 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24460 emit_move_insn (arm_hfp_rtx, work_reg);
24462 /* Optimization: If we are not pushing any low registers but we are going
24463 to push some high registers then delay our first push. This will just
24464 be a push of LR and we can combine it with the push of the first high
24465 register. */
24466 else if ((l_mask & 0xff) != 0
24467 || (high_regs_pushed == 0 && l_mask))
24469 unsigned long mask = l_mask;
24470 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24471 insn = thumb1_emit_multi_reg_push (mask, mask);
24472 RTX_FRAME_RELATED_P (insn) = 1;
24475 if (high_regs_pushed)
24477 unsigned pushable_regs;
24478 unsigned next_hi_reg;
24479 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24480 : crtl->args.info.nregs;
24481 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24483 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24484 if (live_regs_mask & (1 << next_hi_reg))
24485 break;
24487 /* Here we need to mask out registers used for passing arguments
24488 even if they can be pushed. This is to avoid using them to stash the high
24489 registers, since such a stash could clobber arguments that are still needed.
24490 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24492 if (pushable_regs == 0)
24493 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24495 while (high_regs_pushed > 0)
24497 unsigned long real_regs_mask = 0;
24499 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24501 if (pushable_regs & (1 << regno))
24503 emit_move_insn (gen_rtx_REG (SImode, regno),
24504 gen_rtx_REG (SImode, next_hi_reg));
24506 high_regs_pushed --;
24507 real_regs_mask |= (1 << next_hi_reg);
24509 if (high_regs_pushed)
24511 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24512 next_hi_reg --)
24513 if (live_regs_mask & (1 << next_hi_reg))
24514 break;
24516 else
24518 pushable_regs &= ~((1 << regno) - 1);
24519 break;
24524 /* If we had to find a work register and we have not yet
24525 saved the LR then add it to the list of regs to push. */
24526 if (l_mask == (1 << LR_REGNUM))
24528 pushable_regs |= l_mask;
24529 real_regs_mask |= l_mask;
24530 l_mask = 0;
24533 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24534 RTX_FRAME_RELATED_P (insn) = 1;
24538 /* Load the pic register before setting the frame pointer,
24539 so we can use r7 as a temporary work register. */
24540 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24541 arm_load_pic_register (live_regs_mask);
24543 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24544 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24545 stack_pointer_rtx);
24547 if (flag_stack_usage_info)
24548 current_function_static_stack_size
24549 = offsets->outgoing_args - offsets->saved_args;
24551 amount = offsets->outgoing_args - offsets->saved_regs;
24552 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24553 if (amount)
24555 if (amount < 512)
24557 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24558 GEN_INT (- amount)));
24559 RTX_FRAME_RELATED_P (insn) = 1;
24561 else
24563 rtx reg, dwarf;
24565 /* The stack decrement is too big for an immediate value in a single
24566 insn. In theory we could issue multiple subtracts, but after
24567 three of them it becomes more space efficient to place the full
24568 value in the constant pool and load into a register. (Also the
24569 ARM debugger really likes to see only one stack decrement per
24570 function). So instead we look for a scratch register into which
24571 we can load the decrement, and then we subtract this from the
24572 stack pointer. Unfortunately on the thumb the only available
24573 scratch registers are the argument registers, and we cannot use
24574 these as they may hold arguments to the function. Instead we
24575 attempt to locate a call preserved register which is used by this
24576 function. If we can find one, then we know that it will have
24577 been pushed at the start of the prologue and so we can corrupt
24578 it now. */
24579 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24580 if (live_regs_mask & (1 << regno))
24581 break;
24583 gcc_assert (regno <= LAST_LO_REGNUM);
24585 reg = gen_rtx_REG (SImode, regno);
24587 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24589 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24590 stack_pointer_rtx, reg));
24592 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24593 plus_constant (Pmode, stack_pointer_rtx,
24594 -amount));
24595 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24596 RTX_FRAME_RELATED_P (insn) = 1;
24600 if (frame_pointer_needed)
24601 thumb_set_frame_pointer (offsets);
24603 /* If we are profiling, make sure no instructions are scheduled before
24604 the call to mcount. Similarly if the user has requested no
24605 scheduling in the prolog. Similarly if we want non-call exceptions
24606 using the EABI unwinder, to prevent faulting instructions from being
24607 swapped with a stack adjustment. */
24608 if (crtl->profile || !TARGET_SCHED_PROLOG
24609 || (arm_except_unwind_info (&global_options) == UI_TARGET
24610 && cfun->can_throw_non_call_exceptions))
24611 emit_insn (gen_blockage ());
24613 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24614 if (live_regs_mask & 0xff)
24615 cfun->machine->lr_save_eliminated = 0;
24618 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
24619 POP instruction can be generated. LR should be replaced by PC. All
24620 the checks required are already done by USE_RETURN_INSN (). Hence,
24621 all we really need to decide here is whether a single register or
24622 multiple registers are to be popped. */
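/* In the single-register case the one saved register is popped straight
   into the PC, producing a lone "pop {pc}"; with more registers the LR
   bit in the mask is replaced by PC and a multi-register pop is emitted.  */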
24623 void
24624 thumb2_expand_return (bool simple_return)
24626 int i, num_regs;
24627 unsigned long saved_regs_mask;
24628 arm_stack_offsets *offsets;
24630 offsets = arm_get_frame_offsets ();
24631 saved_regs_mask = offsets->saved_regs_mask;
24633 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24634 if (saved_regs_mask & (1 << i))
24635 num_regs++;
24637 if (!simple_return && saved_regs_mask)
24639 if (num_regs == 1)
24641 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24642 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24643 rtx addr = gen_rtx_MEM (SImode,
24644 gen_rtx_POST_INC (SImode,
24645 stack_pointer_rtx));
24646 set_mem_alias_set (addr, get_frame_alias_set ());
24647 XVECEXP (par, 0, 0) = ret_rtx;
24648 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24649 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24650 emit_jump_insn (par);
24652 else
24654 saved_regs_mask &= ~ (1 << LR_REGNUM);
24655 saved_regs_mask |= (1 << PC_REGNUM);
24656 arm_emit_multi_reg_pop (saved_regs_mask);
24659 else
24661 emit_jump_insn (simple_return_rtx);
24665 void
24666 thumb1_expand_epilogue (void)
24668 HOST_WIDE_INT amount;
24669 arm_stack_offsets *offsets;
24670 int regno;
24672 /* Naked functions don't have epilogues. */
24673 if (IS_NAKED (arm_current_func_type ()))
24674 return;
24676 offsets = arm_get_frame_offsets ();
24677 amount = offsets->outgoing_args - offsets->saved_regs;
24679 if (frame_pointer_needed)
24681 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24682 amount = offsets->locals_base - offsets->saved_regs;
24684 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24686 gcc_assert (amount >= 0);
24687 if (amount)
24689 emit_insn (gen_blockage ());
24691 if (amount < 512)
24692 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24693 GEN_INT (amount)));
24694 else
24696 /* r3 is always free in the epilogue. */
24697 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24699 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24700 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24704 /* Emit a USE (stack_pointer_rtx), so that
24705 the stack adjustment will not be deleted. */
24706 emit_insn (gen_force_register_use (stack_pointer_rtx));
24708 if (crtl->profile || !TARGET_SCHED_PROLOG)
24709 emit_insn (gen_blockage ());
24711 /* Emit a clobber for each insn that will be restored in the epilogue,
24712 so that flow2 will get register lifetimes correct. */
24713 for (regno = 0; regno < 13; regno++)
24714 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24715 emit_clobber (gen_rtx_REG (SImode, regno));
24717 if (! df_regs_ever_live_p (LR_REGNUM))
24718 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24721 /* Epilogue code for APCS frame. */
24722 static void
24723 arm_expand_epilogue_apcs_frame (bool really_return)
24725 unsigned long func_type;
24726 unsigned long saved_regs_mask;
24727 int num_regs = 0;
24728 int i;
24729 int floats_from_frame = 0;
24730 arm_stack_offsets *offsets;
24732 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24733 func_type = arm_current_func_type ();
24735 /* Get frame offsets for ARM. */
24736 offsets = arm_get_frame_offsets ();
24737 saved_regs_mask = offsets->saved_regs_mask;
24739 /* Find the offset of the floating-point save area in the frame. */
24740 floats_from_frame
24741 = (offsets->saved_args
24742 + arm_compute_static_chain_stack_bytes ()
24743 - offsets->frame);
24745 /* Compute how many core registers are saved and how far away the floats are. */
24746 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24747 if (saved_regs_mask & (1 << i))
24749 num_regs++;
24750 floats_from_frame += 4;
24753 if (TARGET_HARD_FLOAT && TARGET_VFP)
24755 int start_reg;
24756 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24758 /* The offset is from IP_REGNUM. */
24759 int saved_size = arm_get_vfp_saved_size ();
24760 if (saved_size > 0)
24762 rtx_insn *insn;
24763 floats_from_frame += saved_size;
24764 insn = emit_insn (gen_addsi3 (ip_rtx,
24765 hard_frame_pointer_rtx,
24766 GEN_INT (-floats_from_frame)));
24767 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24768 ip_rtx, hard_frame_pointer_rtx);
24771 /* Generate VFP register multi-pop. */
24772 start_reg = FIRST_VFP_REGNUM;
24774 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24775 /* Look for a case where a reg does not need restoring. */
24776 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24777 && (!df_regs_ever_live_p (i + 1)
24778 || call_used_regs[i + 1]))
24780 if (start_reg != i)
24781 arm_emit_vfp_multi_reg_pop (start_reg,
24782 (i - start_reg) / 2,
24783 gen_rtx_REG (SImode,
24784 IP_REGNUM));
24785 start_reg = i + 2;
24788 /* Restore the remaining regs that we have discovered (or possibly
24789 even all of them, if the conditional in the for loop never
24790 fired). */
24791 if (start_reg != i)
24792 arm_emit_vfp_multi_reg_pop (start_reg,
24793 (i - start_reg) / 2,
24794 gen_rtx_REG (SImode, IP_REGNUM));
24797 if (TARGET_IWMMXT)
24799 /* The frame pointer is guaranteed to be non-double-word aligned, as
24800 it is set to double-word-aligned old_stack_pointer - 4. */
24801 rtx_insn *insn;
24802 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24804 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24805 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24807 rtx addr = gen_frame_mem (V2SImode,
24808 plus_constant (Pmode, hard_frame_pointer_rtx,
24809 - lrm_count * 4));
24810 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24811 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24812 gen_rtx_REG (V2SImode, i),
24813 NULL_RTX);
24814 lrm_count += 2;
24818 /* saved_regs_mask should contain IP, which holds the old stack pointer
24819 from the time the activation record was created. Since SP and IP are adjacent registers,
24820 we can restore the value directly into SP. */
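/* Because no register lies between IP (r12) and SP (r13), replacing IP
   with SP in the mask keeps the same slot ordering, so the multi-register
   pop reloads the saved old stack pointer (stored from IP by the
   prologue) directly into SP.  */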
24821 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24822 saved_regs_mask &= ~(1 << IP_REGNUM);
24823 saved_regs_mask |= (1 << SP_REGNUM);
24825 /* There are two registers left in saved_regs_mask - LR and PC. We
24826 only need to restore LR (the return address), but to
24827 save time we can load it directly into PC, unless we need a
24828 special function exit sequence, or we are not really returning. */
24829 if (really_return
24830 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24831 && !crtl->calls_eh_return)
24832 /* Delete LR from the register mask, so that LR on
24833 the stack is loaded into the PC in the register mask. */
24834 saved_regs_mask &= ~(1 << LR_REGNUM);
24835 else
24836 saved_regs_mask &= ~(1 << PC_REGNUM);
24838 num_regs = bit_count (saved_regs_mask);
24839 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24841 rtx_insn *insn;
24842 emit_insn (gen_blockage ());
24843 /* Unwind the stack to just below the saved registers. */
24844 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24845 hard_frame_pointer_rtx,
24846 GEN_INT (- 4 * num_regs)));
24848 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24849 stack_pointer_rtx, hard_frame_pointer_rtx);
24852 arm_emit_multi_reg_pop (saved_regs_mask);
24854 if (IS_INTERRUPT (func_type))
24856 /* Interrupt handlers will have pushed the
24857 IP onto the stack, so restore it now. */
24858 rtx_insn *insn;
24859 rtx addr = gen_rtx_MEM (SImode,
24860 gen_rtx_POST_INC (SImode,
24861 stack_pointer_rtx));
24862 set_mem_alias_set (addr, get_frame_alias_set ());
24863 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24864 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24865 gen_rtx_REG (SImode, IP_REGNUM),
24866 NULL_RTX);
24869 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24870 return;
24872 if (crtl->calls_eh_return)
24873 emit_insn (gen_addsi3 (stack_pointer_rtx,
24874 stack_pointer_rtx,
24875 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24877 if (IS_STACKALIGN (func_type))
24878 /* Restore the original stack pointer. Before prologue, the stack was
24879 realigned and the original stack pointer saved in r0. For details,
24880 see comment in arm_expand_prologue. */
24881 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24883 emit_jump_insn (simple_return_rtx);
24886 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24887 function is not a sibcall. */
24888 void
24889 arm_expand_epilogue (bool really_return)
24891 unsigned long func_type;
24892 unsigned long saved_regs_mask;
24893 int num_regs = 0;
24894 int i;
24895 int amount;
24896 arm_stack_offsets *offsets;
24898 func_type = arm_current_func_type ();
24900 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24901 let output_return_instruction take care of instruction emission if any. */
24902 if (IS_NAKED (func_type)
24903 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24905 if (really_return)
24906 emit_jump_insn (simple_return_rtx);
24907 return;
24910 /* If we are throwing an exception, then we really must be doing a
24911 return, so we can't tail-call. */
24912 gcc_assert (!crtl->calls_eh_return || really_return);
24914 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24916 arm_expand_epilogue_apcs_frame (really_return);
24917 return;
24920 /* Get frame offsets for ARM. */
24921 offsets = arm_get_frame_offsets ();
24922 saved_regs_mask = offsets->saved_regs_mask;
24923 num_regs = bit_count (saved_regs_mask);
24925 if (frame_pointer_needed)
24927 rtx_insn *insn;
24928 /* Restore stack pointer if necessary. */
24929 if (TARGET_ARM)
24931 /* In ARM mode, frame pointer points to first saved register.
24932 Restore stack pointer to last saved register. */
24933 amount = offsets->frame - offsets->saved_regs;
24935 /* Force out any pending memory operations that reference stacked data
24936 before stack de-allocation occurs. */
24937 emit_insn (gen_blockage ());
24938 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24939 hard_frame_pointer_rtx,
24940 GEN_INT (amount)));
24941 arm_add_cfa_adjust_cfa_note (insn, amount,
24942 stack_pointer_rtx,
24943 hard_frame_pointer_rtx);
24945 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24946 deleted. */
24947 emit_insn (gen_force_register_use (stack_pointer_rtx));
24949 else
24951 /* In Thumb-2 mode, the frame pointer points to the last saved
24952 register. */
24953 amount = offsets->locals_base - offsets->saved_regs;
24954 if (amount)
24956 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24957 hard_frame_pointer_rtx,
24958 GEN_INT (amount)));
24959 arm_add_cfa_adjust_cfa_note (insn, amount,
24960 hard_frame_pointer_rtx,
24961 hard_frame_pointer_rtx);
24964 /* Force out any pending memory operations that reference stacked data
24965 before stack de-allocation occurs. */
24966 emit_insn (gen_blockage ());
24967 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24968 hard_frame_pointer_rtx));
24969 arm_add_cfa_adjust_cfa_note (insn, 0,
24970 stack_pointer_rtx,
24971 hard_frame_pointer_rtx);
24972 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24973 deleted. */
24974 emit_insn (gen_force_register_use (stack_pointer_rtx));
24977 else
24979 /* Pop off outgoing args and local frame to adjust stack pointer to
24980 last saved register. */
24981 amount = offsets->outgoing_args - offsets->saved_regs;
24982 if (amount)
24984 rtx_insn *tmp;
24985 /* Force out any pending memory operations that reference stacked data
24986 before stack de-allocation occurs. */
24987 emit_insn (gen_blockage ());
24988 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24989 stack_pointer_rtx,
24990 GEN_INT (amount)));
24991 arm_add_cfa_adjust_cfa_note (tmp, amount,
24992 stack_pointer_rtx, stack_pointer_rtx);
24993 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24994 not deleted. */
24995 emit_insn (gen_force_register_use (stack_pointer_rtx));
24999 if (TARGET_HARD_FLOAT && TARGET_VFP)
25001 /* Generate VFP register multi-pop. */
25002 int end_reg = LAST_VFP_REGNUM + 1;
25004 /* Scan the registers in reverse order. We need to match
25005 any groupings made in the prologue and generate matching
25006 vldm operations. The need to match groups is because,
25007 unlike pop, vldm can only do consecutive regs. */
25008 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25009 /* Look for a case where a reg does not need restoring. */
25010 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25011 && (!df_regs_ever_live_p (i + 1)
25012 || call_used_regs[i + 1]))
25014 /* Restore the regs discovered so far (from reg+2 to
25015 end_reg). */
25016 if (end_reg > i + 2)
25017 arm_emit_vfp_multi_reg_pop (i + 2,
25018 (end_reg - (i + 2)) / 2,
25019 stack_pointer_rtx);
25020 end_reg = i;
25023 /* Restore the remaining regs that we have discovered (or possibly
25024 even all of them, if the conditional in the for loop never
25025 fired). */
25026 if (end_reg > i + 2)
25027 arm_emit_vfp_multi_reg_pop (i + 2,
25028 (end_reg - (i + 2)) / 2,
25029 stack_pointer_rtx);
25032 if (TARGET_IWMMXT)
25033 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25034 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25036 rtx_insn *insn;
25037 rtx addr = gen_rtx_MEM (V2SImode,
25038 gen_rtx_POST_INC (SImode,
25039 stack_pointer_rtx));
25040 set_mem_alias_set (addr, get_frame_alias_set ());
25041 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25042 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25043 gen_rtx_REG (V2SImode, i),
25044 NULL_RTX);
25045 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25046 stack_pointer_rtx, stack_pointer_rtx);
25049 if (saved_regs_mask)
25051 rtx insn;
25052 bool return_in_pc = false;
25054 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25055 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25056 && !IS_STACKALIGN (func_type)
25057 && really_return
25058 && crtl->args.pretend_args_size == 0
25059 && saved_regs_mask & (1 << LR_REGNUM)
25060 && !crtl->calls_eh_return)
25062 saved_regs_mask &= ~(1 << LR_REGNUM);
25063 saved_regs_mask |= (1 << PC_REGNUM);
25064 return_in_pc = true;
25067 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25069 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25070 if (saved_regs_mask & (1 << i))
25072 rtx addr = gen_rtx_MEM (SImode,
25073 gen_rtx_POST_INC (SImode,
25074 stack_pointer_rtx));
25075 set_mem_alias_set (addr, get_frame_alias_set ());
25077 if (i == PC_REGNUM)
25079 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25080 XVECEXP (insn, 0, 0) = ret_rtx;
25081 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25082 gen_rtx_REG (SImode, i),
25083 addr);
25084 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25085 insn = emit_jump_insn (insn);
25087 else
25089 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25090 addr));
25091 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25092 gen_rtx_REG (SImode, i),
25093 NULL_RTX);
25094 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25095 stack_pointer_rtx,
25096 stack_pointer_rtx);
25100 else
25102 if (TARGET_LDRD
25103 && current_tune->prefer_ldrd_strd
25104 && !optimize_function_for_size_p (cfun))
25106 if (TARGET_THUMB2)
25107 thumb2_emit_ldrd_pop (saved_regs_mask);
25108 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25109 arm_emit_ldrd_pop (saved_regs_mask);
25110 else
25111 arm_emit_multi_reg_pop (saved_regs_mask);
25113 else
25114 arm_emit_multi_reg_pop (saved_regs_mask);
25117 if (return_in_pc == true)
25118 return;
25121 if (crtl->args.pretend_args_size)
25123 int i, j;
25124 rtx dwarf = NULL_RTX;
25125 rtx_insn *tmp =
25126 emit_insn (gen_addsi3 (stack_pointer_rtx,
25127 stack_pointer_rtx,
25128 GEN_INT (crtl->args.pretend_args_size)));
25130 RTX_FRAME_RELATED_P (tmp) = 1;
25132 if (cfun->machine->uses_anonymous_args)
25134 /* Restore pretend args. Refer to arm_expand_prologue for how
25135 pretend_args are saved on the stack. */
25136 int num_regs = crtl->args.pretend_args_size / 4;
25137 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
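/* E.g. pretend_args_size == 8 gives num_regs == 2 and a mask of 0xc
   (r2 and r3), matching the registers pushed by the prologue.  */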
25138 for (j = 0, i = 0; j < num_regs; i++)
25139 if (saved_regs_mask & (1 << i))
25141 rtx reg = gen_rtx_REG (SImode, i);
25142 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25143 j++;
25145 REG_NOTES (tmp) = dwarf;
25147 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25148 stack_pointer_rtx, stack_pointer_rtx);
25151 if (!really_return)
25152 return;
25154 if (crtl->calls_eh_return)
25155 emit_insn (gen_addsi3 (stack_pointer_rtx,
25156 stack_pointer_rtx,
25157 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25159 if (IS_STACKALIGN (func_type))
25160 /* Restore the original stack pointer. Before prologue, the stack was
25161 realigned and the original stack pointer saved in r0. For details,
25162 see comment in arm_expand_prologue. */
25163 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25165 emit_jump_insn (simple_return_rtx);
25168 /* Implementation of insn prologue_thumb1_interwork. This is the first
25169 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25171 const char *
25172 thumb1_output_interwork (void)
25174 const char * name;
25175 FILE *f = asm_out_file;
25177 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25178 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25179 == SYMBOL_REF);
25180 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25182 /* Generate code sequence to switch us into Thumb mode. */
25183 /* The .code 32 directive has already been emitted by
25184 ASM_DECLARE_FUNCTION_NAME. */
25185 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25186 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25188 /* Generate a label, so that the debugger will notice the
25189 change in instruction sets. This label is also used by
25190 the assembler to bypass the ARM code when this function
25191 is called from a Thumb encoded function elsewhere in the
25192 same file. Hence the definition of STUB_NAME here must
25193 agree with the definition in gas/config/tc-arm.c. */
25195 #define STUB_NAME ".real_start_of"
25197 fprintf (f, "\t.code\t16\n");
25198 #ifdef ARM_PE
25199 if (arm_dllexport_name_p (name))
25200 name = arm_strip_name_encoding (name);
25201 #endif
25202 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25203 fprintf (f, "\t.thumb_func\n");
25204 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25206 return "";
25209 /* Handle the case of a double word load into a low register from
25210 a computed memory address. The computed address may involve a
25211 register which is overwritten by the load. */
25212 const char *
25213 thumb_load_double_from_address (rtx *operands)
25215 rtx addr;
25216 rtx base;
25217 rtx offset;
25218 rtx arg1;
25219 rtx arg2;
25221 gcc_assert (REG_P (operands[0]));
25222 gcc_assert (MEM_P (operands[1]));
25224 /* Get the memory address. */
25225 addr = XEXP (operands[1], 0);
25227 /* Work out how the memory address is computed. */
25228 switch (GET_CODE (addr))
25230 case REG:
25231 operands[2] = adjust_address (operands[1], SImode, 4);
25233 if (REGNO (operands[0]) == REGNO (addr))
25235 output_asm_insn ("ldr\t%H0, %2", operands);
25236 output_asm_insn ("ldr\t%0, %1", operands);
25238 else
25240 output_asm_insn ("ldr\t%0, %1", operands);
25241 output_asm_insn ("ldr\t%H0, %2", operands);
25243 break;
25245 case CONST:
25246 /* Compute <address> + 4 for the high order load. */
25247 operands[2] = adjust_address (operands[1], SImode, 4);
25249 output_asm_insn ("ldr\t%0, %1", operands);
25250 output_asm_insn ("ldr\t%H0, %2", operands);
25251 break;
25253 case PLUS:
25254 arg1 = XEXP (addr, 0);
25255 arg2 = XEXP (addr, 1);
25257 if (CONSTANT_P (arg1))
25258 base = arg2, offset = arg1;
25259 else
25260 base = arg1, offset = arg2;
25262 gcc_assert (REG_P (base));
25264 /* Catch the case of <address> = <reg> + <reg> */
25265 if (REG_P (offset))
25267 int reg_offset = REGNO (offset);
25268 int reg_base = REGNO (base);
25269 int reg_dest = REGNO (operands[0]);
25271 /* Add the base and offset registers together into the
25272 higher destination register. */
25273 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25274 reg_dest + 1, reg_base, reg_offset);
25276 /* Load the lower destination register from the address in
25277 the higher destination register. */
25278 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25279 reg_dest, reg_dest + 1);
25281 /* Load the higher destination register from its own address
25282 plus 4. */
25283 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25284 reg_dest + 1, reg_dest + 1);
25286 else
25288 /* Compute <address> + 4 for the high order load. */
25289 operands[2] = adjust_address (operands[1], SImode, 4);
25291 /* If the computed address is held in the low order register
25292 then load the high order register first, otherwise always
25293 load the low order register first. */
25294 if (REGNO (operands[0]) == REGNO (base))
25296 output_asm_insn ("ldr\t%H0, %2", operands);
25297 output_asm_insn ("ldr\t%0, %1", operands);
25299 else
25301 output_asm_insn ("ldr\t%0, %1", operands);
25302 output_asm_insn ("ldr\t%H0, %2", operands);
25305 break;
25307 case LABEL_REF:
25308 /* With no registers to worry about we can just load the value
25309 directly. */
25310 operands[2] = adjust_address (operands[1], SImode, 4);
25312 output_asm_insn ("ldr\t%H0, %2", operands);
25313 output_asm_insn ("ldr\t%0, %1", operands);
25314 break;
25316 default:
25317 gcc_unreachable ();
25320 return "";
25323 const char *
25324 thumb_output_move_mem_multiple (int n, rtx *operands)
25326 rtx tmp;
25328 switch (n)
25330 case 2:
25331 if (REGNO (operands[4]) > REGNO (operands[5]))
25333 tmp = operands[4];
25334 operands[4] = operands[5];
25335 operands[5] = tmp;
25337 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25338 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25339 break;
25341 case 3:
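/* The three conditional exchanges below sort operands[4], operands[5] and
   operands[6] into ascending register-number order, so that the ldmia/stmia
   register lists that follow are in the required order.  */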
25342 if (REGNO (operands[4]) > REGNO (operands[5]))
25343 std::swap (operands[4], operands[5]);
25344 if (REGNO (operands[5]) > REGNO (operands[6]))
25345 std::swap (operands[5], operands[6]);
25346 if (REGNO (operands[4]) > REGNO (operands[5]))
25347 std::swap (operands[4], operands[5]);
25349 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25350 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25351 break;
25353 default:
25354 gcc_unreachable ();
25357 return "";
25360 /* Output a call-via instruction for thumb state. */
25361 const char *
25362 thumb_call_via_reg (rtx reg)
25364 int regno = REGNO (reg);
25365 rtx *labelp;
25367 gcc_assert (regno < LR_REGNUM);
25369 /* If we are in the normal text section we can use a single instance
25370 per compilation unit. If we are doing function sections, then we need
25371 an entry per section, since we can't rely on reachability. */
25372 if (in_section == text_section)
25374 thumb_call_reg_needed = 1;
25376 if (thumb_call_via_label[regno] == NULL)
25377 thumb_call_via_label[regno] = gen_label_rtx ();
25378 labelp = thumb_call_via_label + regno;
25380 else
25382 if (cfun->machine->call_via[regno] == NULL)
25383 cfun->machine->call_via[regno] = gen_label_rtx ();
25384 labelp = cfun->machine->call_via + regno;
25387 output_asm_insn ("bl\t%a0", labelp);
25388 return "";
25391 /* Routines for generating rtl. */
25392 void
25393 thumb_expand_movmemqi (rtx *operands)
25395 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25396 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25397 HOST_WIDE_INT len = INTVAL (operands[2]);
25398 HOST_WIDE_INT offset = 0;
25400 while (len >= 12)
25402 emit_insn (gen_movmem12b (out, in, out, in));
25403 len -= 12;
25406 if (len >= 8)
25408 emit_insn (gen_movmem8b (out, in, out, in));
25409 len -= 8;
25412 if (len >= 4)
25414 rtx reg = gen_reg_rtx (SImode);
25415 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25416 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25417 len -= 4;
25418 offset += 4;
25421 if (len >= 2)
25423 rtx reg = gen_reg_rtx (HImode);
25424 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25425 plus_constant (Pmode, in,
25426 offset))));
25427 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25428 offset)),
25429 reg));
25430 len -= 2;
25431 offset += 2;
25434 if (len)
25436 rtx reg = gen_reg_rtx (QImode);
25437 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25438 plus_constant (Pmode, in,
25439 offset))));
25440 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25441 offset)),
25442 reg));
25446 void
25447 thumb_reload_out_hi (rtx *operands)
25449 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25452 /* Handle reading a half-word from memory during reload. */
25453 void
25454 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25456 gcc_unreachable ();
25459 /* Return the length of a function name prefix
25460 that starts with the character 'c'. */
25461 static int
25462 arm_get_strip_length (int c)
25464 switch (c)
25466 ARM_NAME_ENCODING_LENGTHS
25467 default: return 0;
25471 /* Return a pointer to a function's name with any
25472 and all prefix encodings stripped from it. */
25473 const char *
25474 arm_strip_name_encoding (const char *name)
25476 int skip;
25478 while ((skip = arm_get_strip_length (* name)))
25479 name += skip;
25481 return name;
25484 /* If there is a '*' anywhere in the name's prefix, then
25485 emit the stripped name verbatim, otherwise prepend an
25486 underscore if leading underscores are being used. */
25487 void
25488 arm_asm_output_labelref (FILE *stream, const char *name)
25490 int skip;
25491 int verbatim = 0;
25493 while ((skip = arm_get_strip_length (* name)))
25495 verbatim |= (*name == '*');
25496 name += skip;
25499 if (verbatim)
25500 fputs (name, stream);
25501 else
25502 asm_fprintf (stream, "%U%s", name);
25505 /* This function is used to emit an EABI tag and its associated value.
25506 We emit the numerical value of the tag in case the assembler does not
25507 support textual tags. (E.g. gas prior to 2.20). If requested we include
25508 the tag name in a comment so that anyone reading the assembler output
25509 will know which tag is being set.
25511 This function is not static because arm-c.c needs it too. */
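/* For example (illustrative): with -fverbose-asm, a call such as
   arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) produces a line of
   the form ".eabi_attribute 20, 1  @ Tag_ABI_FP_denormal", where '@' is the
   usual ASM_COMMENT_START on ARM ELF targets.  */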
25513 void
25514 arm_emit_eabi_attribute (const char *name, int num, int val)
25516 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25517 if (flag_verbose_asm || flag_debug_asm)
25518 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25519 asm_fprintf (asm_out_file, "\n");
25522 static void
25523 arm_file_start (void)
25525 int val;
25527 if (TARGET_UNIFIED_ASM)
25528 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25530 if (TARGET_BPABI)
25532 const char *fpu_name;
25533 if (arm_selected_arch)
25535 /* armv7ve doesn't support any extensions. */
25536 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25538 /* Keep backward compatibility for assemblers
25539 which don't support armv7ve. */
25540 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25541 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25542 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25543 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25544 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25546 else
25548 const char* pos = strchr (arm_selected_arch->name, '+');
25549 if (pos)
25551 char buf[15];
25552 gcc_assert (strlen (arm_selected_arch->name)
25553 <= sizeof (buf) / sizeof (*pos));
25554 strncpy (buf, arm_selected_arch->name,
25555 (pos - arm_selected_arch->name) * sizeof (*pos));
25556 buf[pos - arm_selected_arch->name] = '\0';
25557 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25558 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25560 else
25561 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25564 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25565 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25566 else
25568 const char* truncated_name
25569 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25570 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25573 if (TARGET_SOFT_FLOAT)
25575 fpu_name = "softvfp";
25577 else
25579 fpu_name = arm_fpu_desc->name;
25580 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25582 if (TARGET_HARD_FLOAT)
25583 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25584 if (TARGET_HARD_FLOAT_ABI)
25585 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25588 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25590 /* Some of these attributes only apply when the corresponding features
25591 are used. However we don't have any easy way of figuring this out.
25592 Conservatively record the setting that would have been used. */
25594 if (flag_rounding_math)
25595 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25597 if (!flag_unsafe_math_optimizations)
25599 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25600 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25602 if (flag_signaling_nans)
25603 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25605 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25606 flag_finite_math_only ? 1 : 3);
25608 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25609 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25610 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25611 flag_short_enums ? 1 : 2);
25613 /* Tag_ABI_optimization_goals. */
25614 if (optimize_size)
25615 val = 4;
25616 else if (optimize >= 2)
25617 val = 2;
25618 else if (optimize)
25619 val = 1;
25620 else
25621 val = 6;
25622 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25624 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25625 unaligned_access);
25627 if (arm_fp16_format)
25628 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25629 (int) arm_fp16_format);
25631 if (arm_lang_output_object_attributes_hook)
25632 arm_lang_output_object_attributes_hook();
25635 default_file_start ();
25638 static void
25639 arm_file_end (void)
25641 int regno;
25643 if (NEED_INDICATE_EXEC_STACK)
25644 /* Add .note.GNU-stack. */
25645 file_end_indicate_exec_stack ();
25647 if (! thumb_call_reg_needed)
25648 return;
25650 switch_to_section (text_section);
25651 asm_fprintf (asm_out_file, "\t.code 16\n");
25652 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25654 for (regno = 0; regno < LR_REGNUM; regno++)
25656 rtx label = thumb_call_via_label[regno];
25658 if (label != 0)
25660 targetm.asm_out.internal_label (asm_out_file, "L",
25661 CODE_LABEL_NUMBER (label));
25662 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25667 #ifndef ARM_PE
25668 /* Symbols in the text segment can be accessed without indirecting via the
25669 constant pool; it may take an extra binary operation, but this is still
25670 faster than indirecting via memory. Don't do this when not optimizing,
25671 since we won't be calculating all of the offsets necessary to do this
25672 simplification. */
25674 static void
25675 arm_encode_section_info (tree decl, rtx rtl, int first)
25677 if (optimize > 0 && TREE_CONSTANT (decl))
25678 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25680 default_encode_section_info (decl, rtl, first);
25682 #endif /* !ARM_PE */
25684 static void
25685 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25687 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25688 && !strcmp (prefix, "L"))
25690 arm_ccfsm_state = 0;
25691 arm_target_insn = NULL;
25693 default_internal_label (stream, prefix, labelno);
25696 /* Output code to add DELTA to the first argument, and then jump
25697 to FUNCTION. Used for C++ multiple inheritance. */
25698 static void
25699 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25700 HOST_WIDE_INT delta,
25701 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25702 tree function)
25704 static int thunk_label = 0;
25705 char label[256];
25706 char labelpc[256];
25707 int mi_delta = delta;
25708 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25709 int shift = 0;
25710 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25711 ? 1 : 0);
25712 if (mi_delta < 0)
25713 mi_delta = - mi_delta;
25715 final_start_function (emit_barrier (), file, 1);
25717 if (TARGET_THUMB1)
25719 int labelno = thunk_label++;
25720 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25721 /* Thunks are entered in ARM mode when available. */
25722 if (TARGET_THUMB1_ONLY)
25724 /* push r3 so we can use it as a temporary. */
25725 /* TODO: Omit this save if r3 is not used. */
25726 fputs ("\tpush {r3}\n", file);
25727 fputs ("\tldr\tr3, ", file);
25729 else
25731 fputs ("\tldr\tr12, ", file);
25733 assemble_name (file, label);
25734 fputc ('\n', file);
25735 if (flag_pic)
25737 /* If we are generating PIC, the ldr instruction below loads
25738 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25739 the address of the add + 8, so we have:
25741 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25742 = target + 1.
25744 Note that we have "+ 1" because some versions of GNU ld
25745 don't set the low bit of the result for R_ARM_REL32
25746 relocations against thumb function symbols.
25747 On ARMv6M this is +4, not +8. */
25748 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25749 assemble_name (file, labelpc);
25750 fputs (":\n", file);
25751 if (TARGET_THUMB1_ONLY)
25753 /* This is 2 insns after the start of the thunk, so we know it
25754 is 4-byte aligned. */
25755 fputs ("\tadd\tr3, pc, r3\n", file);
25756 fputs ("\tmov r12, r3\n", file);
25758 else
25759 fputs ("\tadd\tr12, pc, r12\n", file);
25761 else if (TARGET_THUMB1_ONLY)
25762 fputs ("\tmov r12, r3\n", file);
25764 if (TARGET_THUMB1_ONLY)
25766 if (mi_delta > 255)
25768 fputs ("\tldr\tr3, ", file);
25769 assemble_name (file, label);
25770 fputs ("+4\n", file);
25771 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25772 mi_op, this_regno, this_regno);
25774 else if (mi_delta != 0)
25776 /* Thumb1 unified syntax requires s suffix in instruction name when
25777 one of the operands is immediate. */
25778 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25779 mi_op, this_regno, this_regno,
25780 mi_delta);
25783 else
25785 /* TODO: Use movw/movt for large constants when available. */
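/* As an illustration, a positive delta of 0x1234 is split by the loop below
   into "add <this>, <this>, #0x234" followed by "add <this>, <this>, #0x1000";
   each piece is an 8-bit value at an even rotation, so it is a legal ARM
   immediate.  */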
25786 while (mi_delta != 0)
25788 if ((mi_delta & (3 << shift)) == 0)
25789 shift += 2;
25790 else
25792 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25793 mi_op, this_regno, this_regno,
25794 mi_delta & (0xff << shift));
25795 mi_delta &= ~(0xff << shift);
25796 shift += 8;
25800 if (TARGET_THUMB1)
25802 if (TARGET_THUMB1_ONLY)
25803 fputs ("\tpop\t{r3}\n", file);
25805 fprintf (file, "\tbx\tr12\n");
25806 ASM_OUTPUT_ALIGN (file, 2);
25807 assemble_name (file, label);
25808 fputs (":\n", file);
25809 if (flag_pic)
25811 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25812 rtx tem = XEXP (DECL_RTL (function), 0);
25813 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25814 pipeline offset is four rather than eight. Adjust the offset
25815 accordingly. */
25816 tem = plus_constant (GET_MODE (tem), tem,
25817 TARGET_THUMB1_ONLY ? -3 : -7);
25818 tem = gen_rtx_MINUS (GET_MODE (tem),
25819 tem,
25820 gen_rtx_SYMBOL_REF (Pmode,
25821 ggc_strdup (labelpc)));
25822 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25824 else
25825 /* Output ".word .LTHUNKn". */
25826 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25828 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25829 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25831 else
25833 fputs ("\tb\t", file);
25834 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25835 if (NEED_PLT_RELOC)
25836 fputs ("(PLT)", file);
25837 fputc ('\n', file);
25840 final_end_function ();
25844 arm_emit_vector_const (FILE *file, rtx x)
25846 int i;
25847 const char * pattern;
25849 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25851 switch (GET_MODE (x))
25853 case V2SImode: pattern = "%08x"; break;
25854 case V4HImode: pattern = "%04x"; break;
25855 case V8QImode: pattern = "%02x"; break;
25856 default: gcc_unreachable ();
25859 fprintf (file, "0x");
25860 for (i = CONST_VECTOR_NUNITS (x); i--;)
25862 rtx element;
25864 element = CONST_VECTOR_ELT (x, i);
25865 fprintf (file, pattern, INTVAL (element));
25868 return 1;
25871 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25872 HFmode constant pool entries are actually loaded with ldr. */
25873 void
25874 arm_emit_fp16_const (rtx c)
25876 REAL_VALUE_TYPE r;
25877 long bits;
25879 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25880 bits = real_to_target (NULL, &r, HFmode);
25881 if (WORDS_BIG_ENDIAN)
25882 assemble_zeros (2);
25883 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25884 if (!WORDS_BIG_ENDIAN)
25885 assemble_zeros (2);
25888 const char *
25889 arm_output_load_gr (rtx *operands)
25891 rtx reg;
25892 rtx offset;
25893 rtx wcgr;
25894 rtx sum;
25896 if (!MEM_P (operands [1])
25897 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25898 || !REG_P (reg = XEXP (sum, 0))
25899 || !CONST_INT_P (offset = XEXP (sum, 1))
25900 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25901 return "wldrw%?\t%0, %1";
25903 /* Fix up an out-of-range load of a GR register. */
25904 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25905 wcgr = operands[0];
25906 operands[0] = reg;
25907 output_asm_insn ("ldr%?\t%0, %1", operands);
25909 operands[0] = wcgr;
25910 operands[1] = reg;
25911 output_asm_insn ("tmcr%?\t%0, %1", operands);
25912 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25914 return "";
25917 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25919 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25920 named arg and all anonymous args onto the stack.
25921 XXX I know the prologue shouldn't be pushing registers, but it is faster
25922 that way. */
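/* For example (illustrative): if the named arguments occupy only r0 and r1,
   nregs is 2, so *pretend_size becomes 8 and the prologue pushes the
   remaining argument registers r2 and r3 onto the stack.  */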
25924 static void
25925 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25926 machine_mode mode,
25927 tree type,
25928 int *pretend_size,
25929 int second_time ATTRIBUTE_UNUSED)
25931 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25932 int nregs;
25934 cfun->machine->uses_anonymous_args = 1;
25935 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25937 nregs = pcum->aapcs_ncrn;
25938 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25939 nregs++;
25941 else
25942 nregs = pcum->nregs;
25944 if (nregs < NUM_ARG_REGS)
25945 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25948 /* We can't rely on the caller doing the proper promotion when
25949 using APCS or ATPCS. */
25951 static bool
25952 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25954 return !TARGET_AAPCS_BASED;
25957 static machine_mode
25958 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25959 machine_mode mode,
25960 int *punsignedp ATTRIBUTE_UNUSED,
25961 const_tree fntype ATTRIBUTE_UNUSED,
25962 int for_return ATTRIBUTE_UNUSED)
25964 if (GET_MODE_CLASS (mode) == MODE_INT
25965 && GET_MODE_SIZE (mode) < 4)
25966 return SImode;
25968 return mode;
25971 /* AAPCS based ABIs use short enums by default. */
25973 static bool
25974 arm_default_short_enums (void)
25976 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25980 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25982 static bool
25983 arm_align_anon_bitfield (void)
25985 return TARGET_AAPCS_BASED;
25989 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25991 static tree
25992 arm_cxx_guard_type (void)
25994 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25998 /* The EABI says test the least significant bit of a guard variable. */
26000 static bool
26001 arm_cxx_guard_mask_bit (void)
26003 return TARGET_AAPCS_BASED;
26007 /* The EABI specifies that all array cookies are 8 bytes long. */
26009 static tree
26010 arm_get_cookie_size (tree type)
26012 tree size;
26014 if (!TARGET_AAPCS_BASED)
26015 return default_cxx_get_cookie_size (type);
26017 size = build_int_cst (sizetype, 8);
26018 return size;
26022 /* The EABI says that array cookies should also contain the element size. */
26024 static bool
26025 arm_cookie_has_size (void)
26027 return TARGET_AAPCS_BASED;
26031 /* The EABI says constructors and destructors should return a pointer to
26032 the object constructed/destroyed. */
26034 static bool
26035 arm_cxx_cdtor_returns_this (void)
26037 return TARGET_AAPCS_BASED;
26040 /* The EABI says that an inline function may never be the key
26041 method. */
26043 static bool
26044 arm_cxx_key_method_may_be_inline (void)
26046 return !TARGET_AAPCS_BASED;
26049 static void
26050 arm_cxx_determine_class_data_visibility (tree decl)
26052 if (!TARGET_AAPCS_BASED
26053 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26054 return;
26056 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26057 is exported. However, on systems without dynamic vague linkage,
26058 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26059 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26060 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26061 else
26062 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26063 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26066 static bool
26067 arm_cxx_class_data_always_comdat (void)
26069 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26070 vague linkage if the class has no key function. */
26071 return !TARGET_AAPCS_BASED;
26075 /* The EABI says __aeabi_atexit should be used to register static
26076 destructors. */
26078 static bool
26079 arm_cxx_use_aeabi_atexit (void)
26081 return TARGET_AAPCS_BASED;
26085 void
26086 arm_set_return_address (rtx source, rtx scratch)
26088 arm_stack_offsets *offsets;
26089 HOST_WIDE_INT delta;
26090 rtx addr;
26091 unsigned long saved_regs;
26093 offsets = arm_get_frame_offsets ();
26094 saved_regs = offsets->saved_regs_mask;
26096 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26097 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26098 else
26100 if (frame_pointer_needed)
26101 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26102 else
26104 /* LR will be the first saved register. */
26105 delta = offsets->outgoing_args - (offsets->frame + 4);
26108 if (delta >= 4096)
26110 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26111 GEN_INT (delta & ~4095)));
26112 addr = scratch;
26113 delta &= 4095;
26115 else
26116 addr = stack_pointer_rtx;
26118 addr = plus_constant (Pmode, addr, delta);
26120 /* The store needs to be marked as frame related in order to prevent
26121 DSE from deleting it as dead if it is based on fp. */
26122 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26123 RTX_FRAME_RELATED_P (insn) = 1;
26124 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26129 void
26130 thumb_set_return_address (rtx source, rtx scratch)
26132 arm_stack_offsets *offsets;
26133 HOST_WIDE_INT delta;
26134 HOST_WIDE_INT limit;
26135 int reg;
26136 rtx addr;
26137 unsigned long mask;
26139 emit_use (source);
26141 offsets = arm_get_frame_offsets ();
26142 mask = offsets->saved_regs_mask;
26143 if (mask & (1 << LR_REGNUM))
26145 limit = 1024;
26146 /* Find the saved regs. */
26147 if (frame_pointer_needed)
26149 delta = offsets->soft_frame - offsets->saved_args;
26150 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26151 if (TARGET_THUMB1)
26152 limit = 128;
26154 else
26156 delta = offsets->outgoing_args - offsets->saved_args;
26157 reg = SP_REGNUM;
26159 /* Allow for the stack frame. */
26160 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26161 delta -= 16;
26162 /* The link register is always the first saved register. */
26163 delta -= 4;
26165 /* Construct the address. */
26166 addr = gen_rtx_REG (SImode, reg);
26167 if (delta > limit)
26169 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26170 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26171 addr = scratch;
26173 else
26174 addr = plus_constant (Pmode, addr, delta);
26176 /* The store needs to be marked as frame related in order to prevent
26177 DSE from deleting it as dead if it is based on fp. */
26178 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26179 RTX_FRAME_RELATED_P (insn) = 1;
26180 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26182 else
26183 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26186 /* Implements target hook vector_mode_supported_p. */
26187 bool
26188 arm_vector_mode_supported_p (machine_mode mode)
26190 /* Neon also supports V2SImode, etc. listed in the clause below. */
26191 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26192 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26193 return true;
26195 if ((TARGET_NEON || TARGET_IWMMXT)
26196 && ((mode == V2SImode)
26197 || (mode == V4HImode)
26198 || (mode == V8QImode)))
26199 return true;
26201 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26202 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26203 || mode == V2HAmode))
26204 return true;
26206 return false;
26209 /* Implements target hook array_mode_supported_p. */
26211 static bool
26212 arm_array_mode_supported_p (machine_mode mode,
26213 unsigned HOST_WIDE_INT nelems)
26215 if (TARGET_NEON
26216 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26217 && (nelems >= 2 && nelems <= 4))
26218 return true;
26220 return false;
26223 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26224 registers when autovectorizing for Neon, at least until multiple vector
26225 widths are supported properly by the middle-end. */
26227 static machine_mode
26228 arm_preferred_simd_mode (machine_mode mode)
26230 if (TARGET_NEON)
26231 switch (mode)
26233 case SFmode:
26234 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26235 case SImode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26237 case HImode:
26238 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26239 case QImode:
26240 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26241 case DImode:
26242 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26243 return V2DImode;
26244 break;
26246 default:;
26249 if (TARGET_REALLY_IWMMXT)
26250 switch (mode)
26252 case SImode:
26253 return V2SImode;
26254 case HImode:
26255 return V4HImode;
26256 case QImode:
26257 return V8QImode;
26259 default:;
26262 return word_mode;
26265 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26267 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26268 using r0-r4 for function arguments, r7 for the stack frame and don't have
26269 enough left over to do doubleword arithmetic. For Thumb-2 all the
26270 potentially problematic instructions accept high registers so this is not
26271 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26272 that require many low registers. */
26273 static bool
26274 arm_class_likely_spilled_p (reg_class_t rclass)
26276 if ((TARGET_THUMB1 && rclass == LO_REGS)
26277 || rclass == CC_REG)
26278 return true;
26280 return false;
26283 /* Implements target hook small_register_classes_for_mode_p. */
26284 bool
26285 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26287 return TARGET_THUMB1;
26290 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26291 ARM insns and therefore guarantee that the shift count is modulo 256.
26292 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26293 guarantee no particular behavior for out-of-range counts. */
26295 static unsigned HOST_WIDE_INT
26296 arm_shift_truncation_mask (machine_mode mode)
26298 return mode == SImode ? 255 : 0;
26302 /* Map internal gcc register numbers to DWARF2 register numbers. */
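/* For instance (illustrative), core registers r0-r15 keep their own numbers,
   the first single-precision VFP register maps to 64, and the upper
   D registers (d16-d31) are placed in the 256+ range.  */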
26304 unsigned int
26305 arm_dbx_register_number (unsigned int regno)
26307 if (regno < 16)
26308 return regno;
26310 if (IS_VFP_REGNUM (regno))
26312 /* See comment in arm_dwarf_register_span. */
26313 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26314 return 64 + regno - FIRST_VFP_REGNUM;
26315 else
26316 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26319 if (IS_IWMMXT_GR_REGNUM (regno))
26320 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26322 if (IS_IWMMXT_REGNUM (regno))
26323 return 112 + regno - FIRST_IWMMXT_REGNUM;
26325 gcc_unreachable ();
26328 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26329 GCC models them as 64 32-bit registers, so we need to describe this to
26330 the DWARF generation code. Other registers can use the default. */
26331 static rtx
26332 arm_dwarf_register_span (rtx rtl)
26334 machine_mode mode;
26335 unsigned regno;
26336 rtx parts[16];
26337 int nregs;
26338 int i;
26340 regno = REGNO (rtl);
26341 if (!IS_VFP_REGNUM (regno))
26342 return NULL_RTX;
26344 /* XXX FIXME: The EABI defines two VFP register ranges:
26345 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26346 256-287: D0-D31
26347 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26348 corresponding D register. Until GDB supports this, we shall use the
26349 legacy encodings. We also use these encodings for D0-D15 for
26350 compatibility with older debuggers. */
26351 mode = GET_MODE (rtl);
26352 if (GET_MODE_SIZE (mode) < 8)
26353 return NULL_RTX;
26355 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26357 nregs = GET_MODE_SIZE (mode) / 4;
26358 for (i = 0; i < nregs; i += 2)
26359 if (TARGET_BIG_END)
26361 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26362 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26364 else
26366 parts[i] = gen_rtx_REG (SImode, regno + i);
26367 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26370 else
26372 nregs = GET_MODE_SIZE (mode) / 8;
26373 for (i = 0; i < nregs; i++)
26374 parts[i] = gen_rtx_REG (DImode, regno + i);
26377 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26380 #if ARM_UNWIND_INFO
26381 /* Emit unwind directives for a store-multiple instruction or stack pointer
26382 push during alignment.
26383 These should only ever be generated by the function prologue code, so
26384 expect them to have a particular form.
26385 The store-multiple instruction sometimes pushes pc as the last register,
26386 although it should not be tracked into unwind information, or for -Os
26387 sometimes pushes some dummy registers before the first register that needs
26388 to be tracked in unwind information; such dummy registers are there just
26389 to avoid separate stack adjustment, and will not be restored in the
26390 epilogue. */
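/* As an illustrative example, a prologue push such as "push {r4, r5, lr}"
   results in this function emitting the unwind directive
   ".save {r4, r5, lr}".  */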
26392 static void
26393 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26395 int i;
26396 HOST_WIDE_INT offset;
26397 HOST_WIDE_INT nregs;
26398 int reg_size;
26399 unsigned reg;
26400 unsigned lastreg;
26401 unsigned padfirst = 0, padlast = 0;
26402 rtx e;
26404 e = XVECEXP (p, 0, 0);
26405 gcc_assert (GET_CODE (e) == SET);
26407 /* First insn will adjust the stack pointer. */
26408 gcc_assert (GET_CODE (e) == SET
26409 && REG_P (SET_DEST (e))
26410 && REGNO (SET_DEST (e)) == SP_REGNUM
26411 && GET_CODE (SET_SRC (e)) == PLUS);
26413 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26414 nregs = XVECLEN (p, 0) - 1;
26415 gcc_assert (nregs);
26417 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26418 if (reg < 16)
26420 /* For -Os dummy registers can be pushed at the beginning to
26421 avoid separate stack pointer adjustment. */
26422 e = XVECEXP (p, 0, 1);
26423 e = XEXP (SET_DEST (e), 0);
26424 if (GET_CODE (e) == PLUS)
26425 padfirst = INTVAL (XEXP (e, 1));
26426 gcc_assert (padfirst == 0 || optimize_size);
26427 /* The function prologue may also push pc, but not annotate it as it is
26428 never restored. We turn this into a stack pointer adjustment. */
26429 e = XVECEXP (p, 0, nregs);
26430 e = XEXP (SET_DEST (e), 0);
26431 if (GET_CODE (e) == PLUS)
26432 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26433 else
26434 padlast = offset - 4;
26435 gcc_assert (padlast == 0 || padlast == 4);
26436 if (padlast == 4)
26437 fprintf (asm_out_file, "\t.pad #4\n");
26438 reg_size = 4;
26439 fprintf (asm_out_file, "\t.save {");
26441 else if (IS_VFP_REGNUM (reg))
26443 reg_size = 8;
26444 fprintf (asm_out_file, "\t.vsave {");
26446 else
26447 /* Unknown register type. */
26448 gcc_unreachable ();
26450 /* If the stack increment doesn't match the size of the saved registers,
26451 something has gone horribly wrong. */
26452 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26454 offset = padfirst;
26455 lastreg = 0;
26456 /* The remaining insns will describe the stores. */
26457 for (i = 1; i <= nregs; i++)
26459 /* Expect (set (mem <addr>) (reg)).
26460 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26461 e = XVECEXP (p, 0, i);
26462 gcc_assert (GET_CODE (e) == SET
26463 && MEM_P (SET_DEST (e))
26464 && REG_P (SET_SRC (e)));
26466 reg = REGNO (SET_SRC (e));
26467 gcc_assert (reg >= lastreg);
26469 if (i != 1)
26470 fprintf (asm_out_file, ", ");
26471 /* We can't use %r for vfp because we need to use the
26472 double precision register names. */
26473 if (IS_VFP_REGNUM (reg))
26474 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26475 else
26476 asm_fprintf (asm_out_file, "%r", reg);
26478 #ifdef ENABLE_CHECKING
26479 /* Check that the addresses are consecutive. */
26480 e = XEXP (SET_DEST (e), 0);
26481 if (GET_CODE (e) == PLUS)
26482 gcc_assert (REG_P (XEXP (e, 0))
26483 && REGNO (XEXP (e, 0)) == SP_REGNUM
26484 && CONST_INT_P (XEXP (e, 1))
26485 && offset == INTVAL (XEXP (e, 1)));
26486 else
26487 gcc_assert (i == 1
26488 && REG_P (e)
26489 && REGNO (e) == SP_REGNUM);
26490 offset += reg_size;
26491 #endif
26493 fprintf (asm_out_file, "}\n");
26494 if (padfirst)
26495 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26498 /* Emit unwind directives for a SET. */
26500 static void
26501 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26503 rtx e0;
26504 rtx e1;
26505 unsigned reg;
26507 e0 = XEXP (p, 0);
26508 e1 = XEXP (p, 1);
26509 switch (GET_CODE (e0))
26511 case MEM:
26512 /* Pushing a single register. */
26513 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26514 || !REG_P (XEXP (XEXP (e0, 0), 0))
26515 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26516 abort ();
26518 asm_fprintf (asm_out_file, "\t.save ");
26519 if (IS_VFP_REGNUM (REGNO (e1)))
26520 asm_fprintf(asm_out_file, "{d%d}\n",
26521 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26522 else
26523 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26524 break;
26526 case REG:
26527 if (REGNO (e0) == SP_REGNUM)
26529 /* A stack increment. */
26530 if (GET_CODE (e1) != PLUS
26531 || !REG_P (XEXP (e1, 0))
26532 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26533 || !CONST_INT_P (XEXP (e1, 1)))
26534 abort ();
26536 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26537 -INTVAL (XEXP (e1, 1)));
26539 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26541 HOST_WIDE_INT offset;
26543 if (GET_CODE (e1) == PLUS)
26545 if (!REG_P (XEXP (e1, 0))
26546 || !CONST_INT_P (XEXP (e1, 1)))
26547 abort ();
26548 reg = REGNO (XEXP (e1, 0));
26549 offset = INTVAL (XEXP (e1, 1));
26550 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26551 HARD_FRAME_POINTER_REGNUM, reg,
26552 offset);
26554 else if (REG_P (e1))
26556 reg = REGNO (e1);
26557 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26558 HARD_FRAME_POINTER_REGNUM, reg);
26560 else
26561 abort ();
26563 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26565 /* Move from sp to reg. */
26566 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26568 else if (GET_CODE (e1) == PLUS
26569 && REG_P (XEXP (e1, 0))
26570 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26571 && CONST_INT_P (XEXP (e1, 1)))
26573 /* Set reg to offset from sp. */
26574 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26575 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26577 else
26578 abort ();
26579 break;
26581 default:
26582 abort ();
26587 /* Emit unwind directives for the given insn. */
26589 static void
26590 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26592 rtx note, pat;
26593 bool handled_one = false;
26595 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26596 return;
26598 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26599 && (TREE_NOTHROW (current_function_decl)
26600 || crtl->all_throwers_are_sibcalls))
26601 return;
26603 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26604 return;
26606 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26608 switch (REG_NOTE_KIND (note))
26610 case REG_FRAME_RELATED_EXPR:
26611 pat = XEXP (note, 0);
26612 goto found;
26614 case REG_CFA_REGISTER:
26615 pat = XEXP (note, 0);
26616 if (pat == NULL)
26618 pat = PATTERN (insn);
26619 if (GET_CODE (pat) == PARALLEL)
26620 pat = XVECEXP (pat, 0, 0);
26623 /* Only emitted for IS_STACKALIGN re-alignment. */
26625 rtx dest, src;
26626 unsigned reg;
26628 src = SET_SRC (pat);
26629 dest = SET_DEST (pat);
26631 gcc_assert (src == stack_pointer_rtx);
26632 reg = REGNO (dest);
26633 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26634 reg + 0x90, reg);
26636 handled_one = true;
26637 break;
26639 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26640 to get correct dwarf information for shrink-wrap. We should not
26641 emit unwind information for it because these are used either for
26642 pretend arguments or notes to adjust sp and restore registers from
26643 stack. */
26644 case REG_CFA_DEF_CFA:
26645 case REG_CFA_ADJUST_CFA:
26646 case REG_CFA_RESTORE:
26647 return;
26649 case REG_CFA_EXPRESSION:
26650 case REG_CFA_OFFSET:
26651 /* ??? Only handling here what we actually emit. */
26652 gcc_unreachable ();
26654 default:
26655 break;
26658 if (handled_one)
26659 return;
26660 pat = PATTERN (insn);
26661 found:
26663 switch (GET_CODE (pat))
26665 case SET:
26666 arm_unwind_emit_set (asm_out_file, pat);
26667 break;
26669 case SEQUENCE:
26670 /* Store multiple. */
26671 arm_unwind_emit_sequence (asm_out_file, pat);
26672 break;
26674 default:
26675 abort();
26680 /* Output a reference from a function exception table to the type_info
26681 object X. The EABI specifies that the symbol should be relocated by
26682 an R_ARM_TARGET2 relocation. */
26684 static bool
26685 arm_output_ttype (rtx x)
26687 fputs ("\t.word\t", asm_out_file);
26688 output_addr_const (asm_out_file, x);
26689 /* Use special relocations for symbol references. */
26690 if (!CONST_INT_P (x))
26691 fputs ("(TARGET2)", asm_out_file);
26692 fputc ('\n', asm_out_file);
26694 return TRUE;
26697 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26699 static void
26700 arm_asm_emit_except_personality (rtx personality)
26702 fputs ("\t.personality\t", asm_out_file);
26703 output_addr_const (asm_out_file, personality);
26704 fputc ('\n', asm_out_file);
26707 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26709 static void
26710 arm_asm_init_sections (void)
26712 exception_section = get_unnamed_section (0, output_section_asm_op,
26713 "\t.handlerdata");
26715 #endif /* ARM_UNWIND_INFO */
26717 /* Output unwind directives for the start/end of a function. */
26719 void
26720 arm_output_fn_unwind (FILE * f, bool prologue)
26722 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26723 return;
26725 if (prologue)
26726 fputs ("\t.fnstart\n", f);
26727 else
26729 /* If this function will never be unwound, then mark it as such.
26730 The same condition is used in arm_unwind_emit to suppress
26731 the frame annotations. */
26732 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26733 && (TREE_NOTHROW (current_function_decl)
26734 || crtl->all_throwers_are_sibcalls))
26735 fputs("\t.cantunwind\n", f);
26737 fputs ("\t.fnend\n", f);
26741 static bool
26742 arm_emit_tls_decoration (FILE *fp, rtx x)
26744 enum tls_reloc reloc;
26745 rtx val;
26747 val = XVECEXP (x, 0, 0);
26748 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26750 output_addr_const (fp, val);
26752 switch (reloc)
26754 case TLS_GD32:
26755 fputs ("(tlsgd)", fp);
26756 break;
26757 case TLS_LDM32:
26758 fputs ("(tlsldm)", fp);
26759 break;
26760 case TLS_LDO32:
26761 fputs ("(tlsldo)", fp);
26762 break;
26763 case TLS_IE32:
26764 fputs ("(gottpoff)", fp);
26765 break;
26766 case TLS_LE32:
26767 fputs ("(tpoff)", fp);
26768 break;
26769 case TLS_DESCSEQ:
26770 fputs ("(tlsdesc)", fp);
26771 break;
26772 default:
26773 gcc_unreachable ();
26776 switch (reloc)
26778 case TLS_GD32:
26779 case TLS_LDM32:
26780 case TLS_IE32:
26781 case TLS_DESCSEQ:
26782 fputs (" + (. - ", fp);
26783 output_addr_const (fp, XVECEXP (x, 0, 2));
26784 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26785 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26786 output_addr_const (fp, XVECEXP (x, 0, 3));
26787 fputc (')', fp);
26788 break;
26789 default:
26790 break;
26793 return TRUE;
26796 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26798 static void
26799 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26801 gcc_assert (size == 4);
26802 fputs ("\t.word\t", file);
26803 output_addr_const (file, x);
26804 fputs ("(tlsldo)", file);
26807 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26809 static bool
26810 arm_output_addr_const_extra (FILE *fp, rtx x)
26812 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26813 return arm_emit_tls_decoration (fp, x);
26814 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26816 char label[256];
26817 int labelno = INTVAL (XVECEXP (x, 0, 0));
26819 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26820 assemble_name_raw (fp, label);
26822 return TRUE;
26824 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26826 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26827 if (GOT_PCREL)
26828 fputs ("+.", fp);
26829 fputs ("-(", fp);
26830 output_addr_const (fp, XVECEXP (x, 0, 0));
26831 fputc (')', fp);
26832 return TRUE;
26834 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26836 output_addr_const (fp, XVECEXP (x, 0, 0));
26837 if (GOT_PCREL)
26838 fputs ("+.", fp);
26839 fputs ("-(", fp);
26840 output_addr_const (fp, XVECEXP (x, 0, 1));
26841 fputc (')', fp);
26842 return TRUE;
26844 else if (GET_CODE (x) == CONST_VECTOR)
26845 return arm_emit_vector_const (fp, x);
26847 return FALSE;
26850 /* Output assembly for a shift instruction.
26851 SET_FLAGS determines how the instruction modifies the condition codes.
26852 0 - Do not set condition codes.
26853 1 - Set condition codes.
26854 2 - Use smallest instruction. */
26855 const char *
26856 arm_output_shift(rtx * operands, int set_flags)
26858 char pattern[100];
26859 static const char flag_chars[3] = {'?', '.', '!'};
26860 const char *shift;
26861 HOST_WIDE_INT val;
26862 char c;
26864 c = flag_chars[set_flags];
26865 if (TARGET_UNIFIED_ASM)
26867 shift = shift_op(operands[3], &val);
26868 if (shift)
26870 if (val != -1)
26871 operands[2] = GEN_INT(val);
26872 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26874 else
26875 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26877 else
26878 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26879 output_asm_insn (pattern, operands);
26880 return "";
26883 /* Output assembly for a WMMX immediate shift instruction. */
26884 const char *
26885 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26887 int shift = INTVAL (operands[2]);
26888 char templ[50];
26889 machine_mode opmode = GET_MODE (operands[0]);
26891 gcc_assert (shift >= 0);
26893 /* Handle a shift value that is out of range for the register form:
26894 greater than 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
26895 if (((opmode == V4HImode) && (shift > 15))
26896 || ((opmode == V2SImode) && (shift > 31))
26897 || ((opmode == DImode) && (shift > 63)))
26899 if (wror_or_wsra)
26901 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26902 output_asm_insn (templ, operands);
26903 if (opmode == DImode)
26905 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26906 output_asm_insn (templ, operands);
26909 else
26911 /* The destination register will contain all zeros. */
26912 sprintf (templ, "wzero\t%%0");
26913 output_asm_insn (templ, operands);
26915 return "";
26918 if ((opmode == DImode) && (shift > 32))
26920 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26921 output_asm_insn (templ, operands);
26922 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26923 output_asm_insn (templ, operands);
26925 else
26927 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26928 output_asm_insn (templ, operands);
26930 return "";
26933 /* Output assembly for a WMMX tinsr instruction. */
26934 const char *
26935 arm_output_iwmmxt_tinsr (rtx *operands)
26937 int mask = INTVAL (operands[3]);
26938 int i;
26939 char templ[50];
26940 int units = mode_nunits[GET_MODE (operands[0])];
26941 gcc_assert ((mask & (mask - 1)) == 0);
26942 for (i = 0; i < units; ++i)
26944 if ((mask & 0x01) == 1)
26946 break;
26948 mask >>= 1;
26950 gcc_assert (i < units);
26952 switch (GET_MODE (operands[0]))
26954 case V8QImode:
26955 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26956 break;
26957 case V4HImode:
26958 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26959 break;
26960 case V2SImode:
26961 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26962 break;
26963 default:
26964 gcc_unreachable ();
26965 break;
26967 output_asm_insn (templ, operands);
26969 return "";
26972 /* Output a Thumb-1 casesi dispatch sequence. */
26973 const char *
26974 thumb1_output_casesi (rtx *operands)
26976 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26978 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26980 switch (GET_MODE(diff_vec))
26982 case QImode:
26983 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26984 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26985 case HImode:
26986 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26987 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26988 case SImode:
26989 return "bl\t%___gnu_thumb1_case_si";
26990 default:
26991 gcc_unreachable ();
26995 /* Output a Thumb-2 casesi instruction. */
26996 const char *
26997 thumb2_output_casesi (rtx *operands)
26999 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27001 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27003 output_asm_insn ("cmp\t%0, %1", operands);
27004 output_asm_insn ("bhi\t%l3", operands);
27005 switch (GET_MODE(diff_vec))
27007 case QImode:
27008 return "tbb\t[%|pc, %0]";
27009 case HImode:
27010 return "tbh\t[%|pc, %0, lsl #1]";
27011 case SImode:
27012 if (flag_pic)
27014 output_asm_insn ("adr\t%4, %l2", operands);
27015 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27016 output_asm_insn ("add\t%4, %4, %5", operands);
27017 return "bx\t%4";
27019 else
27021 output_asm_insn ("adr\t%4, %l2", operands);
27022 return "ldr\t%|pc, [%4, %0, lsl #2]";
27024 default:
27025 gcc_unreachable ();
27029 /* Most ARM cores are single issue, but some newer ones can dual issue.
27030 The scheduler descriptions rely on this being correct. */
27031 static int
27032 arm_issue_rate (void)
27034 switch (arm_tune)
27036 case cortexa15:
27037 case cortexa57:
27038 return 3;
27040 case cortexm7:
27041 case cortexr4:
27042 case cortexr4f:
27043 case cortexr5:
27044 case genericv7a:
27045 case cortexa5:
27046 case cortexa7:
27047 case cortexa8:
27048 case cortexa9:
27049 case cortexa12:
27050 case cortexa17:
27051 case cortexa53:
27052 case fa726te:
27053 case marvell_pj4:
27054 return 2;
27056 default:
27057 return 1;
27061 const char *
27062 arm_mangle_type (const_tree type)
27064 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27065 has to be mangled as if it is in the "std" namespace. */
27066 if (TARGET_AAPCS_BASED
27067 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27068 return "St9__va_list";
27070 /* Half-precision float. */
27071 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27072 return "Dh";
27074 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27075 builtin type. */
27076 if (TYPE_NAME (type) != NULL)
27077 return arm_mangle_builtin_type (type);
27079 /* Use the default mangling. */
27080 return NULL;
27083 /* Order of allocation of core registers for Thumb: this allocation is
27084 written over the corresponding initial entries of the array
27085 initialized with REG_ALLOC_ORDER. We allocate all low registers
27086 first. Saving and restoring a low register is usually cheaper than
27087 using a call-clobbered high register. */
27089 static const int thumb_core_reg_alloc_order[] =
27091 3, 2, 1, 0, 4, 5, 6, 7,
27092 14, 12, 8, 9, 10, 11
27095 /* Adjust register allocation order when compiling for Thumb. */
27097 void
27098 arm_order_regs_for_local_alloc (void)
27100 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27101 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27102 if (TARGET_THUMB)
27103 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27104 sizeof (thumb_core_reg_alloc_order));
27107 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27109 bool
27110 arm_frame_pointer_required (void)
27112 return (cfun->has_nonlocal_label
27113 || SUBTARGET_FRAME_POINTER_REQUIRED
27114 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27117 /* Only Thumb-1 lacks conditional execution, so return true if
27118 the target is not Thumb-1. */
27119 static bool
27120 arm_have_conditional_execution (void)
27122 return !TARGET_THUMB1;
27125 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27126 static HOST_WIDE_INT
27127 arm_vector_alignment (const_tree type)
27129 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27131 if (TARGET_AAPCS_BASED)
27132 align = MIN (align, 64);
27134 return align;
27137 static unsigned int
27138 arm_autovectorize_vector_sizes (void)
27140 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27143 static bool
27144 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27146 /* Vectors which aren't in packed structures will not be less aligned than
27147 the natural alignment of their element type, so this is safe. */
27148 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27149 return !is_packed;
27151 return default_builtin_vector_alignment_reachable (type, is_packed);
27154 static bool
27155 arm_builtin_support_vector_misalignment (machine_mode mode,
27156 const_tree type, int misalignment,
27157 bool is_packed)
27159 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27161 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27163 if (is_packed)
27164 return align == 1;
27166 /* If the misalignment is unknown, we should be able to handle the access
27167 so long as it is not to a member of a packed data structure. */
27168 if (misalignment == -1)
27169 return true;
27171 /* Return true if the misalignment is a multiple of the natural alignment
27172 of the vector's element type. This is probably always going to be
27173 true in practice, since we've already established that this isn't a
27174 packed access. */
27175 return ((misalignment % align) == 0);
27178 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27179 is_packed);
27182 static void
27183 arm_conditional_register_usage (void)
27185 int regno;
27187 if (TARGET_THUMB1 && optimize_size)
27189 /* When optimizing for size on Thumb-1, it's better not
27190 to use the HI regs, because of the overhead of
27191 stacking them. */
27192 for (regno = FIRST_HI_REGNUM;
27193 regno <= LAST_HI_REGNUM; ++regno)
27194 fixed_regs[regno] = call_used_regs[regno] = 1;
27197 /* The link register can be clobbered by any branch insn,
27198 but we have no way to track that at present, so mark
27199 it as unavailable. */
27200 if (TARGET_THUMB1)
27201 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27203 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27205 /* VFPv3 registers are disabled when earlier VFP
27206 versions are selected due to the definition of
27207 LAST_VFP_REGNUM. */
27208 for (regno = FIRST_VFP_REGNUM;
27209 regno <= LAST_VFP_REGNUM; ++ regno)
27211 fixed_regs[regno] = 0;
27212 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27213 || regno >= FIRST_VFP_REGNUM + 32;
27217 if (TARGET_REALLY_IWMMXT)
27219 regno = FIRST_IWMMXT_GR_REGNUM;
27220 /* The 2002/10/09 revision of the XScale ABI has wCG0
27221 and wCG1 as call-preserved registers. The 2002/11/21
27222 revision changed this so that all wCG registers are
27223 scratch registers. */
27224 for (regno = FIRST_IWMMXT_GR_REGNUM;
27225 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27226 fixed_regs[regno] = 0;
27227 /* The XScale ABI has wR0 - wR9 as scratch registers,
27228 the rest as call-preserved registers. */
27229 for (regno = FIRST_IWMMXT_REGNUM;
27230 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27232 fixed_regs[regno] = 0;
27233 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27237 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27239 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27240 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27242 else if (TARGET_APCS_STACK)
27244 fixed_regs[10] = 1;
27245 call_used_regs[10] = 1;
27247 /* -mcaller-super-interworking reserves r11 for calls to
27248 _interwork_r11_call_via_rN(). Making the register global
27249 is an easy way of ensuring that it remains valid for all
27250 calls. */
27251 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27252 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27254 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27255 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27256 if (TARGET_CALLER_INTERWORKING)
27257 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27259 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27262 static reg_class_t
27263 arm_preferred_rename_class (reg_class_t rclass)
27265 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27266 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27267 and code size can be reduced. */
27268 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27269 return LO_REGS;
27270 else
27271 return NO_REGS;
27274 /* Compute the attribute "length" of insn "*push_multi".
27275 So this function MUST be kept in sync with that insn pattern. */
27277 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27279 int i, regno, hi_reg;
27280 int num_saves = XVECLEN (parallel_op, 0);
27282 /* ARM mode. */
27283 if (TARGET_ARM)
27284 return 4;
27285 /* Thumb1 mode. */
27286 if (TARGET_THUMB1)
27287 return 2;
27289 /* Thumb2 mode. */
27290 regno = REGNO (first_op);
27291 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27292 for (i = 1; i < num_saves && !hi_reg; i++)
27294 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27295 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27298 if (!hi_reg)
27299 return 2;
27300 return 4;
27303 /* Compute the number of instructions emitted by output_move_double. */
27305 arm_count_output_move_double_insns (rtx *operands)
27307 int count;
27308 rtx ops[2];
27309 /* output_move_double may modify the operands array, so call it
27310 here on a copy of the array. */
27311 ops[0] = operands[0];
27312 ops[1] = operands[1];
27313 output_move_double (ops, false, &count);
27314 return count;
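/* Descriptive summary (illustrative): if OPERAND is a CONST_DOUBLE whose
   reciprocal is an exact power of two that fits in 32 bits, the function
   below returns log2 of that reciprocal (e.g. 0.125 -> 3); otherwise it
   returns 0.  */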
27318 vfp3_const_double_for_fract_bits (rtx operand)
27320 REAL_VALUE_TYPE r0;
27322 if (!CONST_DOUBLE_P (operand))
27323 return 0;
27325 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27326 if (exact_real_inverse (DFmode, &r0))
27328 if (exact_real_truncate (DFmode, &r0))
27330 HOST_WIDE_INT value = real_to_integer (&r0);
27331 value = value & 0xffffffff;
27332 if ((value != 0) && ( (value & (value - 1)) == 0))
27333 return int_log2 (value);
27336 return 0;
27340 vfp3_const_double_for_bits (rtx operand)
27342 REAL_VALUE_TYPE r0;
27344 if (!CONST_DOUBLE_P (operand))
27345 return 0;
27347 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27348 if (exact_real_truncate (DFmode, &r0))
27350 HOST_WIDE_INT value = real_to_integer (&r0);
27351 value = value & 0xffffffff;
27352 if ((value != 0) && ( (value & (value - 1)) == 0))
27353 return int_log2 (value);
27356 return 0;
27359 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27361 static void
27362 arm_pre_atomic_barrier (enum memmodel model)
27364 if (need_atomic_barrier_p (model, true))
27365 emit_insn (gen_memory_barrier ());
27368 static void
27369 arm_post_atomic_barrier (enum memmodel model)
27371 if (need_atomic_barrier_p (model, false))
27372 emit_insn (gen_memory_barrier ());
27375 /* Emit the load-exclusive and store-exclusive instructions.
27376 Use acquire and release versions if necessary. */
27378 static void
27379 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27381 rtx (*gen) (rtx, rtx);
27383 if (acq)
27385 switch (mode)
27387 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27388 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27389 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27390 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27391 default:
27392 gcc_unreachable ();
27395 else
27397 switch (mode)
27399 case QImode: gen = gen_arm_load_exclusiveqi; break;
27400 case HImode: gen = gen_arm_load_exclusivehi; break;
27401 case SImode: gen = gen_arm_load_exclusivesi; break;
27402 case DImode: gen = gen_arm_load_exclusivedi; break;
27403 default:
27404 gcc_unreachable ();
27408 emit_insn (gen (rval, mem));
27411 static void
27412 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27413 rtx mem, bool rel)
27415 rtx (*gen) (rtx, rtx, rtx);
27417 if (rel)
27419 switch (mode)
27421 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27422 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27423 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27424 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27425 default:
27426 gcc_unreachable ();
27429 else
27431 switch (mode)
27433 case QImode: gen = gen_arm_store_exclusiveqi; break;
27434 case HImode: gen = gen_arm_store_exclusivehi; break;
27435 case SImode: gen = gen_arm_store_exclusivesi; break;
27436 case DImode: gen = gen_arm_store_exclusivedi; break;
27437 default:
27438 gcc_unreachable ();
27442 emit_insn (gen (bval, rval, mem));
27445 /* Mark the previous jump instruction as unlikely. */
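/* The note value REG_BR_PROB_BASE / 100 - 1 corresponds to a taken
   probability of just under 1%.  */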
27447 static void
27448 emit_unlikely_jump (rtx insn)
27450 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27452 insn = emit_jump_insn (insn);
27453 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27456 /* Expand a compare and swap pattern. */
27458 void
27459 arm_expand_compare_and_swap (rtx operands[])
27461 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27462 machine_mode mode;
27463 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27465 bval = operands[0];
27466 rval = operands[1];
27467 mem = operands[2];
27468 oldval = operands[3];
27469 newval = operands[4];
27470 is_weak = operands[5];
27471 mod_s = operands[6];
27472 mod_f = operands[7];
27473 mode = GET_MODE (mem);
27475 /* Normally the succ memory model must be stronger than fail, but in the
27476 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27477 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27479 if (TARGET_HAVE_LDACQ
27480 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27481 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27482 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27484 switch (mode)
27486 case QImode:
27487 case HImode:
27488 /* For narrow modes, we're going to perform the comparison in SImode,
27489 so do the zero-extension now. */
27490 rval = gen_reg_rtx (SImode);
27491 oldval = convert_modes (SImode, mode, oldval, true);
27492 /* FALLTHRU */
27494 case SImode:
27495 /* Force the value into a register if needed. We waited until after
27496 the zero-extension above to do this properly. */
27497 if (!arm_add_operand (oldval, SImode))
27498 oldval = force_reg (SImode, oldval);
27499 break;
27501 case DImode:
27502 if (!cmpdi_operand (oldval, mode))
27503 oldval = force_reg (mode, oldval);
27504 break;
27506 default:
27507 gcc_unreachable ();
27510 switch (mode)
27512 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27513 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27514 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27515 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27516 default:
27517 gcc_unreachable ();
27520 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27522 if (mode == QImode || mode == HImode)
27523 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27525 /* In all cases, we arrange for success to be signaled by Z set.
27526 This arrangement allows for the boolean result to be used directly
27527 in a subsequent branch, post optimization. */
27528 x = gen_rtx_REG (CCmode, CC_REGNUM);
27529 x = gen_rtx_EQ (SImode, x, const0_rtx);
27530 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27533 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27534 another memory store between the load-exclusive and store-exclusive can
27535 reset the monitor from Exclusive to Open state. This means we must wait
27536 until after reload to split the pattern, lest we get a register spill in
27537 the middle of the atomic sequence. */
27539 void
27540 arm_split_compare_and_swap (rtx operands[])
27542 rtx rval, mem, oldval, newval, scratch;
27543 machine_mode mode;
27544 enum memmodel mod_s, mod_f;
27545 bool is_weak;
27546 rtx_code_label *label1, *label2;
27547 rtx x, cond;
27549 rval = operands[0];
27550 mem = operands[1];
27551 oldval = operands[2];
27552 newval = operands[3];
27553 is_weak = (operands[4] != const0_rtx);
27554 mod_s = (enum memmodel) INTVAL (operands[5]);
27555 mod_f = (enum memmodel) INTVAL (operands[6]);
27556 scratch = operands[7];
27557 mode = GET_MODE (mem);
27559 bool use_acquire = TARGET_HAVE_LDACQ
27560 && !(mod_s == MEMMODEL_RELAXED
27561 || mod_s == MEMMODEL_CONSUME
27562 || mod_s == MEMMODEL_RELEASE);
27564 bool use_release = TARGET_HAVE_LDACQ
27565 && !(mod_s == MEMMODEL_RELAXED
27566 || mod_s == MEMMODEL_CONSUME
27567 || mod_s == MEMMODEL_ACQUIRE);
27569 /* Checks whether a barrier is needed and emits one accordingly. */
27570 if (!(use_acquire || use_release))
27571 arm_pre_atomic_barrier (mod_s);
27573 label1 = NULL;
27574 if (!is_weak)
27576 label1 = gen_label_rtx ();
27577 emit_label (label1);
27579 label2 = gen_label_rtx ();
27581 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27583 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27584 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27585 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27586 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27587 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27589 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27591 /* Weak or strong, we want EQ to be true for success, so that we
27592 match the flags that we got from the compare above. */
27593 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27594 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27595 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27597 if (!is_weak)
27599 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27600 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27601 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27602 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27605 if (mod_f != MEMMODEL_RELAXED)
27606 emit_label (label2);
27608 /* Checks whether a barrier is needed and emits one accordingly. */
27609 if (!(use_acquire || use_release))
27610 arm_post_atomic_barrier (mod_s);
27612 if (mod_f == MEMMODEL_RELAXED)
27613 emit_label (label2);
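/* Split an atomic operation pattern.  Emit the load-exclusive / operate /
   store-exclusive retry loop implementing operation CODE of VALUE on MEM.
   OLD_OUT and NEW_OUT, if non-null, receive the previous memory contents
   and the newly computed value.  COND is a scratch register that receives
   the store-exclusive status and drives the retry branch.  MODEL_RTX holds
   the memory model, which decides whether acquire/release instructions or
   explicit barriers are used.  */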
27616 void
27617 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27618 rtx value, rtx model_rtx, rtx cond)
27620 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27621 machine_mode mode = GET_MODE (mem);
27622 machine_mode wmode = (mode == DImode ? DImode : SImode);
27623 rtx_code_label *label;
27624 rtx x;
27626 bool use_acquire = TARGET_HAVE_LDACQ
27627 && !(model == MEMMODEL_RELAXED
27628 || model == MEMMODEL_CONSUME
27629 || model == MEMMODEL_RELEASE);
27631 bool use_release = TARGET_HAVE_LDACQ
27632 && !(model == MEMMODEL_RELAXED
27633 || model == MEMMODEL_CONSUME
27634 || model == MEMMODEL_ACQUIRE);
27636 /* Checks whether a barrier is needed and emits one accordingly. */
27637 if (!(use_acquire || use_release))
27638 arm_pre_atomic_barrier (model);
27640 label = gen_label_rtx ();
27641 emit_label (label);
27643 if (new_out)
27644 new_out = gen_lowpart (wmode, new_out);
27645 if (old_out)
27646 old_out = gen_lowpart (wmode, old_out);
27647 else
27648 old_out = new_out;
27649 value = simplify_gen_subreg (wmode, value, mode, 0);
27651 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27653 switch (code)
27655 case SET:
27656 new_out = value;
27657 break;
27659 case NOT:
27660 x = gen_rtx_AND (wmode, old_out, value);
27661 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27662 x = gen_rtx_NOT (wmode, new_out);
27663 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27664 break;
27666 case MINUS:
27667 if (CONST_INT_P (value))
27669 value = GEN_INT (-INTVAL (value));
27670 code = PLUS;
27672 /* FALLTHRU */
27674 case PLUS:
27675 if (mode == DImode)
27677 /* DImode plus/minus need to clobber flags. */
27678 /* The adddi3 and subdi3 patterns are incorrectly written so that
27679 they require matching operands, even when we could easily support
27680 three operands. Thankfully, this can be fixed up post-splitting,
27681 as the individual add+adc patterns do accept three operands and
27682 post-reload cprop can make these moves go away. */
27683 emit_move_insn (new_out, old_out);
27684 if (code == PLUS)
27685 x = gen_adddi3 (new_out, new_out, value);
27686 else
27687 x = gen_subdi3 (new_out, new_out, value);
27688 emit_insn (x);
27689 break;
27691 /* FALLTHRU */
27693 default:
27694 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27695 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27696 break;
27699 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27700 use_release);
27702 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27703 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27705 /* Checks whether a barrier is needed and emits one accordingly. */
27706 if (!(use_acquire || use_release))
27707 arm_post_atomic_barrier (model);
27710 #define MAX_VECT_LEN 16
27712 struct expand_vec_perm_d
27714 rtx target, op0, op1;
27715 unsigned char perm[MAX_VECT_LEN];
27716 machine_mode vmode;
27717 unsigned char nelt;
27718 bool one_vector_p;
27719 bool testing_p;
27722 /* Generate a variable permutation. */
27724 static void
27725 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27727 machine_mode vmode = GET_MODE (target);
27728 bool one_vector_p = rtx_equal_p (op0, op1);
27730 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27731 gcc_checking_assert (GET_MODE (op0) == vmode);
27732 gcc_checking_assert (GET_MODE (op1) == vmode);
27733 gcc_checking_assert (GET_MODE (sel) == vmode);
27734 gcc_checking_assert (TARGET_NEON);
27736 if (one_vector_p)
27738 if (vmode == V8QImode)
27739 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27740 else
27741 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27743 else
27745 rtx pair;
27747 if (vmode == V8QImode)
27749 pair = gen_reg_rtx (V16QImode);
27750 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27751 pair = gen_lowpart (TImode, pair);
27752 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27754 else
27756 pair = gen_reg_rtx (OImode);
27757 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27758 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
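/* Expand a variable vector permutation: select bytes of OP0/OP1 into TARGET
   according to the indices in register SEL.  The indices are first reduced
   modulo the number of input elements, since the VTBL instruction does not
   use a modulo index itself (see below).  */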
27763 void
27764 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27766 machine_mode vmode = GET_MODE (target);
27767 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27768 bool one_vector_p = rtx_equal_p (op0, op1);
27769 rtx rmask[MAX_VECT_LEN], mask;
27771 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27772 numbering of elements for big-endian, we must reverse the order. */
27773 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27775 /* The VTBL instruction does not use a modulo index, so we must take care
27776 of that ourselves. */
27777 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27778 for (i = 0; i < nelt; ++i)
27779 rmask[i] = mask;
27780 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27781 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27783 arm_expand_vec_perm_1 (target, op0, op1, sel);
27786 /* Generate or test for an insn that supports a constant permutation. */
27788 /* Recognize patterns for the VUZP insns. */
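/* For example, with V4SImode operands {a0 a1 a2 a3} and {b0 b1 b2 b3},
   the selector {0 2 4 6} (even elements, giving {a0 a2 b0 b2}) and the
   selector {1 3 5 7} (odd elements) are both matched here.  */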
27790 static bool
27791 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27793 unsigned int i, odd, mask, nelt = d->nelt;
27794 rtx out0, out1, in0, in1, x;
27795 rtx (*gen)(rtx, rtx, rtx, rtx);
27797 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27798 return false;
27800 /* Note that these are little-endian tests. Adjust for big-endian later. */
27801 if (d->perm[0] == 0)
27802 odd = 0;
27803 else if (d->perm[0] == 1)
27804 odd = 1;
27805 else
27806 return false;
27807 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27809 for (i = 0; i < nelt; i++)
27811 unsigned elt = (i * 2 + odd) & mask;
27812 if (d->perm[i] != elt)
27813 return false;
27816 /* Success! */
27817 if (d->testing_p)
27818 return true;
27820 switch (d->vmode)
27822 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27823 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27824 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27825 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27826 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27827 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27828 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27829 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27830 default:
27831 gcc_unreachable ();
27834 in0 = d->op0;
27835 in1 = d->op1;
27836 if (BYTES_BIG_ENDIAN)
27838 x = in0, in0 = in1, in1 = x;
27839 odd = !odd;
27842 out0 = d->target;
27843 out1 = gen_reg_rtx (d->vmode);
27844 if (odd)
27845 x = out0, out0 = out1, out1 = x;
27847 emit_insn (gen (out0, in0, in1, out1));
27848 return true;
27851 /* Recognize patterns for the VZIP insns. */
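/* For example, with V4SImode operands {a0 a1 a2 a3} and {b0 b1 b2 b3},
   the selector {0 4 1 5} (interleave the low halves, giving {a0 b0 a1 b1})
   and the selector {2 6 3 7} (interleave the high halves) are matched here.  */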
27853 static bool
27854 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27856 unsigned int i, high, mask, nelt = d->nelt;
27857 rtx out0, out1, in0, in1, x;
27858 rtx (*gen)(rtx, rtx, rtx, rtx);
27860 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27861 return false;
27863 /* Note that these are little-endian tests. Adjust for big-endian later. */
27864 high = nelt / 2;
27865 if (d->perm[0] == high)
27867 else if (d->perm[0] == 0)
27868 high = 0;
27869 else
27870 return false;
27871 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27873 for (i = 0; i < nelt / 2; i++)
27875 unsigned elt = (i + high) & mask;
27876 if (d->perm[i * 2] != elt)
27877 return false;
27878 elt = (elt + nelt) & mask;
27879 if (d->perm[i * 2 + 1] != elt)
27880 return false;
27883 /* Success! */
27884 if (d->testing_p)
27885 return true;
27887 switch (d->vmode)
27889 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27890 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27891 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27892 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27893 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27894 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27895 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27896 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27897 default:
27898 gcc_unreachable ();
27901 in0 = d->op0;
27902 in1 = d->op1;
27903 if (BYTES_BIG_ENDIAN)
27905 x = in0, in0 = in1, in1 = x;
27906 high = !high;
27909 out0 = d->target;
27910 out1 = gen_reg_rtx (d->vmode);
27911 if (high)
27912 x = out0, out0 = out1, out1 = x;
27914 emit_insn (gen (out0, in0, in1, out1));
27915 return true;
27918 /* Recognize patterns for the VREV insns. */
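/* Only single-operand permutations are handled here.  For example, a
   V4HImode selector of {3 2 1 0} (diff == 3) maps to vrev64.16, and
   {1 0 3 2} (diff == 1) maps to vrev32.16.  */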
27920 static bool
27921 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27923 unsigned int i, j, diff, nelt = d->nelt;
27924 rtx (*gen)(rtx, rtx);
27926 if (!d->one_vector_p)
27927 return false;
27929 diff = d->perm[0];
27930 switch (diff)
27932 case 7:
27933 switch (d->vmode)
27935 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27936 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27937 default:
27938 return false;
27940 break;
27941 case 3:
27942 switch (d->vmode)
27944 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27945 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27946 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27947 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27948 default:
27949 return false;
27951 break;
27952 case 1:
27953 switch (d->vmode)
27955 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27956 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27957 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27958 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27959 case V4SImode: gen = gen_neon_vrev64v4si; break;
27960 case V2SImode: gen = gen_neon_vrev64v2si; break;
27961 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27962 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27963 default:
27964 return false;
27966 break;
27967 default:
27968 return false;
27971 for (i = 0; i < nelt ; i += diff + 1)
27972 for (j = 0; j <= diff; j += 1)
27974 /* This is guaranteed to be true as the value of diff
27975 is 7, 3, 1 and we should have enough elements in the
27976 queue to generate this. Getting a vector mask with a
27977 value of diff other than these values implies that
27978 something is wrong by the time we get here. */
27979 gcc_assert (i + j < nelt);
27980 if (d->perm[i + j] != i + diff - j)
27981 return false;
27984 /* Success! */
27985 if (d->testing_p)
27986 return true;
27988 emit_insn (gen (d->target, d->op0));
27989 return true;
27992 /* Recognize patterns for the VTRN insns. */
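/* For example, with V4SImode operands {a0 a1 a2 a3} and {b0 b1 b2 b3},
   the selector {0 4 2 6} (giving {a0 b0 a2 b2}) and the selector
   {1 5 3 7} (giving {a1 b1 a3 b3}) are matched here.  */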
27994 static bool
27995 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27997 unsigned int i, odd, mask, nelt = d->nelt;
27998 rtx out0, out1, in0, in1, x;
27999 rtx (*gen)(rtx, rtx, rtx, rtx);
28001 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28002 return false;
28004 /* Note that these are little-endian tests. Adjust for big-endian later. */
28005 if (d->perm[0] == 0)
28006 odd = 0;
28007 else if (d->perm[0] == 1)
28008 odd = 1;
28009 else
28010 return false;
28011 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28013 for (i = 0; i < nelt; i += 2)
28015 if (d->perm[i] != i + odd)
28016 return false;
28017 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28018 return false;
28021 /* Success! */
28022 if (d->testing_p)
28023 return true;
28025 switch (d->vmode)
28027 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28028 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28029 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28030 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28031 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28032 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28033 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28034 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28035 default:
28036 gcc_unreachable ();
28039 in0 = d->op0;
28040 in1 = d->op1;
28041 if (BYTES_BIG_ENDIAN)
28043 x = in0, in0 = in1, in1 = x;
28044 odd = !odd;
28047 out0 = d->target;
28048 out1 = gen_reg_rtx (d->vmode);
28049 if (odd)
28050 x = out0, out0 = out1, out1 = x;
28052 emit_insn (gen (out0, in0, in1, out1));
28053 return true;
28056 /* Recognize patterns for the VEXT insns. */
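/* The selector must pick consecutive elements starting at some index.
   For example, {1 2 3 4} for V4SImode maps to a vext.32 with #1,
   producing {a1 a2 a3 b0}.  A wrap-around is only accepted in the
   one-operand case (a rotation).  */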
28058 static bool
28059 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28061 unsigned int i, nelt = d->nelt;
28062 rtx (*gen) (rtx, rtx, rtx, rtx);
28063 rtx offset;
28065 unsigned int location;
28067 unsigned int next = d->perm[0] + 1;
28069 /* TODO: Handle GCC's numbering of elements for big-endian. */
28070 if (BYTES_BIG_ENDIAN)
28071 return false;
28073 /* Check if the extracted indexes are increasing by one. */
28074 for (i = 1; i < nelt; next++, i++)
28076 /* If we hit the most significant element of the 2nd vector in
28077 the previous iteration, no need to test further. */
28078 if (next == 2 * nelt)
28079 return false;
28081 /* If we are operating on only one vector: it could be a
28082 rotation. If there are only two elements of size < 64, let
28083 arm_evpc_neon_vrev catch it. */
28084 if (d->one_vector_p && (next == nelt))
28086 if ((nelt == 2) && (d->vmode != V2DImode))
28087 return false;
28088 else
28089 next = 0;
28092 if (d->perm[i] != next)
28093 return false;
28096 location = d->perm[0];
28098 switch (d->vmode)
28100 case V16QImode: gen = gen_neon_vextv16qi; break;
28101 case V8QImode: gen = gen_neon_vextv8qi; break;
28102 case V4HImode: gen = gen_neon_vextv4hi; break;
28103 case V8HImode: gen = gen_neon_vextv8hi; break;
28104 case V2SImode: gen = gen_neon_vextv2si; break;
28105 case V4SImode: gen = gen_neon_vextv4si; break;
28106 case V2SFmode: gen = gen_neon_vextv2sf; break;
28107 case V4SFmode: gen = gen_neon_vextv4sf; break;
28108 case V2DImode: gen = gen_neon_vextv2di; break;
28109 default:
28110 return false;
28113 /* Success! */
28114 if (d->testing_p)
28115 return true;
28117 offset = GEN_INT (location);
28118 emit_insn (gen (d->target, d->op0, d->op1, offset));
28119 return true;
28122 /* The NEON VTBL instruction is a fully variable permutation that's even
28123 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28124 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28125 can do slightly better by expanding this as a constant where we don't
28126 have to apply a mask. */
28128 static bool
28129 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28131 rtx rperm[MAX_VECT_LEN], sel;
28132 machine_mode vmode = d->vmode;
28133 unsigned int i, nelt = d->nelt;
28135 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28136 numbering of elements for big-endian, we must reverse the order. */
28137 if (BYTES_BIG_ENDIAN)
28138 return false;
28140 if (d->testing_p)
28141 return true;
28143 /* Generic code will try constant permutation twice. Once with the
28144 original mode and again with the elements lowered to QImode.
28145 So wait and don't do the selector expansion ourselves. */
28146 if (vmode != V8QImode && vmode != V16QImode)
28147 return false;
28149 for (i = 0; i < nelt; ++i)
28150 rperm[i] = GEN_INT (d->perm[i]);
28151 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28152 sel = force_reg (vmode, sel);
28154 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28155 return true;
28158 static bool
28159 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28161 /* Check if the input mask matches vext before reordering the
28162 operands. */
28163 if (TARGET_NEON)
28164 if (arm_evpc_neon_vext (d))
28165 return true;
28167 /* The pattern matching functions above are written to look for a small
28168 number to begin the sequence (0, 1, N/2). If we begin with an index
28169 from the second operand, we can swap the operands. */
28170 if (d->perm[0] >= d->nelt)
28172 unsigned i, nelt = d->nelt;
28173 rtx x;
28175 for (i = 0; i < nelt; ++i)
28176 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28178 x = d->op0;
28179 d->op0 = d->op1;
28180 d->op1 = x;
28183 if (TARGET_NEON)
28185 if (arm_evpc_neon_vuzp (d))
28186 return true;
28187 if (arm_evpc_neon_vzip (d))
28188 return true;
28189 if (arm_evpc_neon_vrev (d))
28190 return true;
28191 if (arm_evpc_neon_vtrn (d))
28192 return true;
28193 return arm_evpc_neon_vtbl (d);
28195 return false;
28198 /* Expand a vec_perm_const pattern. */
28200 bool
28201 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28203 struct expand_vec_perm_d d;
28204 int i, nelt, which;
28206 d.target = target;
28207 d.op0 = op0;
28208 d.op1 = op1;
28210 d.vmode = GET_MODE (target);
28211 gcc_assert (VECTOR_MODE_P (d.vmode));
28212 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28213 d.testing_p = false;
28215 for (i = which = 0; i < nelt; ++i)
28217 rtx e = XVECEXP (sel, 0, i);
28218 int ei = INTVAL (e) & (2 * nelt - 1);
28219 which |= (ei < nelt ? 1 : 2);
28220 d.perm[i] = ei;
28223 switch (which)
28225 default:
28226 gcc_unreachable();
28228 case 3:
28229 d.one_vector_p = false;
28230 if (!rtx_equal_p (op0, op1))
28231 break;
28233 /* The elements of PERM do not suggest that only the first operand
28234 is used, but both operands are identical. Allow easier matching
28235 of the permutation by folding the permutation into the single
28236 input vector. */
28237 /* FALLTHRU */
28238 case 2:
28239 for (i = 0; i < nelt; ++i)
28240 d.perm[i] &= nelt - 1;
28241 d.op0 = op1;
28242 d.one_vector_p = true;
28243 break;
28245 case 1:
28246 d.op1 = op0;
28247 d.one_vector_p = true;
28248 break;
28251 return arm_expand_vec_perm_const_1 (&d);
28254 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28256 static bool
28257 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28258 const unsigned char *sel)
28260 struct expand_vec_perm_d d;
28261 unsigned int i, nelt, which;
28262 bool ret;
28264 d.vmode = vmode;
28265 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28266 d.testing_p = true;
28267 memcpy (d.perm, sel, nelt);
28269 /* Categorize the set of elements in the selector. */
28270 for (i = which = 0; i < nelt; ++i)
28272 unsigned char e = d.perm[i];
28273 gcc_assert (e < 2 * nelt);
28274 which |= (e < nelt ? 1 : 2);
28277 /* For all elements from second vector, fold the elements to first. */
28278 if (which == 2)
28279 for (i = 0; i < nelt; ++i)
28280 d.perm[i] -= nelt;
28282 /* Check whether the mask can be applied to the vector type. */
28283 d.one_vector_p = (which != 3);
28285 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28286 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28287 if (!d.one_vector_p)
28288 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28290 start_sequence ();
28291 ret = arm_expand_vec_perm_const_1 (&d);
28292 end_sequence ();
28294 return ret;
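/* Return TRUE if the auto-increment/decrement addressing form CODE may
   profitably be used for accesses of mode MODE.  */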
28297 bool
28298 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28300 /* If we are soft float and either have ldrd or the access is no
28301 wider than a word, then all auto increment forms are ok. */
28302 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28303 return true;
28305 switch (code)
28307 /* Post-increment and pre-decrement are supported for all
28308 instruction forms except for vector forms. */
28309 case ARM_POST_INC:
28310 case ARM_PRE_DEC:
28311 if (VECTOR_MODE_P (mode))
28313 if (code != ARM_PRE_DEC)
28314 return true;
28315 else
28316 return false;
28319 return true;
28321 case ARM_POST_DEC:
28322 case ARM_PRE_INC:
28323 /* Without LDRD and mode size greater than
28324 word size, there is no point in auto-incrementing
28325 because ldm and stm will not have these forms. */
28326 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28327 return false;
28329 /* Vector and floating point modes do not support
28330 these auto increment forms. */
28331 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28332 return false;
28334 return true;
28336 default:
28337 return false;
28341 return false;
28344 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28345 on ARM, since we know that shifts by negative amounts are no-ops.
28346 Additionally, the default expansion code is not available or suitable
28347 for post-reload insn splits (this can occur when the register allocator
28348 chooses not to do a shift in NEON).
28350 This function is used in both initial expand and post-reload splits, and
28351 handles all kinds of 64-bit shifts.
28353 Input requirements:
28354 - It is safe for the input and output to be the same register, but
28355 early-clobber rules apply for the shift amount and scratch registers.
28356 - Shift by register requires both scratch registers. In all other cases
28357 the scratch registers may be NULL.
28358 - Ashiftrt by a register also clobbers the CC register. */
28359 void
28360 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28361 rtx amount, rtx scratch1, rtx scratch2)
28363 rtx out_high = gen_highpart (SImode, out);
28364 rtx out_low = gen_lowpart (SImode, out);
28365 rtx in_high = gen_highpart (SImode, in);
28366 rtx in_low = gen_lowpart (SImode, in);
28368 /* Terminology:
28369 in = the register pair containing the input value.
28370 out = the destination register pair.
28371 up = the high- or low-part of each pair.
28372 down = the opposite part to "up".
28373 In a shift, we can consider bits to shift from "up"-stream to
28374 "down"-stream, so in a left-shift "up" is the low-part and "down"
28375 is the high-part of each register pair. */
28377 rtx out_up = code == ASHIFT ? out_low : out_high;
28378 rtx out_down = code == ASHIFT ? out_high : out_low;
28379 rtx in_up = code == ASHIFT ? in_low : in_high;
28380 rtx in_down = code == ASHIFT ? in_high : in_low;
28382 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28383 gcc_assert (out
28384 && (REG_P (out) || GET_CODE (out) == SUBREG)
28385 && GET_MODE (out) == DImode);
28386 gcc_assert (in
28387 && (REG_P (in) || GET_CODE (in) == SUBREG)
28388 && GET_MODE (in) == DImode);
28389 gcc_assert (amount
28390 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28391 && GET_MODE (amount) == SImode)
28392 || CONST_INT_P (amount)));
28393 gcc_assert (scratch1 == NULL
28394 || (GET_CODE (scratch1) == SCRATCH)
28395 || (GET_MODE (scratch1) == SImode
28396 && REG_P (scratch1)));
28397 gcc_assert (scratch2 == NULL
28398 || (GET_CODE (scratch2) == SCRATCH)
28399 || (GET_MODE (scratch2) == SImode
28400 && REG_P (scratch2)));
28401 gcc_assert (!REG_P (out) || !REG_P (amount)
28402 || !HARD_REGISTER_P (out)
28403 || (REGNO (out) != REGNO (amount)
28404 && REGNO (out) + 1 != REGNO (amount)));
28406 /* Macros to make following code more readable. */
28407 #define SUB_32(DEST,SRC) \
28408 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28409 #define RSB_32(DEST,SRC) \
28410 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28411 #define SUB_S_32(DEST,SRC) \
28412 gen_addsi3_compare0 ((DEST), (SRC), \
28413 GEN_INT (-32))
28414 #define SET(DEST,SRC) \
28415 gen_rtx_SET (SImode, (DEST), (SRC))
28416 #define SHIFT(CODE,SRC,AMOUNT) \
28417 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28418 #define LSHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28420 SImode, (SRC), (AMOUNT))
28421 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28422 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28423 SImode, (SRC), (AMOUNT))
28424 #define ORR(A,B) \
28425 gen_rtx_IOR (SImode, (A), (B))
28426 #define BRANCH(COND,LABEL) \
28427 gen_arm_cond_branch ((LABEL), \
28428 gen_rtx_ ## COND (CCmode, cc_reg, \
28429 const0_rtx), \
28430 cc_reg)
28432 /* Shifts by register and shifts by constant are handled separately. */
28433 if (CONST_INT_P (amount))
28435 /* We have a shift-by-constant. */
28437 /* First, handle out-of-range shift amounts.
28438 In both cases we try to match the result an ARM instruction in a
28439 shift-by-register would give. This helps reduce execution
28440 differences between optimization levels, but it won't stop other
28441 parts of the compiler doing different things. This is "undefined"
28442 behaviour, in any case. */
28443 if (INTVAL (amount) <= 0)
28444 emit_insn (gen_movdi (out, in));
28445 else if (INTVAL (amount) >= 64)
28447 if (code == ASHIFTRT)
28449 rtx const31_rtx = GEN_INT (31);
28450 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28451 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28453 else
28454 emit_insn (gen_movdi (out, const0_rtx));
28457 /* Now handle valid shifts. */
28458 else if (INTVAL (amount) < 32)
28460 /* Shifts by a constant less than 32. */
28461 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28463 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28464 emit_insn (SET (out_down,
28465 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28466 out_down)));
28467 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28469 else
28471 /* Shifts by a constant greater than 31. */
28472 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28474 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28475 if (code == ASHIFTRT)
28476 emit_insn (gen_ashrsi3 (out_up, in_up,
28477 GEN_INT (31)));
28478 else
28479 emit_insn (SET (out_up, const0_rtx));
28482 else
28484 /* We have a shift-by-register. */
28485 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28487 /* This alternative requires the scratch registers. */
28488 gcc_assert (scratch1 && REG_P (scratch1));
28489 gcc_assert (scratch2 && REG_P (scratch2));
28491 /* We will need the values "amount-32" and "32-amount" later.
28492 Swapping them around now allows the later code to be more general. */
28493 switch (code)
28495 case ASHIFT:
28496 emit_insn (SUB_32 (scratch1, amount));
28497 emit_insn (RSB_32 (scratch2, amount));
28498 break;
28499 case ASHIFTRT:
28500 emit_insn (RSB_32 (scratch1, amount));
28501 /* Also set CC = amount > 32. */
28502 emit_insn (SUB_S_32 (scratch2, amount));
28503 break;
28504 case LSHIFTRT:
28505 emit_insn (RSB_32 (scratch1, amount));
28506 emit_insn (SUB_32 (scratch2, amount));
28507 break;
28508 default:
28509 gcc_unreachable ();
28512 /* Emit code like this:
28514 arithmetic-left:
28515 out_down = in_down << amount;
28516 out_down = (in_up << (amount - 32)) | out_down;
28517 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28518 out_up = in_up << amount;
28520 arithmetic-right:
28521 out_down = in_down >> amount;
28522 out_down = (in_up << (32 - amount)) | out_down;
28523 if (amount < 32)
28524 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28525 out_up = in_up << amount;
28527 logical-right:
28528 out_down = in_down >> amount;
28529 out_down = (in_up << (32 - amount)) | out_down;
28530 if (amount < 32)
28531 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28532 out_up = in_up << amount;
28534 The ARM and Thumb2 variants are the same but implemented slightly
28535 differently. If this were only called during expand we could just
28536 use the Thumb2 case and let combine do the right thing, but this
28537 can also be called from post-reload splitters. */
28539 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28541 if (!TARGET_THUMB2)
28543 /* Emit code for ARM mode. */
28544 emit_insn (SET (out_down,
28545 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28546 if (code == ASHIFTRT)
28548 rtx_code_label *done_label = gen_label_rtx ();
28549 emit_jump_insn (BRANCH (LT, done_label));
28550 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28551 out_down)));
28552 emit_label (done_label);
28554 else
28555 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28556 out_down)));
28558 else
28560 /* Emit code for Thumb2 mode.
28561 Thumb2 can't do shift and or in one insn. */
28562 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28563 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28565 if (code == ASHIFTRT)
28567 rtx_code_label *done_label = gen_label_rtx ();
28568 emit_jump_insn (BRANCH (LT, done_label));
28569 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28570 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28571 emit_label (done_label);
28573 else
28575 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28576 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28580 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28583 #undef SUB_32
28584 #undef RSB_32
28585 #undef SUB_S_32
28586 #undef SET
28587 #undef SHIFT
28588 #undef LSHIFT
28589 #undef REV_LSHIFT
28590 #undef ORR
28591 #undef BRANCH
28595 /* Return true if COMPARISON is a valid comparison operation, and put
28596 the operands into a form that is valid. */
28597 bool
28598 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28600 enum rtx_code code = GET_CODE (*comparison);
28601 int code_int;
28602 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28603 ? GET_MODE (*op2) : GET_MODE (*op1);
28605 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28607 if (code == UNEQ || code == LTGT)
28608 return false;
28610 code_int = (int)code;
28611 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28612 PUT_CODE (*comparison, (enum rtx_code)code_int);
28614 switch (mode)
28616 case SImode:
28617 if (!arm_add_operand (*op1, mode))
28618 *op1 = force_reg (mode, *op1);
28619 if (!arm_add_operand (*op2, mode))
28620 *op2 = force_reg (mode, *op2);
28621 return true;
28623 case DImode:
28624 if (!cmpdi_operand (*op1, mode))
28625 *op1 = force_reg (mode, *op1);
28626 if (!cmpdi_operand (*op2, mode))
28627 *op2 = force_reg (mode, *op2);
28628 return true;
28630 case SFmode:
28631 case DFmode:
28632 if (!arm_float_compare_operand (*op1, mode))
28633 *op1 = force_reg (mode, *op1);
28634 if (!arm_float_compare_operand (*op2, mode))
28635 *op2 = force_reg (mode, *op2);
28636 return true;
28637 default:
28638 break;
28641 return false;
28645 /* Maximum number of instructions to set block of memory. */
28646 static int
28647 arm_block_set_max_insns (void)
28649 if (optimize_function_for_size_p (cfun))
28650 return 4;
28651 else
28652 return current_tune->max_insns_inline_memset;
28655 /* Return TRUE if it's profitable to set a block of memory in the
28656 non-vectorized case. VAL is the value to set the memory
28657 with. LENGTH is the number of bytes to set. ALIGN is the
28658 alignment of the destination memory in bytes. UNALIGNED_P
28659 is TRUE if we can only set the memory with instructions
28660 meeting alignment requirements. USE_STRD_P is TRUE if we
28661 can use strd to set the memory. */
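/* For example, with a word-aligned destination, no strd and LENGTH == 14,
   the estimate is the cost of loading VAL plus (14 >> 2) word stores plus
   leftover[14 & 3] == 1 extra store; the block is then expanded inline
   only if that total does not exceed arm_block_set_max_insns ().  */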
28662 static bool
28663 arm_block_set_non_vect_profit_p (rtx val,
28664 unsigned HOST_WIDE_INT length,
28665 unsigned HOST_WIDE_INT align,
28666 bool unaligned_p, bool use_strd_p)
28668 int num = 0;
28669 /* For a leftover of 0-7 bytes, this table gives the minimum number of
28670 strb/strh/str instructions needed to store it. */
28671 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28673 if (unaligned_p)
28675 num = arm_const_inline_cost (SET, val);
28676 num += length / align + length % align;
28678 else if (use_strd_p)
28680 num = arm_const_double_inline_cost (val);
28681 num += (length >> 3) + leftover[length & 7];
28683 else
28685 num = arm_const_inline_cost (SET, val);
28686 num += (length >> 2) + leftover[length & 3];
28689 /* We may be able to combine last pair STRH/STRB into a single STR
28690 by shifting one byte back. */
28691 if (unaligned_access && length > 3 && (length & 3) == 3)
28692 num--;
28694 return (num <= arm_block_set_max_insns ());
28697 /* Return TRUE if it's profitable to set a block of memory in the
28698 vectorized case. LENGTH is the number of bytes to set.
28699 ALIGN is the alignment of the destination memory in bytes.
28700 MODE is the vector mode used to set the memory. */
28701 static bool
28702 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28703 unsigned HOST_WIDE_INT align,
28704 machine_mode mode)
28706 int num;
28707 bool unaligned_p = ((align & 3) != 0);
28708 unsigned int nelt = GET_MODE_NUNITS (mode);
28710 /* Instruction loading constant value. */
28711 num = 1;
28712 /* Instructions storing the memory. */
28713 num += (length + nelt - 1) / nelt;
28714 /* Instructions adjusting the address expression. Only need to
28715 adjust address expression if it's 4 bytes aligned and bytes
28716 leftover can only be stored by mis-aligned store instruction. */
28717 if (!unaligned_p && (length & 3) != 0)
28718 num++;
28720 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28721 if (!unaligned_p && mode == V16QImode)
28722 num--;
28724 return (num <= arm_block_set_max_insns ());
28727 /* Set a block of memory using vectorization instructions for the
28728 unaligned case. We fill the first LENGTH bytes of the memory
28729 area starting from DSTBASE with byte constant VALUE. ALIGN is
28730 the alignment requirement of memory. Return TRUE if succeeded. */
28731 static bool
28732 arm_block_set_unaligned_vect (rtx dstbase,
28733 unsigned HOST_WIDE_INT length,
28734 unsigned HOST_WIDE_INT value,
28735 unsigned HOST_WIDE_INT align)
28737 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28738 rtx dst, mem;
28739 rtx val_elt, val_vec, reg;
28740 rtx rval[MAX_VECT_LEN];
28741 rtx (*gen_func) (rtx, rtx);
28742 machine_mode mode;
28743 unsigned HOST_WIDE_INT v = value;
28745 gcc_assert ((align & 0x3) != 0);
28746 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28747 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28748 if (length >= nelt_v16)
28750 mode = V16QImode;
28751 gen_func = gen_movmisalignv16qi;
28753 else
28755 mode = V8QImode;
28756 gen_func = gen_movmisalignv8qi;
28758 nelt_mode = GET_MODE_NUNITS (mode);
28759 gcc_assert (length >= nelt_mode);
28760 /* Skip if it isn't profitable. */
28761 if (!arm_block_set_vect_profit_p (length, align, mode))
28762 return false;
28764 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28765 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28767 v = sext_hwi (v, BITS_PER_WORD);
28768 val_elt = GEN_INT (v);
28769 for (j = 0; j < nelt_mode; j++)
28770 rval[j] = val_elt;
28772 reg = gen_reg_rtx (mode);
28773 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28774 /* Emit instruction loading the constant value. */
28775 emit_move_insn (reg, val_vec);
28777 /* Handle nelt_mode bytes in a vector. */
28778 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28780 emit_insn ((*gen_func) (mem, reg));
28781 if (i + 2 * nelt_mode <= length)
28782 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28785 /* If at least nelt_v8 bytes are left over, we must be in
28786 V16QImode. */
28787 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28789 /* Handle (8, 16) bytes leftover. */
28790 if (i + nelt_v8 < length)
28792 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28793 /* We are shifting bytes back, set the alignment accordingly. */
28794 if ((length & 1) != 0 && align >= 2)
28795 set_mem_align (mem, BITS_PER_UNIT);
28797 emit_insn (gen_movmisalignv16qi (mem, reg));
28799 /* Handle (0, 8] bytes leftover. */
28800 else if (i < length && i + nelt_v8 >= length)
28802 if (mode == V16QImode)
28804 reg = gen_lowpart (V8QImode, reg);
28805 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28807 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28808 + (nelt_mode - nelt_v8))));
28809 /* We are shifting bytes back, set the alignment accordingly. */
28810 if ((length & 1) != 0 && align >= 2)
28811 set_mem_align (mem, BITS_PER_UNIT);
28813 emit_insn (gen_movmisalignv8qi (mem, reg));
28816 return true;
28819 /* Set a block of memory using vectorization instructions for the
28820 aligned case. We fill the first LENGTH bytes of the memory area
28821 starting from DSTBASE with byte constant VALUE. ALIGN is the
28822 alignment requirement of memory. Return TRUE if succeeded. */
28823 static bool
28824 arm_block_set_aligned_vect (rtx dstbase,
28825 unsigned HOST_WIDE_INT length,
28826 unsigned HOST_WIDE_INT value,
28827 unsigned HOST_WIDE_INT align)
28829 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28830 rtx dst, addr, mem;
28831 rtx val_elt, val_vec, reg;
28832 rtx rval[MAX_VECT_LEN];
28833 machine_mode mode;
28834 unsigned HOST_WIDE_INT v = value;
28836 gcc_assert ((align & 0x3) == 0);
28837 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28838 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28839 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28840 mode = V16QImode;
28841 else
28842 mode = V8QImode;
28844 nelt_mode = GET_MODE_NUNITS (mode);
28845 gcc_assert (length >= nelt_mode);
28846 /* Skip if it isn't profitable. */
28847 if (!arm_block_set_vect_profit_p (length, align, mode))
28848 return false;
28850 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28852 v = sext_hwi (v, BITS_PER_WORD);
28853 val_elt = GEN_INT (v);
28854 for (j = 0; j < nelt_mode; j++)
28855 rval[j] = val_elt;
28857 reg = gen_reg_rtx (mode);
28858 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28859 /* Emit instruction loading the constant value. */
28860 emit_move_insn (reg, val_vec);
28862 i = 0;
28863 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28864 if (mode == V16QImode)
28866 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28867 emit_insn (gen_movmisalignv16qi (mem, reg));
28868 i += nelt_mode;
28869 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28870 if (i + nelt_v8 < length && i + nelt_v16 > length)
28872 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28873 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28874 /* We are shifting bytes back, set the alignment accordingly. */
28875 if ((length & 0x3) == 0)
28876 set_mem_align (mem, BITS_PER_UNIT * 4);
28877 else if ((length & 0x1) == 0)
28878 set_mem_align (mem, BITS_PER_UNIT * 2);
28879 else
28880 set_mem_align (mem, BITS_PER_UNIT);
28882 emit_insn (gen_movmisalignv16qi (mem, reg));
28883 return true;
28885 /* Fall through for bytes leftover. */
28886 mode = V8QImode;
28887 nelt_mode = GET_MODE_NUNITS (mode);
28888 reg = gen_lowpart (V8QImode, reg);
28891 /* Handle 8 bytes in a vector. */
28892 for (; (i + nelt_mode <= length); i += nelt_mode)
28894 addr = plus_constant (Pmode, dst, i);
28895 mem = adjust_automodify_address (dstbase, mode, addr, i);
28896 emit_move_insn (mem, reg);
28899 /* Handle single word leftover by shifting 4 bytes back. We can
28900 use aligned access for this case. */
28901 if (i + UNITS_PER_WORD == length)
28903 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28904 mem = adjust_automodify_address (dstbase, mode,
28905 addr, i - UNITS_PER_WORD);
28906 /* We are shifting 4 bytes back, set the alignment accordingly. */
28907 if (align > UNITS_PER_WORD)
28908 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28910 emit_move_insn (mem, reg);
28912 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28913 We have to use unaligned access for this case. */
28914 else if (i < length)
28916 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28917 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28918 /* We are shifting bytes back, set the alignment accordingly. */
28919 if ((length & 1) == 0)
28920 set_mem_align (mem, BITS_PER_UNIT * 2);
28921 else
28922 set_mem_align (mem, BITS_PER_UNIT);
28924 emit_insn (gen_movmisalignv8qi (mem, reg));
28927 return true;
28930 /* Set a block of memory using plain strh/strb instructions, only
28931 using instructions allowed by ALIGN on the processor. We fill the
28932 first LENGTH bytes of the memory area starting from DSTBASE
28933 with byte constant VALUE. ALIGN is the alignment requirement
28934 of memory. */
28935 static bool
28936 arm_block_set_unaligned_non_vect (rtx dstbase,
28937 unsigned HOST_WIDE_INT length,
28938 unsigned HOST_WIDE_INT value,
28939 unsigned HOST_WIDE_INT align)
28941 unsigned int i;
28942 rtx dst, addr, mem;
28943 rtx val_exp, val_reg, reg;
28944 machine_mode mode;
28945 HOST_WIDE_INT v = value;
28947 gcc_assert (align == 1 || align == 2);
28949 if (align == 2)
28950 v |= (value << BITS_PER_UNIT);
28952 v = sext_hwi (v, BITS_PER_WORD);
28953 val_exp = GEN_INT (v);
28954 /* Skip if it isn't profitable. */
28955 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28956 align, true, false))
28957 return false;
28959 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28960 mode = (align == 2 ? HImode : QImode);
28961 val_reg = force_reg (SImode, val_exp);
28962 reg = gen_lowpart (mode, val_reg);
28964 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28966 addr = plus_constant (Pmode, dst, i);
28967 mem = adjust_automodify_address (dstbase, mode, addr, i);
28968 emit_move_insn (mem, reg);
28971 /* Handle single byte leftover. */
28972 if (i + 1 == length)
28974 reg = gen_lowpart (QImode, val_reg);
28975 addr = plus_constant (Pmode, dst, i);
28976 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28977 emit_move_insn (mem, reg);
28978 i++;
28981 gcc_assert (i == length);
28982 return true;
28985 /* Set a block of memory using plain strd/str/strh/strb instructions,
28986 to permit unaligned copies on processors which support unaligned
28987 semantics for those instructions. We fill the first LENGTH bytes
28988 of the memory area starting from DSTBASE with byte constant VALUE.
28989 ALIGN is the alignment requirement of memory. */
28990 static bool
28991 arm_block_set_aligned_non_vect (rtx dstbase,
28992 unsigned HOST_WIDE_INT length,
28993 unsigned HOST_WIDE_INT value,
28994 unsigned HOST_WIDE_INT align)
28996 unsigned int i;
28997 rtx dst, addr, mem;
28998 rtx val_exp, val_reg, reg;
28999 unsigned HOST_WIDE_INT v;
29000 bool use_strd_p;
29002 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29003 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29005 v = (value | (value << 8) | (value << 16) | (value << 24));
29006 if (length < UNITS_PER_WORD)
29007 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29009 if (use_strd_p)
29010 v |= (v << BITS_PER_WORD);
29011 else
29012 v = sext_hwi (v, BITS_PER_WORD);
29014 val_exp = GEN_INT (v);
29015 /* Skip if it isn't profitable. */
29016 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29017 align, false, use_strd_p))
29019 if (!use_strd_p)
29020 return false;
29022 /* Try without strd. */
29023 v = (v >> BITS_PER_WORD);
29024 v = sext_hwi (v, BITS_PER_WORD);
29025 val_exp = GEN_INT (v);
29026 use_strd_p = false;
29027 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29028 align, false, use_strd_p))
29029 return false;
29032 i = 0;
29033 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29034 /* Handle double words using strd if possible. */
29035 if (use_strd_p)
29037 val_reg = force_reg (DImode, val_exp);
29038 reg = val_reg;
29039 for (; (i + 8 <= length); i += 8)
29041 addr = plus_constant (Pmode, dst, i);
29042 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29043 emit_move_insn (mem, reg);
29046 else
29047 val_reg = force_reg (SImode, val_exp);
29049 /* Handle words. */
29050 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29051 for (; (i + 4 <= length); i += 4)
29053 addr = plus_constant (Pmode, dst, i);
29054 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29055 if ((align & 3) == 0)
29056 emit_move_insn (mem, reg);
29057 else
29058 emit_insn (gen_unaligned_storesi (mem, reg));
29061 /* Merge last pair of STRH and STRB into a STR if possible. */
29062 if (unaligned_access && i > 0 && (i + 3) == length)
29064 addr = plus_constant (Pmode, dst, i - 1);
29065 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29066 /* We are shifting one byte back, set the alignment accordingly. */
29067 if ((align & 1) == 0)
29068 set_mem_align (mem, BITS_PER_UNIT);
29070 /* Most likely this is an unaligned access, and we can't tell at
29071 compilation time. */
29072 emit_insn (gen_unaligned_storesi (mem, reg));
29073 return true;
29076 /* Handle half word leftover. */
29077 if (i + 2 <= length)
29079 reg = gen_lowpart (HImode, val_reg);
29080 addr = plus_constant (Pmode, dst, i);
29081 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29082 if ((align & 1) == 0)
29083 emit_move_insn (mem, reg);
29084 else
29085 emit_insn (gen_unaligned_storehi (mem, reg));
29087 i += 2;
29090 /* Handle single byte leftover. */
29091 if (i + 1 == length)
29093 reg = gen_lowpart (QImode, val_reg);
29094 addr = plus_constant (Pmode, dst, i);
29095 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29096 emit_move_insn (mem, reg);
29099 return true;
29102 /* Set a block of memory using vectorization instructions for both
29103 aligned and unaligned cases. We fill the first LENGTH bytes of
29104 the memory area starting from DSTBASE with byte constant VALUE.
29105 ALIGN is the alignment requirement of memory. */
29106 static bool
29107 arm_block_set_vect (rtx dstbase,
29108 unsigned HOST_WIDE_INT length,
29109 unsigned HOST_WIDE_INT value,
29110 unsigned HOST_WIDE_INT align)
29112 /* Check whether we need to use unaligned store instructions. */
29113 if (((align & 3) != 0 || (length & 3) != 0)
29114 /* Check whether unaligned store instructions are available. */
29115 && (!unaligned_access || BYTES_BIG_ENDIAN))
29116 return false;
29118 if ((align & 3) == 0)
29119 return arm_block_set_aligned_vect (dstbase, length, value, align);
29120 else
29121 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29124 /* Expand a string store (memory set) operation. First we try to do it
29125 using vectorization instructions, then try ARM unaligned access and
29126 double-word stores if profitable. OPERANDS[0] is the destination,
29127 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29128 initialize the memory with, OPERANDS[3] is the known alignment of the
29129 destination. */
29130 bool
29131 arm_gen_setmem (rtx *operands)
29133 rtx dstbase = operands[0];
29134 unsigned HOST_WIDE_INT length;
29135 unsigned HOST_WIDE_INT value;
29136 unsigned HOST_WIDE_INT align;
29138 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29139 return false;
29141 length = UINTVAL (operands[1]);
29142 if (length > 64)
29143 return false;
29145 value = (UINTVAL (operands[2]) & 0xFF);
29146 align = UINTVAL (operands[3]);
29147 if (TARGET_NEON && length >= 8
29148 && current_tune->string_ops_prefer_neon
29149 && arm_block_set_vect (dstbase, length, value, align))
29150 return true;
29152 if (!unaligned_access && (align & 3) != 0)
29153 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29155 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29158 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29160 static unsigned HOST_WIDE_INT
29161 arm_asan_shadow_offset (void)
29163 return (unsigned HOST_WIDE_INT) 1 << 29;
29167 /* This is a temporary fix for PR60655. Ideally we need
29168 to handle most of these cases in the generic part but
29169 currently we reject minus (..) (sym_ref). We try to
29170 ameliorate the case with minus (sym_ref1) (sym_ref2)
29171 where they are in the same section. */
29173 static bool
29174 arm_const_not_ok_for_debug_p (rtx p)
29176 tree decl_op0 = NULL;
29177 tree decl_op1 = NULL;
29179 if (GET_CODE (p) == MINUS)
29181 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29183 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29184 if (decl_op1
29185 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29186 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29188 if ((TREE_CODE (decl_op1) == VAR_DECL
29189 || TREE_CODE (decl_op1) == CONST_DECL)
29190 && (TREE_CODE (decl_op0) == VAR_DECL
29191 || TREE_CODE (decl_op0) == CONST_DECL))
29192 return (get_variable_section (decl_op1, false)
29193 != get_variable_section (decl_op0, false));
29195 if (TREE_CODE (decl_op1) == LABEL_DECL
29196 && TREE_CODE (decl_op0) == LABEL_DECL)
29197 return (DECL_CONTEXT (decl_op1)
29198 != DECL_CONTEXT (decl_op0));
29201 return true;
29205 return false;
29208 /* Return TRUE if X is a reference to a value in a constant pool. */
29209 extern bool
29210 arm_is_constant_pool_ref (rtx x)
29212 return (MEM_P (x)
29213 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29214 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29217 /* If MEM is in the form [base+offset], extract the two parts of the
29218 address and store them in BASE and OFFSET; otherwise return false
29219 after clearing BASE and OFFSET. */
29221 static bool
29222 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29224 rtx addr;
29226 gcc_assert (MEM_P (mem));
29228 addr = XEXP (mem, 0);
29230 /* Strip off const from addresses like (const (addr)). */
29231 if (GET_CODE (addr) == CONST)
29232 addr = XEXP (addr, 0);
29234 if (GET_CODE (addr) == REG)
29236 *base = addr;
29237 *offset = const0_rtx;
29238 return true;
29241 if (GET_CODE (addr) == PLUS
29242 && GET_CODE (XEXP (addr, 0)) == REG
29243 && CONST_INT_P (XEXP (addr, 1)))
29245 *base = XEXP (addr, 0);
29246 *offset = XEXP (addr, 1);
29247 return true;
29250 *base = NULL_RTX;
29251 *offset = NULL_RTX;
29253 return false;
29256 /* If INSN is a load or store whose address is of the form [base+offset],
29257 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
29258 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
29259 otherwise return FALSE. */
29261 static bool
29262 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29264 rtx x, dest, src;
29266 gcc_assert (INSN_P (insn));
29267 x = PATTERN (insn);
29268 if (GET_CODE (x) != SET)
29269 return false;
29271 src = SET_SRC (x);
29272 dest = SET_DEST (x);
29273 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29275 *is_load = false;
29276 extract_base_offset_in_addr (dest, base, offset);
29278 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29280 *is_load = true;
29281 extract_base_offset_in_addr (src, base, offset);
29283 else
29284 return false;
29286 return (*base != NULL_RTX && *offset != NULL_RTX);
29289 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29291 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29292 and PRI are only calculated for these instructions. For other instructions,
29293 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29294 instruction fusion can be supported by returning different priorities.
29296 It's important that irrelevant instructions get the largest FUSION_PRI. */
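/* The effect is that loads and stores off the same base register get
   nearby priorities, ordered by offset, so e.g. an ldr from [rN, #4] is
   scheduled just before an ldr from [rN, #8] and the pair becomes a
   candidate for fusion.  */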
29298 static void
29299 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29300 int *fusion_pri, int *pri)
29302 int tmp, off_val;
29303 bool is_load;
29304 rtx base, offset;
29306 gcc_assert (INSN_P (insn));
29308 tmp = max_pri - 1;
29309 if (!fusion_load_store (insn, &base, &offset, &is_load))
29311 *pri = tmp;
29312 *fusion_pri = tmp;
29313 return;
29316 /* Load goes first. */
29317 if (is_load)
29318 *fusion_pri = tmp - 1;
29319 else
29320 *fusion_pri = tmp - 2;
29322 tmp /= 2;
29324 /* INSN with smaller base register goes first. */
29325 tmp -= ((REGNO (base) & 0xff) << 20);
29327 /* INSN with smaller offset goes first. */
29328 off_val = (int)(INTVAL (offset));
29329 if (off_val >= 0)
29330 tmp -= (off_val & 0xfffff);
29331 else
29332 tmp += ((- off_val) & 0xfffff);
29334 *pri = tmp;
29335 return;
29337 #include "gt-arm.h"