gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
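/* Carries the (up to four) immediate operands that optimal_immediate_sequence
   computes when arm_gen_constant splits a constant into a short insn sequence.  */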
108 struct four_ints
109 {
110   int i[4];
111 };
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129 inline static int thumb1_index_register_rtx_p (rtx, int);
130 static int thumb_far_jump_used_p (void);
131 static bool thumb_force_lr_save (void);
132 static unsigned arm_size_return_regs (void);
133 static bool arm_assemble_integer (rtx, unsigned int, int);
134 static void arm_print_operand (FILE *, rtx, int);
135 static void arm_print_operand_address (FILE *, rtx);
136 static bool arm_print_operand_punct_valid_p (unsigned char code);
137 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138 static arm_cc get_arm_condition_code (rtx);
139 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140 static const char *output_multi_immediate (rtx *, const char *, const char *,
141 int, HOST_WIDE_INT);
142 static const char *shift_op (rtx, HOST_WIDE_INT *);
143 static struct machine_function *arm_init_machine_status (void);
144 static void thumb_exit (FILE *, int);
145 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147 static Mnode *add_minipool_forward_ref (Mfix *);
148 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149 static Mnode *add_minipool_backward_ref (Mfix *);
150 static void assign_minipool_offsets (Mfix *);
151 static void arm_print_value (FILE *, rtx);
152 static void dump_minipool (rtx_insn *);
153 static int arm_barrier_cost (rtx_insn *);
154 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157 machine_mode, rtx);
158 static void arm_reorg (void);
159 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160 static unsigned long arm_compute_save_reg0_reg12_mask (void);
161 static unsigned long arm_compute_save_reg_mask (void);
162 static unsigned long arm_isr_value (tree);
163 static unsigned long arm_compute_func_type (void);
164 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169 #endif
170 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172 static int arm_comp_type_attributes (const_tree, const_tree);
173 static void arm_set_default_type_attributes (tree);
174 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176 static int optimal_immediate_sequence (enum rtx_code code,
177 unsigned HOST_WIDE_INT val,
178 struct four_ints *return_sequence);
179 static int optimal_immediate_sequence_1 (enum rtx_code code,
180 unsigned HOST_WIDE_INT val,
181 struct four_ints *return_sequence,
182 int i);
183 static int arm_get_strip_length (int);
184 static bool arm_function_ok_for_sibcall (tree, tree);
185 static machine_mode arm_promote_function_mode (const_tree,
186 machine_mode, int *,
187 const_tree, int);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 static rtx arm_function_value (const_tree, const_tree, bool);
190 static rtx arm_libcall_value_1 (machine_mode);
191 static rtx arm_libcall_value (machine_mode, const_rtx);
192 static bool arm_function_value_regno_p (const unsigned int);
193 static void arm_internal_label (FILE *, const char *, unsigned long);
194 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195 tree);
196 static bool arm_have_conditional_execution (void);
197 static bool arm_cannot_force_const_mem (machine_mode, rtx);
198 static bool arm_legitimate_constant_p (machine_mode, rtx);
199 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209 static void emit_constant_insn (rtx cond, rtx pattern);
210 static rtx_insn *emit_set_insn (rtx, rtx);
211 static rtx emit_multi_reg_push (unsigned long, unsigned long);
212 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213 tree, bool);
214 static rtx arm_function_arg (cumulative_args_t, machine_mode,
215 const_tree, bool);
216 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217 const_tree, bool);
218 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220 const_tree);
221 static rtx aapcs_libcall_value (machine_mode);
222 static int aapcs_select_return_coproc (const_tree, const_tree);
224 #ifdef OBJECT_FORMAT_ELF
225 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227 #endif
228 #ifndef ARM_PE
229 static void arm_encode_section_info (tree, rtx, int);
230 #endif
232 static void arm_file_end (void);
233 static void arm_file_start (void);
235 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236 tree, int *, int);
237 static bool arm_pass_by_reference (cumulative_args_t,
238 machine_mode, const_tree, bool);
239 static bool arm_promote_prototypes (const_tree);
240 static bool arm_default_short_enums (void);
241 static bool arm_align_anon_bitfield (void);
242 static bool arm_return_in_msb (const_tree);
243 static bool arm_must_pass_in_stack (machine_mode, const_tree);
244 static bool arm_return_in_memory (const_tree, const_tree);
245 #if ARM_UNWIND_INFO
246 static void arm_unwind_emit (FILE *, rtx_insn *);
247 static bool arm_output_ttype (rtx);
248 static void arm_asm_emit_except_personality (rtx);
249 static void arm_asm_init_sections (void);
250 #endif
251 static rtx arm_dwarf_register_span (rtx);
253 static tree arm_cxx_guard_type (void);
254 static bool arm_cxx_guard_mask_bit (void);
255 static tree arm_get_cookie_size (tree);
256 static bool arm_cookie_has_size (void);
257 static bool arm_cxx_cdtor_returns_this (void);
258 static bool arm_cxx_key_method_may_be_inline (void);
259 static void arm_cxx_determine_class_data_visibility (tree);
260 static bool arm_cxx_class_data_always_comdat (void);
261 static bool arm_cxx_use_aeabi_atexit (void);
262 static void arm_init_libfuncs (void);
263 static tree arm_build_builtin_va_list (void);
264 static void arm_expand_builtin_va_start (tree, rtx);
265 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static void arm_option_override (void);
267 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268 static bool arm_macro_fusion_p (void);
269 static bool arm_cannot_copy_insn_p (rtx_insn *);
270 static int arm_issue_rate (void);
271 static int arm_first_cycle_multipass_dfa_lookahead (void);
272 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274 static bool arm_output_addr_const_extra (FILE *, rtx);
275 static bool arm_allocate_stack_slots_for_args (void);
276 static bool arm_warn_func_return (tree);
277 static const char *arm_invalid_parameter_type (const_tree t);
278 static const char *arm_invalid_return_type (const_tree t);
279 static tree arm_promoted_type (const_tree t);
280 static tree arm_convert_to_type (tree type, tree expr);
281 static bool arm_scalar_mode_supported_p (machine_mode);
282 static bool arm_frame_pointer_required (void);
283 static bool arm_can_eliminate (const int, const int);
284 static void arm_asm_trampoline_template (FILE *);
285 static void arm_trampoline_init (rtx, tree, rtx);
286 static rtx arm_trampoline_adjust_address (rtx);
287 static rtx arm_pic_static_addr (rtx orig, rtx reg);
288 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool arm_array_mode_supported_p (machine_mode,
292 unsigned HOST_WIDE_INT);
293 static machine_mode arm_preferred_simd_mode (machine_mode);
294 static bool arm_class_likely_spilled_p (reg_class_t);
295 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298 const_tree type,
299 int misalignment,
300 bool is_packed);
301 static void arm_conditional_register_usage (void);
302 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303 static unsigned int arm_autovectorize_vector_sizes (void);
304 static int arm_default_branch_cost (bool, bool);
305 static int arm_cortex_a5_branch_cost (bool, bool);
306 static int arm_cortex_m_branch_cost (bool, bool);
307 static int arm_cortex_m7_branch_cost (bool, bool);
309 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310 const unsigned char *sel);
312 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
314 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315 tree vectype,
316 int misalign ATTRIBUTE_UNUSED);
317 static unsigned arm_add_stmt_cost (void *data, int count,
318 enum vect_cost_for_stmt kind,
319 struct _stmt_vec_info *stmt_info,
320 int misalign,
321 enum vect_cost_model_location where);
323 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324 bool op0_preserve_value);
325 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
327 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, NULL, false },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, NULL, false },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
343 false },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
346 false },
347 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
348 false },
349 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359      multiple times.
360   */
361 { "dllimport", 0, 0, true, false, false, NULL, false },
362 { "dllexport", 0, 0, true, false, false, NULL, false },
363 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
364 false },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
367 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
369 false },
370 #endif
371   { NULL, 0, 0, false, false, false, NULL, false }
372 };
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
383 #undef TARGET_LRA_P
384 #define TARGET_LRA_P hook_bool_void_true
386 #undef TARGET_ATTRIBUTE_TABLE
387 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_COMP_TYPE_ATTRIBUTES
419 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421 #undef TARGET_SCHED_MACRO_FUSION_P
422 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER arm_sched_reorder
436 #undef TARGET_REGISTER_MOVE_COST
437 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
439 #undef TARGET_MEMORY_MOVE_COST
440 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
442 #undef TARGET_ENCODE_SECTION_INFO
443 #ifdef ARM_PE
444 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
445 #else
446 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
447 #endif
449 #undef TARGET_STRIP_NAME_ENCODING
450 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
452 #undef TARGET_ASM_INTERNAL_LABEL
453 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
455 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
456 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
458 #undef TARGET_FUNCTION_VALUE
459 #define TARGET_FUNCTION_VALUE arm_function_value
461 #undef TARGET_LIBCALL_VALUE
462 #define TARGET_LIBCALL_VALUE arm_libcall_value
464 #undef TARGET_FUNCTION_VALUE_REGNO_P
465 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
467 #undef TARGET_ASM_OUTPUT_MI_THUNK
468 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
472 #undef TARGET_RTX_COSTS
473 #define TARGET_RTX_COSTS arm_rtx_costs
474 #undef TARGET_ADDRESS_COST
475 #define TARGET_ADDRESS_COST arm_address_cost
477 #undef TARGET_SHIFT_TRUNCATION_MASK
478 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481 #undef TARGET_ARRAY_MODE_SUPPORTED_P
482 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487 arm_autovectorize_vector_sizes
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
492 #undef TARGET_INIT_BUILTINS
493 #define TARGET_INIT_BUILTINS arm_init_builtins
494 #undef TARGET_EXPAND_BUILTIN
495 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
496 #undef TARGET_BUILTIN_DECL
497 #define TARGET_BUILTIN_DECL arm_builtin_decl
499 #undef TARGET_INIT_LIBFUNCS
500 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504 #undef TARGET_PROMOTE_PROTOTYPES
505 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506 #undef TARGET_PASS_BY_REFERENCE
507 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG arm_function_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_BOUNDARY
515 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
520 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
523 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525 #undef TARGET_TRAMPOLINE_INIT
526 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
530 #undef TARGET_WARN_FUNC_RETURN
531 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
533 #undef TARGET_DEFAULT_SHORT_ENUMS
534 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
536 #undef TARGET_ALIGN_ANON_BITFIELD
537 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
539 #undef TARGET_NARROW_VOLATILE_BITFIELD
540 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
542 #undef TARGET_CXX_GUARD_TYPE
543 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
545 #undef TARGET_CXX_GUARD_MASK_BIT
546 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
548 #undef TARGET_CXX_GET_COOKIE_SIZE
549 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
551 #undef TARGET_CXX_COOKIE_HAS_SIZE
552 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
554 #undef TARGET_CXX_CDTOR_RETURNS_THIS
555 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
557 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
560 #undef TARGET_CXX_USE_AEABI_ATEXIT
561 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
563 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565 arm_cxx_determine_class_data_visibility
567 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
570 #undef TARGET_RETURN_IN_MSB
571 #define TARGET_RETURN_IN_MSB arm_return_in_msb
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
576 #undef TARGET_MUST_PASS_IN_STACK
577 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
579 #if ARM_UNWIND_INFO
580 #undef TARGET_ASM_UNWIND_EMIT
581 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
583 /* EABI unwinding tables use a different format for the typeinfo tables. */
584 #undef TARGET_ASM_TTYPE
585 #define TARGET_ASM_TTYPE arm_output_ttype
587 #undef TARGET_ARM_EABI_UNWINDER
588 #define TARGET_ARM_EABI_UNWINDER true
590 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
593 #undef TARGET_ASM_INIT_SECTIONS
594 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595 #endif /* ARM_UNWIND_INFO */
597 #undef TARGET_DWARF_REGISTER_SPAN
598 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
600 #undef TARGET_CANNOT_COPY_INSN_P
601 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
603 #ifdef HAVE_AS_TLS
604 #undef TARGET_HAVE_TLS
605 #define TARGET_HAVE_TLS true
606 #endif
608 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
609 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
611 #undef TARGET_LEGITIMATE_CONSTANT_P
612 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
617 #undef TARGET_MAX_ANCHOR_OFFSET
618 #define TARGET_MAX_ANCHOR_OFFSET 4095
620 /* The minimum is set such that the total size of the block
621 for a particular anchor is -4088 + 1 + 4095 bytes, which is
622 divisible by eight, ensuring natural spacing of anchors. */
623 #undef TARGET_MIN_ANCHOR_OFFSET
624 #define TARGET_MIN_ANCHOR_OFFSET -4088
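/* That is, each anchor covers offsets -4088 through +4095, a range of
   4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight, so
   consecutive anchors stay naturally aligned.  */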
626 #undef TARGET_SCHED_ISSUE_RATE
627 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631 arm_first_cycle_multipass_dfa_lookahead
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635 arm_first_cycle_multipass_dfa_lookahead_guard
637 #undef TARGET_MANGLE_TYPE
638 #define TARGET_MANGLE_TYPE arm_mangle_type
640 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
643 #undef TARGET_BUILD_BUILTIN_VA_LIST
644 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645 #undef TARGET_EXPAND_BUILTIN_VA_START
646 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
648 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653 #endif
655 #undef TARGET_LEGITIMATE_ADDRESS_P
656 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
658 #undef TARGET_PREFERRED_RELOAD_CLASS
659 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
661 #undef TARGET_INVALID_PARAMETER_TYPE
662 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
664 #undef TARGET_INVALID_RETURN_TYPE
665 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
667 #undef TARGET_PROMOTED_TYPE
668 #define TARGET_PROMOTED_TYPE arm_promoted_type
670 #undef TARGET_CONVERT_TO_TYPE
671 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
673 #undef TARGET_SCALAR_MODE_SUPPORTED_P
674 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
676 #undef TARGET_FRAME_POINTER_REQUIRED
677 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
679 #undef TARGET_CAN_ELIMINATE
680 #define TARGET_CAN_ELIMINATE arm_can_eliminate
682 #undef TARGET_CONDITIONAL_REGISTER_USAGE
683 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
685 #undef TARGET_CLASS_LIKELY_SPILLED_P
686 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
688 #undef TARGET_VECTORIZE_BUILTINS
689 #define TARGET_VECTORIZE_BUILTINS
691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693 arm_builtin_vectorized_function
695 #undef TARGET_VECTOR_ALIGNMENT
696 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
698 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700 arm_vector_alignment_reachable
702 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704 arm_builtin_support_vector_misalignment
706 #undef TARGET_PREFERRED_RENAME_CLASS
707 #define TARGET_PREFERRED_RENAME_CLASS \
708 arm_preferred_rename_class
710 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712 arm_vectorize_vec_perm_const_ok
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716 arm_builtin_vectorization_cost
717 #undef TARGET_VECTORIZE_ADD_STMT_COST
718 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
720 #undef TARGET_CANONICALIZE_COMPARISON
721 #define TARGET_CANONICALIZE_COMPARISON \
722 arm_canonicalize_comparison
724 #undef TARGET_ASAN_SHADOW_OFFSET
725 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
727 #undef MAX_INSN_PER_IT_BLOCK
728 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
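/* With -mrestrict-it (the ARMv8-compatible IT rules) an IT block may hold
   only a single instruction; otherwise Thumb-2 allows up to four.  */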
730 #undef TARGET_CAN_USE_DOLOOP_P
731 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
733 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
739 #undef TARGET_SCHED_FUSION_PRIORITY
740 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
742 struct gcc_target targetm = TARGET_INITIALIZER;
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
766 /* Which floating point hardware to use.  */
767 const struct arm_fpu_desc *arm_fpu_desc;
769 /* Used for Thumb call_via trampolines. */
770 rtx thumb_call_via_label[14];
771 static int thumb_call_reg_needed;
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
869 /* Nonzero if chip disallows volatile memory access in IT block. */
870 int arm_arch_no_volatile_ce;
872 /* Nonzero if we should use Neon to handle 64-bit operations rather
873    than core registers.  */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 machine_mode output_memory_reference_mode;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* Nonzero if the core has a very small, high-latency, multiply unit. */
908 int arm_m_profile_small_mul = 0;
910 /* The condition codes of the ARM, and the inverse function. */
911 static const char * const arm_condition_codes[] =
912 {
913   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
914   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
915 };
917 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
918 int arm_regs_in_sequence[] =
919 {
920   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
921 };
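/* Unified assembler syntax spells register shifts "lsl"; the older divided
   syntax used "asl" for the same operation.  */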
923 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
924 #define streq(string1, string2) (strcmp (string1, string2) == 0)
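/* The low registers (r0-r7) that may serve as scratch/work registers in
   Thumb-2 sequences, excluding the hard frame pointer, the stack pointer,
   the program counter and the PIC register.  */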
926 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
927 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
928 | (1 << PIC_OFFSET_TABLE_REGNUM)))
930 /* Initialization code. */
932 struct processors
933 {
934   const char *const name;
935   enum processor_type core;
936   const char *arch;
937   enum base_architecture base_arch;
938   const unsigned long flags;
939   const struct tune_params *const tune;
940 };
943 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
944 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
945 prefetch_slots, \
946 l1_size, \
947 l1_line_size
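/* For example, ARM_PREFETCH_BENEFICIAL (4, 32768, 64) simply expands to the
   three comma-separated tune_params fields: 4 prefetch slots, a 32K L1 cache
   and 64-byte cache lines (the numbers here are purely illustrative).  */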
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963   1,                    /* cond_not_taken_branch_cost.  */
964 };
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
967 #include "aarch-cost-tables.h"
971 const struct cpu_cost_table cortexa9_extra_costs =
973 /* ALU */
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
993 /* MULT SImode */
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1002 /* MULT DImode */
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1012 /* LD/ST */
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1) /* store_unaligned. */
1033 /* FP SFmode */
1035 COSTS_N_INSNS (14), /* div. */
1036 COSTS_N_INSNS (4), /* mult. */
1037 COSTS_N_INSNS (7), /* mult_addsub. */
1038 COSTS_N_INSNS (30), /* fma. */
1039 COSTS_N_INSNS (3), /* addsub. */
1040 COSTS_N_INSNS (1), /* fpconst. */
1041 COSTS_N_INSNS (1), /* neg. */
1042 COSTS_N_INSNS (3), /* compare. */
1043 COSTS_N_INSNS (3), /* widen. */
1044 COSTS_N_INSNS (3), /* narrow. */
1045 COSTS_N_INSNS (3), /* toint. */
1046 COSTS_N_INSNS (3), /* fromint. */
1047 COSTS_N_INSNS (3) /* roundint. */
1049 /* FP DFmode */
1051 COSTS_N_INSNS (24), /* div. */
1052 COSTS_N_INSNS (5), /* mult. */
1053 COSTS_N_INSNS (8), /* mult_addsub. */
1054 COSTS_N_INSNS (30), /* fma. */
1055 COSTS_N_INSNS (3), /* addsub. */
1056 COSTS_N_INSNS (1), /* fpconst. */
1057 COSTS_N_INSNS (1), /* neg. */
1058 COSTS_N_INSNS (3), /* compare. */
1059 COSTS_N_INSNS (3), /* widen. */
1060 COSTS_N_INSNS (3), /* narrow. */
1061 COSTS_N_INSNS (3), /* toint. */
1062 COSTS_N_INSNS (3), /* fromint. */
1063 COSTS_N_INSNS (3) /* roundint. */
1066 /* Vector */
1068 COSTS_N_INSNS (1) /* alu. */
1072 const struct cpu_cost_table cortexa8_extra_costs =
1074 /* ALU */
1076 0, /* arith. */
1077 0, /* logical. */
1078 COSTS_N_INSNS (1), /* shift. */
1079 0, /* shift_reg. */
1080 COSTS_N_INSNS (1), /* arith_shift. */
1081 0, /* arith_shift_reg. */
1082 COSTS_N_INSNS (1), /* log_shift. */
1083 0, /* log_shift_reg. */
1084 0, /* extend. */
1085 0, /* extend_arith. */
1086 0, /* bfi. */
1087 0, /* bfx. */
1088 0, /* clz. */
1089 0, /* rev. */
1090 0, /* non_exec. */
1091 true /* non_exec_costs_exec. */
1094 /* MULT SImode */
1096 COSTS_N_INSNS (1), /* simple. */
1097 COSTS_N_INSNS (1), /* flag_setting. */
1098 COSTS_N_INSNS (1), /* extend. */
1099 COSTS_N_INSNS (1), /* add. */
1100 COSTS_N_INSNS (1), /* extend_add. */
1101 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1103 /* MULT DImode */
1105 0, /* simple (N/A). */
1106 0, /* flag_setting (N/A). */
1107 COSTS_N_INSNS (2), /* extend. */
1108 0, /* add (N/A). */
1109 COSTS_N_INSNS (2), /* extend_add. */
1110 0 /* idiv (N/A). */
1113 /* LD/ST */
1115 COSTS_N_INSNS (1), /* load. */
1116 COSTS_N_INSNS (1), /* load_sign_extend. */
1117 COSTS_N_INSNS (1), /* ldrd. */
1118 COSTS_N_INSNS (1), /* ldm_1st. */
1119 1, /* ldm_regs_per_insn_1st. */
1120 2, /* ldm_regs_per_insn_subsequent. */
1121 COSTS_N_INSNS (1), /* loadf. */
1122 COSTS_N_INSNS (1), /* loadd. */
1123 COSTS_N_INSNS (1), /* load_unaligned. */
1124 COSTS_N_INSNS (1), /* store. */
1125 COSTS_N_INSNS (1), /* strd. */
1126 COSTS_N_INSNS (1), /* stm_1st. */
1127 1, /* stm_regs_per_insn_1st. */
1128 2, /* stm_regs_per_insn_subsequent. */
1129 COSTS_N_INSNS (1), /* storef. */
1130 COSTS_N_INSNS (1), /* stored. */
1131 COSTS_N_INSNS (1) /* store_unaligned. */
1134 /* FP SFmode */
1136 COSTS_N_INSNS (36), /* div. */
1137 COSTS_N_INSNS (11), /* mult. */
1138 COSTS_N_INSNS (20), /* mult_addsub. */
1139 COSTS_N_INSNS (30), /* fma. */
1140 COSTS_N_INSNS (9), /* addsub. */
1141 COSTS_N_INSNS (3), /* fpconst. */
1142 COSTS_N_INSNS (3), /* neg. */
1143 COSTS_N_INSNS (6), /* compare. */
1144 COSTS_N_INSNS (4), /* widen. */
1145 COSTS_N_INSNS (4), /* narrow. */
1146 COSTS_N_INSNS (8), /* toint. */
1147 COSTS_N_INSNS (8), /* fromint. */
1148 COSTS_N_INSNS (8) /* roundint. */
1150 /* FP DFmode */
1152 COSTS_N_INSNS (64), /* div. */
1153 COSTS_N_INSNS (16), /* mult. */
1154 COSTS_N_INSNS (25), /* mult_addsub. */
1155 COSTS_N_INSNS (30), /* fma. */
1156 COSTS_N_INSNS (9), /* addsub. */
1157 COSTS_N_INSNS (3), /* fpconst. */
1158 COSTS_N_INSNS (3), /* neg. */
1159 COSTS_N_INSNS (6), /* compare. */
1160 COSTS_N_INSNS (6), /* widen. */
1161 COSTS_N_INSNS (6), /* narrow. */
1162 COSTS_N_INSNS (8), /* toint. */
1163 COSTS_N_INSNS (8), /* fromint. */
1164 COSTS_N_INSNS (8) /* roundint. */
1167 /* Vector */
1169 COSTS_N_INSNS (1) /* alu. */
1173 const struct cpu_cost_table cortexa5_extra_costs =
1175 /* ALU */
1177 0, /* arith. */
1178 0, /* logical. */
1179 COSTS_N_INSNS (1), /* shift. */
1180 COSTS_N_INSNS (1), /* shift_reg. */
1181 COSTS_N_INSNS (1), /* arith_shift. */
1182 COSTS_N_INSNS (1), /* arith_shift_reg. */
1183 COSTS_N_INSNS (1), /* log_shift. */
1184 COSTS_N_INSNS (1), /* log_shift_reg. */
1185 COSTS_N_INSNS (1), /* extend. */
1186 COSTS_N_INSNS (1), /* extend_arith. */
1187 COSTS_N_INSNS (1), /* bfi. */
1188 COSTS_N_INSNS (1), /* bfx. */
1189 COSTS_N_INSNS (1), /* clz. */
1190 COSTS_N_INSNS (1), /* rev. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1196 /* MULT SImode */
1198 0, /* simple. */
1199 COSTS_N_INSNS (1), /* flag_setting. */
1200 COSTS_N_INSNS (1), /* extend. */
1201 COSTS_N_INSNS (1), /* add. */
1202 COSTS_N_INSNS (1), /* extend_add. */
1203 COSTS_N_INSNS (7) /* idiv. */
1205 /* MULT DImode */
1207 0, /* simple (N/A). */
1208 0, /* flag_setting (N/A). */
1209 COSTS_N_INSNS (1), /* extend. */
1210 0, /* add. */
1211 COSTS_N_INSNS (2), /* extend_add. */
1212 0 /* idiv (N/A). */
1215 /* LD/ST */
1217 COSTS_N_INSNS (1), /* load. */
1218 COSTS_N_INSNS (1), /* load_sign_extend. */
1219 COSTS_N_INSNS (6), /* ldrd. */
1220 COSTS_N_INSNS (1), /* ldm_1st. */
1221 1, /* ldm_regs_per_insn_1st. */
1222 2, /* ldm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* loadf. */
1224 COSTS_N_INSNS (4), /* loadd. */
1225 COSTS_N_INSNS (1), /* load_unaligned. */
1226 COSTS_N_INSNS (1), /* store. */
1227 COSTS_N_INSNS (3), /* strd. */
1228 COSTS_N_INSNS (1), /* stm_1st. */
1229 1, /* stm_regs_per_insn_1st. */
1230 2, /* stm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* storef. */
1232 COSTS_N_INSNS (2), /* stored. */
1233 COSTS_N_INSNS (1) /* store_unaligned. */
1236 /* FP SFmode */
1238 COSTS_N_INSNS (15), /* div. */
1239 COSTS_N_INSNS (3), /* mult. */
1240 COSTS_N_INSNS (7), /* mult_addsub. */
1241 COSTS_N_INSNS (7), /* fma. */
1242 COSTS_N_INSNS (3), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (3), /* compare. */
1246 COSTS_N_INSNS (3), /* widen. */
1247 COSTS_N_INSNS (3), /* narrow. */
1248 COSTS_N_INSNS (3), /* toint. */
1249 COSTS_N_INSNS (3), /* fromint. */
1250 COSTS_N_INSNS (3) /* roundint. */
1252 /* FP DFmode */
1254 COSTS_N_INSNS (30), /* div. */
1255 COSTS_N_INSNS (6), /* mult. */
1256 COSTS_N_INSNS (10), /* mult_addsub. */
1257 COSTS_N_INSNS (7), /* fma. */
1258 COSTS_N_INSNS (3), /* addsub. */
1259 COSTS_N_INSNS (3), /* fpconst. */
1260 COSTS_N_INSNS (3), /* neg. */
1261 COSTS_N_INSNS (3), /* compare. */
1262 COSTS_N_INSNS (3), /* widen. */
1263 COSTS_N_INSNS (3), /* narrow. */
1264 COSTS_N_INSNS (3), /* toint. */
1265 COSTS_N_INSNS (3), /* fromint. */
1266 COSTS_N_INSNS (3) /* roundint. */
1269 /* Vector */
1271 COSTS_N_INSNS (1) /* alu. */
1276 const struct cpu_cost_table cortexa7_extra_costs =
1278 /* ALU */
1280 0, /* arith. */
1281 0, /* logical. */
1282 COSTS_N_INSNS (1), /* shift. */
1283 COSTS_N_INSNS (1), /* shift_reg. */
1284 COSTS_N_INSNS (1), /* arith_shift. */
1285 COSTS_N_INSNS (1), /* arith_shift_reg. */
1286 COSTS_N_INSNS (1), /* log_shift. */
1287 COSTS_N_INSNS (1), /* log_shift_reg. */
1288 COSTS_N_INSNS (1), /* extend. */
1289 COSTS_N_INSNS (1), /* extend_arith. */
1290 COSTS_N_INSNS (1), /* bfi. */
1291 COSTS_N_INSNS (1), /* bfx. */
1292 COSTS_N_INSNS (1), /* clz. */
1293 COSTS_N_INSNS (1), /* rev. */
1294 0, /* non_exec. */
1295 true /* non_exec_costs_exec. */
1299 /* MULT SImode */
1301 0, /* simple. */
1302 COSTS_N_INSNS (1), /* flag_setting. */
1303 COSTS_N_INSNS (1), /* extend. */
1304 COSTS_N_INSNS (1), /* add. */
1305 COSTS_N_INSNS (1), /* extend_add. */
1306 COSTS_N_INSNS (7) /* idiv. */
1308 /* MULT DImode */
1310 0, /* simple (N/A). */
1311 0, /* flag_setting (N/A). */
1312 COSTS_N_INSNS (1), /* extend. */
1313 0, /* add. */
1314 COSTS_N_INSNS (2), /* extend_add. */
1315 0 /* idiv (N/A). */
1318 /* LD/ST */
1320 COSTS_N_INSNS (1), /* load. */
1321 COSTS_N_INSNS (1), /* load_sign_extend. */
1322 COSTS_N_INSNS (3), /* ldrd. */
1323 COSTS_N_INSNS (1), /* ldm_1st. */
1324 1, /* ldm_regs_per_insn_1st. */
1325 2, /* ldm_regs_per_insn_subsequent. */
1326 COSTS_N_INSNS (2), /* loadf. */
1327 COSTS_N_INSNS (2), /* loadd. */
1328 COSTS_N_INSNS (1), /* load_unaligned. */
1329 COSTS_N_INSNS (1), /* store. */
1330 COSTS_N_INSNS (3), /* strd. */
1331 COSTS_N_INSNS (1), /* stm_1st. */
1332 1, /* stm_regs_per_insn_1st. */
1333 2, /* stm_regs_per_insn_subsequent. */
1334 COSTS_N_INSNS (2), /* storef. */
1335 COSTS_N_INSNS (2), /* stored. */
1336 COSTS_N_INSNS (1) /* store_unaligned. */
1339 /* FP SFmode */
1341 COSTS_N_INSNS (15), /* div. */
1342 COSTS_N_INSNS (3), /* mult. */
1343 COSTS_N_INSNS (7), /* mult_addsub. */
1344 COSTS_N_INSNS (7), /* fma. */
1345 COSTS_N_INSNS (3), /* addsub. */
1346 COSTS_N_INSNS (3), /* fpconst. */
1347 COSTS_N_INSNS (3), /* neg. */
1348 COSTS_N_INSNS (3), /* compare. */
1349 COSTS_N_INSNS (3), /* widen. */
1350 COSTS_N_INSNS (3), /* narrow. */
1351 COSTS_N_INSNS (3), /* toint. */
1352 COSTS_N_INSNS (3), /* fromint. */
1353 COSTS_N_INSNS (3) /* roundint. */
1355 /* FP DFmode */
1357 COSTS_N_INSNS (30), /* div. */
1358 COSTS_N_INSNS (6), /* mult. */
1359 COSTS_N_INSNS (10), /* mult_addsub. */
1360 COSTS_N_INSNS (7), /* fma. */
1361 COSTS_N_INSNS (3), /* addsub. */
1362 COSTS_N_INSNS (3), /* fpconst. */
1363 COSTS_N_INSNS (3), /* neg. */
1364 COSTS_N_INSNS (3), /* compare. */
1365 COSTS_N_INSNS (3), /* widen. */
1366 COSTS_N_INSNS (3), /* narrow. */
1367 COSTS_N_INSNS (3), /* toint. */
1368 COSTS_N_INSNS (3), /* fromint. */
1369 COSTS_N_INSNS (3) /* roundint. */
1372 /* Vector */
1374 COSTS_N_INSNS (1) /* alu. */
1378 const struct cpu_cost_table cortexa12_extra_costs =
1380 /* ALU */
1382 0, /* arith. */
1383 0, /* logical. */
1384 0, /* shift. */
1385 COSTS_N_INSNS (1), /* shift_reg. */
1386 COSTS_N_INSNS (1), /* arith_shift. */
1387 COSTS_N_INSNS (1), /* arith_shift_reg. */
1388 COSTS_N_INSNS (1), /* log_shift. */
1389 COSTS_N_INSNS (1), /* log_shift_reg. */
1390 0, /* extend. */
1391 COSTS_N_INSNS (1), /* extend_arith. */
1392 0, /* bfi. */
1393 COSTS_N_INSNS (1), /* bfx. */
1394 COSTS_N_INSNS (1), /* clz. */
1395 COSTS_N_INSNS (1), /* rev. */
1396 0, /* non_exec. */
1397 true /* non_exec_costs_exec. */
1399 /* MULT SImode */
1402 COSTS_N_INSNS (2), /* simple. */
1403 COSTS_N_INSNS (3), /* flag_setting. */
1404 COSTS_N_INSNS (2), /* extend. */
1405 COSTS_N_INSNS (3), /* add. */
1406 COSTS_N_INSNS (2), /* extend_add. */
1407 COSTS_N_INSNS (18) /* idiv. */
1409 /* MULT DImode */
1411 0, /* simple (N/A). */
1412 0, /* flag_setting (N/A). */
1413 COSTS_N_INSNS (3), /* extend. */
1414 0, /* add (N/A). */
1415 COSTS_N_INSNS (3), /* extend_add. */
1416 0 /* idiv (N/A). */
1419 /* LD/ST */
1421 COSTS_N_INSNS (3), /* load. */
1422 COSTS_N_INSNS (3), /* load_sign_extend. */
1423 COSTS_N_INSNS (3), /* ldrd. */
1424 COSTS_N_INSNS (3), /* ldm_1st. */
1425 1, /* ldm_regs_per_insn_1st. */
1426 2, /* ldm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (3), /* loadf. */
1428 COSTS_N_INSNS (3), /* loadd. */
1429 0, /* load_unaligned. */
1430 0, /* store. */
1431 0, /* strd. */
1432 0, /* stm_1st. */
1433 1, /* stm_regs_per_insn_1st. */
1434 2, /* stm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (2), /* storef. */
1436 COSTS_N_INSNS (2), /* stored. */
1437 0 /* store_unaligned. */
1440 /* FP SFmode */
1442 COSTS_N_INSNS (17), /* div. */
1443 COSTS_N_INSNS (4), /* mult. */
1444 COSTS_N_INSNS (8), /* mult_addsub. */
1445 COSTS_N_INSNS (8), /* fma. */
1446 COSTS_N_INSNS (4), /* addsub. */
1447 COSTS_N_INSNS (2), /* fpconst. */
1448 COSTS_N_INSNS (2), /* neg. */
1449 COSTS_N_INSNS (2), /* compare. */
1450 COSTS_N_INSNS (4), /* widen. */
1451 COSTS_N_INSNS (4), /* narrow. */
1452 COSTS_N_INSNS (4), /* toint. */
1453 COSTS_N_INSNS (4), /* fromint. */
1454 COSTS_N_INSNS (4) /* roundint. */
1456 /* FP DFmode */
1458 COSTS_N_INSNS (31), /* div. */
1459 COSTS_N_INSNS (4), /* mult. */
1460 COSTS_N_INSNS (8), /* mult_addsub. */
1461 COSTS_N_INSNS (8), /* fma. */
1462 COSTS_N_INSNS (4), /* addsub. */
1463 COSTS_N_INSNS (2), /* fpconst. */
1464 COSTS_N_INSNS (2), /* neg. */
1465 COSTS_N_INSNS (2), /* compare. */
1466 COSTS_N_INSNS (4), /* widen. */
1467 COSTS_N_INSNS (4), /* narrow. */
1468 COSTS_N_INSNS (4), /* toint. */
1469 COSTS_N_INSNS (4), /* fromint. */
1470 COSTS_N_INSNS (4) /* roundint. */
1473 /* Vector */
1475 COSTS_N_INSNS (1) /* alu. */
1479 const struct cpu_cost_table cortexa15_extra_costs =
1481 /* ALU */
1483 0, /* arith. */
1484 0, /* logical. */
1485 0, /* shift. */
1486 0, /* shift_reg. */
1487 COSTS_N_INSNS (1), /* arith_shift. */
1488 COSTS_N_INSNS (1), /* arith_shift_reg. */
1489 COSTS_N_INSNS (1), /* log_shift. */
1490 COSTS_N_INSNS (1), /* log_shift_reg. */
1491 0, /* extend. */
1492 COSTS_N_INSNS (1), /* extend_arith. */
1493 COSTS_N_INSNS (1), /* bfi. */
1494 0, /* bfx. */
1495 0, /* clz. */
1496 0, /* rev. */
1497 0, /* non_exec. */
1498 true /* non_exec_costs_exec. */
1500 /* MULT SImode */
1503 COSTS_N_INSNS (2), /* simple. */
1504 COSTS_N_INSNS (3), /* flag_setting. */
1505 COSTS_N_INSNS (2), /* extend. */
1506 COSTS_N_INSNS (2), /* add. */
1507 COSTS_N_INSNS (2), /* extend_add. */
1508 COSTS_N_INSNS (18) /* idiv. */
1510 /* MULT DImode */
1512 0, /* simple (N/A). */
1513 0, /* flag_setting (N/A). */
1514 COSTS_N_INSNS (3), /* extend. */
1515 0, /* add (N/A). */
1516 COSTS_N_INSNS (3), /* extend_add. */
1517 0 /* idiv (N/A). */
1520 /* LD/ST */
1522 COSTS_N_INSNS (3), /* load. */
1523 COSTS_N_INSNS (3), /* load_sign_extend. */
1524 COSTS_N_INSNS (3), /* ldrd. */
1525 COSTS_N_INSNS (4), /* ldm_1st. */
1526 1, /* ldm_regs_per_insn_1st. */
1527 2, /* ldm_regs_per_insn_subsequent. */
1528 COSTS_N_INSNS (4), /* loadf. */
1529 COSTS_N_INSNS (4), /* loadd. */
1530 0, /* load_unaligned. */
1531 0, /* store. */
1532 0, /* strd. */
1533 COSTS_N_INSNS (1), /* stm_1st. */
1534 1, /* stm_regs_per_insn_1st. */
1535 2, /* stm_regs_per_insn_subsequent. */
1536 0, /* storef. */
1537 0, /* stored. */
1538 0 /* store_unaligned. */
1541 /* FP SFmode */
1543 COSTS_N_INSNS (17), /* div. */
1544 COSTS_N_INSNS (4), /* mult. */
1545 COSTS_N_INSNS (8), /* mult_addsub. */
1546 COSTS_N_INSNS (8), /* fma. */
1547 COSTS_N_INSNS (4), /* addsub. */
1548 COSTS_N_INSNS (2), /* fpconst. */
1549 COSTS_N_INSNS (2), /* neg. */
1550 COSTS_N_INSNS (5), /* compare. */
1551 COSTS_N_INSNS (4), /* widen. */
1552 COSTS_N_INSNS (4), /* narrow. */
1553 COSTS_N_INSNS (4), /* toint. */
1554 COSTS_N_INSNS (4), /* fromint. */
1555 COSTS_N_INSNS (4) /* roundint. */
1557 /* FP DFmode */
1559 COSTS_N_INSNS (31), /* div. */
1560 COSTS_N_INSNS (4), /* mult. */
1561 COSTS_N_INSNS (8), /* mult_addsub. */
1562 COSTS_N_INSNS (8), /* fma. */
1563 COSTS_N_INSNS (4), /* addsub. */
1564 COSTS_N_INSNS (2), /* fpconst. */
1565 COSTS_N_INSNS (2), /* neg. */
1566 COSTS_N_INSNS (2), /* compare. */
1567 COSTS_N_INSNS (4), /* widen. */
1568 COSTS_N_INSNS (4), /* narrow. */
1569 COSTS_N_INSNS (4), /* toint. */
1570 COSTS_N_INSNS (4), /* fromint. */
1571 COSTS_N_INSNS (4) /* roundint. */
1574 /* Vector */
1576 COSTS_N_INSNS (1) /* alu. */
1580 const struct cpu_cost_table v7m_extra_costs =
1582 /* ALU */
1584 0, /* arith. */
1585 0, /* logical. */
1586 0, /* shift. */
1587 0, /* shift_reg. */
1588 0, /* arith_shift. */
1589 COSTS_N_INSNS (1), /* arith_shift_reg. */
1590 0, /* log_shift. */
1591 COSTS_N_INSNS (1), /* log_shift_reg. */
1592 0, /* extend. */
1593 COSTS_N_INSNS (1), /* extend_arith. */
1594 0, /* bfi. */
1595 0, /* bfx. */
1596 0, /* clz. */
1597 0, /* rev. */
1598 COSTS_N_INSNS (1), /* non_exec. */
1599 false /* non_exec_costs_exec. */
1602 /* MULT SImode */
1604 COSTS_N_INSNS (1), /* simple. */
1605 COSTS_N_INSNS (1), /* flag_setting. */
1606 COSTS_N_INSNS (2), /* extend. */
1607 COSTS_N_INSNS (1), /* add. */
1608 COSTS_N_INSNS (3), /* extend_add. */
1609 COSTS_N_INSNS (8) /* idiv. */
1611 /* MULT DImode */
1613 0, /* simple (N/A). */
1614 0, /* flag_setting (N/A). */
1615 COSTS_N_INSNS (2), /* extend. */
1616 0, /* add (N/A). */
1617 COSTS_N_INSNS (3), /* extend_add. */
1618 0 /* idiv (N/A). */
1621 /* LD/ST */
1623 COSTS_N_INSNS (2), /* load. */
1624 0, /* load_sign_extend. */
1625 COSTS_N_INSNS (3), /* ldrd. */
1626 COSTS_N_INSNS (2), /* ldm_1st. */
1627 1, /* ldm_regs_per_insn_1st. */
1628 1, /* ldm_regs_per_insn_subsequent. */
1629 COSTS_N_INSNS (2), /* loadf. */
1630 COSTS_N_INSNS (3), /* loadd. */
1631 COSTS_N_INSNS (1), /* load_unaligned. */
1632 COSTS_N_INSNS (2), /* store. */
1633 COSTS_N_INSNS (3), /* strd. */
1634 COSTS_N_INSNS (2), /* stm_1st. */
1635 1, /* stm_regs_per_insn_1st. */
1636 1, /* stm_regs_per_insn_subsequent. */
1637 COSTS_N_INSNS (2), /* storef. */
1638 COSTS_N_INSNS (3), /* stored. */
1639 COSTS_N_INSNS (1) /* store_unaligned. */
1642 /* FP SFmode */
1644 COSTS_N_INSNS (7), /* div. */
1645 COSTS_N_INSNS (2), /* mult. */
1646 COSTS_N_INSNS (5), /* mult_addsub. */
1647 COSTS_N_INSNS (3), /* fma. */
1648 COSTS_N_INSNS (1), /* addsub. */
1649 0, /* fpconst. */
1650 0, /* neg. */
1651 0, /* compare. */
1652 0, /* widen. */
1653 0, /* narrow. */
1654 0, /* toint. */
1655 0, /* fromint. */
1656 0 /* roundint. */
1658 /* FP DFmode */
1660 COSTS_N_INSNS (15), /* div. */
1661 COSTS_N_INSNS (5), /* mult. */
1662 COSTS_N_INSNS (7), /* mult_addsub. */
1663 COSTS_N_INSNS (7), /* fma. */
1664 COSTS_N_INSNS (3), /* addsub. */
1665 0, /* fpconst. */
1666 0, /* neg. */
1667 0, /* compare. */
1668 0, /* widen. */
1669 0, /* narrow. */
1670 0, /* toint. */
1671 0, /* fromint. */
1672 0 /* roundint. */
1675 /* Vector */
1677 COSTS_N_INSNS (1) /* alu. */
1681 #define ARM_FUSE_NOTHING (0)
1682 #define ARM_FUSE_MOVW_MOVT (1 << 0)
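/* Bitmask values for the "Fuseable pairs of instructions" field of the
   tune_params tables below: ARM_FUSE_MOVW_MOVT lets the scheduler keep a
   movw/movt pair adjacent so the core can macro-fuse them.  */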
1684 const struct tune_params arm_slowmul_tune =
1685 {
1686   arm_slowmul_rtx_costs,
1687 NULL,
1688 NULL, /* Sched adj cost. */
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 true, /* Prefer constant pool. */
1693 arm_default_branch_cost,
1694 false, /* Prefer LDRD/STRD. */
1695 {true, true}, /* Prefer non short circuit. */
1696 &arm_default_vec_cost, /* Vectorizer costs. */
1697 false, /* Prefer Neon for 64-bits bitops. */
1698 false, false, /* Prefer 32-bit encodings. */
1699 false, /* Prefer Neon for stringops. */
1700 8, /* Maximum insns to inline memset. */
1701 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1702 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1703   1                     /* Issue rate.  */
1704 };
1706 const struct tune_params arm_fastmul_tune =
1708 arm_fastmul_rtx_costs,
1709 NULL,
1710 NULL, /* Sched adj cost. */
1711 1, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8, /* Maximum insns to inline memset. */
1723 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1724 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1725 1 /* Issue rate. */
1728 /* StrongARM has early execution of branches, so a sequence that is worth
1729 skipping is shorter. Set max_insns_skipped to a lower value. */
1731 const struct tune_params arm_strongarm_tune =
1733 arm_fastmul_rtx_costs,
1734 NULL,
1735 NULL, /* Sched adj cost. */
1736 1, /* Constant limit. */
1737 3, /* Max cond insns. */
1738 ARM_PREFETCH_NOT_BENEFICIAL,
1739 true, /* Prefer constant pool. */
1740 arm_default_branch_cost,
1741 false, /* Prefer LDRD/STRD. */
1742 {true, true}, /* Prefer non short circuit. */
1743 &arm_default_vec_cost, /* Vectorizer costs. */
1744 false, /* Prefer Neon for 64-bits bitops. */
1745 false, false, /* Prefer 32-bit encodings. */
1746 false, /* Prefer Neon for stringops. */
1747 8, /* Maximum insns to inline memset. */
1748 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1749 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1750 1 /* Issue rate. */
1753 const struct tune_params arm_xscale_tune =
1755 arm_xscale_rtx_costs,
1756 NULL,
1757 xscale_sched_adjust_cost,
1758 2, /* Constant limit. */
1759 3, /* Max cond insns. */
1760 ARM_PREFETCH_NOT_BENEFICIAL,
1761 true, /* Prefer constant pool. */
1762 arm_default_branch_cost,
1763 false, /* Prefer LDRD/STRD. */
1764 {true, true}, /* Prefer non short circuit. */
1765 &arm_default_vec_cost, /* Vectorizer costs. */
1766 false, /* Prefer Neon for 64-bits bitops. */
1767 false, false, /* Prefer 32-bit encodings. */
1768 false, /* Prefer Neon for stringops. */
1769 8, /* Maximum insns to inline memset. */
1770 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1771 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1772 1 /* Issue rate. */
1775 const struct tune_params arm_9e_tune =
1777 arm_9e_rtx_costs,
1778 NULL,
1779 NULL, /* Sched adj cost. */
1780 1, /* Constant limit. */
1781 5, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 true, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 false, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 false, false, /* Prefer 32-bit encodings. */
1790 false, /* Prefer Neon for stringops. */
1791 8, /* Maximum insns to inline memset. */
1792 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1793 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1794 1 /* Issue rate. */
1797 const struct tune_params arm_marvell_pj4_tune =
1799 arm_9e_rtx_costs,
1800 NULL,
1801 NULL, /* Sched adj cost. */
1802 1, /* Constant limit. */
1803 5, /* Max cond insns. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 true, /* Prefer constant pool. */
1806 arm_default_branch_cost,
1807 false, /* Prefer LDRD/STRD. */
1808 {true, true}, /* Prefer non short circuit. */
1809 &arm_default_vec_cost, /* Vectorizer costs. */
1810 false, /* Prefer Neon for 64-bits bitops. */
1811 false, false, /* Prefer 32-bit encodings. */
1812 false, /* Prefer Neon for stringops. */
1813 8, /* Maximum insns to inline memset. */
1814 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1815 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1816 2 /* Issue rate. */
1819 const struct tune_params arm_v6t2_tune =
1821 arm_9e_rtx_costs,
1822 NULL,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 5, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 false, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 false, false, /* Prefer 32-bit encodings. */
1834 false, /* Prefer Neon for stringops. */
1835 8, /* Maximum insns to inline memset. */
1836 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1837 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1838 1 /* Issue rate. */
1842 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1843 const struct tune_params arm_cortex_tune =
1845 arm_9e_rtx_costs,
1846 &generic_extra_costs,
1847 NULL, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 false, /* Prefer Neon for stringops. */
1859 8, /* Maximum insns to inline memset. */
1860 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1861 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1862 2 /* Issue rate. */
1865 const struct tune_params arm_cortex_a8_tune =
1867 arm_9e_rtx_costs,
1868 &cortexa8_extra_costs,
1869 NULL, /* Sched adj cost. */
1870 1, /* Constant limit. */
1871 5, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 false, /* Prefer LDRD/STRD. */
1876 {true, true}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 false, false, /* Prefer 32-bit encodings. */
1880 true, /* Prefer Neon for stringops. */
1881 8, /* Maximum insns to inline memset. */
1882 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1883 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1884 2 /* Issue rate. */
1887 const struct tune_params arm_cortex_a7_tune =
1889 arm_9e_rtx_costs,
1890 &cortexa7_extra_costs,
1891 NULL,
1892 1, /* Constant limit. */
1893 5, /* Max cond insns. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 false, /* Prefer constant pool. */
1896 arm_default_branch_cost,
1897 false, /* Prefer LDRD/STRD. */
1898 {true, true}, /* Prefer non short circuit. */
1899 &arm_default_vec_cost, /* Vectorizer costs. */
1900 false, /* Prefer Neon for 64-bits bitops. */
1901 false, false, /* Prefer 32-bit encodings. */
1902 true, /* Prefer Neon for stringops. */
1903 8, /* Maximum insns to inline memset. */
1904 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1905 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1906 2 /* Issue rate. */
1909 const struct tune_params arm_cortex_a15_tune =
1911 arm_9e_rtx_costs,
1912 &cortexa15_extra_costs,
1913 NULL, /* Sched adj cost. */
1914 1, /* Constant limit. */
1915 2, /* Max cond insns. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 false, /* Prefer constant pool. */
1918 arm_default_branch_cost,
1919 true, /* Prefer LDRD/STRD. */
1920 {true, true}, /* Prefer non short circuit. */
1921 &arm_default_vec_cost, /* Vectorizer costs. */
1922 false, /* Prefer Neon for 64-bits bitops. */
1923 true, true, /* Prefer 32-bit encodings. */
1924 true, /* Prefer Neon for stringops. */
1925 8, /* Maximum insns to inline memset. */
1926 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1927 ARM_SCHED_AUTOPREF_FULL, /* Sched L2 autopref. */
1928 3 /* Issue rate. */
1931 const struct tune_params arm_cortex_a53_tune =
1933 arm_9e_rtx_costs,
1934 &cortexa53_extra_costs,
1935 NULL, /* Scheduler cost adjustment. */
1936 1, /* Constant limit. */
1937 5, /* Max cond insns. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 false, /* Prefer constant pool. */
1940 arm_default_branch_cost,
1941 false, /* Prefer LDRD/STRD. */
1942 {true, true}, /* Prefer non short circuit. */
1943 &arm_default_vec_cost, /* Vectorizer costs. */
1944 false, /* Prefer Neon for 64-bits bitops. */
1945 false, false, /* Prefer 32-bit encodings. */
1946 true, /* Prefer Neon for stringops. */
1947 8, /* Maximum insns to inline memset. */
1948 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1949 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1950 2 /* Issue rate. */
1953 const struct tune_params arm_cortex_a57_tune =
1955 arm_9e_rtx_costs,
1956 &cortexa57_extra_costs,
1957 NULL, /* Scheduler cost adjustment. */
1958 1, /* Constant limit. */
1959 2, /* Max cond insns. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 false, /* Prefer constant pool. */
1962 arm_default_branch_cost,
1963 true, /* Prefer LDRD/STRD. */
1964 {true, true}, /* Prefer non short circuit. */
1965 &arm_default_vec_cost, /* Vectorizer costs. */
1966 false, /* Prefer Neon for 64-bits bitops. */
1967 true, true, /* Prefer 32-bit encodings. */
1968 true, /* Prefer Neon for stringops. */
1969 8, /* Maximum insns to inline memset. */
1970 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1971 ARM_SCHED_AUTOPREF_FULL, /* Sched L2 autopref. */
1972 3 /* Issue rate. */
1975 const struct tune_params arm_xgene1_tune =
1977 arm_9e_rtx_costs,
1978 &xgene1_extra_costs,
1979 NULL, /* Scheduler cost adjustment. */
1980 1, /* Constant limit. */
1981 2, /* Max cond insns. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 false, /* Prefer constant pool. */
1984 arm_default_branch_cost,
1985 true, /* Prefer LDRD/STRD. */
1986 {true, true}, /* Prefer non short circuit. */
1987 &arm_default_vec_cost, /* Vectorizer costs. */
1988 false, /* Prefer Neon for 64-bits bitops. */
1989 true, true, /* Prefer 32-bit encodings. */
1990 false, /* Prefer Neon for stringops. */
1991 32, /* Maximum insns to inline memset. */
1992 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1993 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1994 4 /* Issue rate. */
1997 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1998 less appealing. Set max_insns_skipped to a low value. */
2000 const struct tune_params arm_cortex_a5_tune =
2002 arm_9e_rtx_costs,
2003 &cortexa5_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 1, /* Constant limit. */
2006 1, /* Max cond insns. */
2007 ARM_PREFETCH_NOT_BENEFICIAL,
2008 false, /* Prefer constant pool. */
2009 arm_cortex_a5_branch_cost,
2010 false, /* Prefer LDRD/STRD. */
2011 {false, false}, /* Prefer non short circuit. */
2012 &arm_default_vec_cost, /* Vectorizer costs. */
2013 false, /* Prefer Neon for 64-bits bitops. */
2014 false, false, /* Prefer 32-bit encodings. */
2015 true, /* Prefer Neon for stringops. */
2016 8, /* Maximum insns to inline memset. */
2017 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2018 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2019 2 /* Issue rate. */
2022 const struct tune_params arm_cortex_a9_tune =
2024 arm_9e_rtx_costs,
2025 &cortexa9_extra_costs,
2026 cortex_a9_sched_adjust_cost,
2027 1, /* Constant limit. */
2028 5, /* Max cond insns. */
2029 ARM_PREFETCH_BENEFICIAL(4,32,32),
2030 false, /* Prefer constant pool. */
2031 arm_default_branch_cost,
2032 false, /* Prefer LDRD/STRD. */
2033 {true, true}, /* Prefer non short circuit. */
2034 &arm_default_vec_cost, /* Vectorizer costs. */
2035 false, /* Prefer Neon for 64-bits bitops. */
2036 false, false, /* Prefer 32-bit encodings. */
2037 false, /* Prefer Neon for stringops. */
2038 8, /* Maximum insns to inline memset. */
2039 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2040 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2041 2 /* Issue rate. */
2044 const struct tune_params arm_cortex_a12_tune =
2046 arm_9e_rtx_costs,
2047 &cortexa12_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 1, /* Constant limit. */
2050 2, /* Max cond insns. */
2051 ARM_PREFETCH_NOT_BENEFICIAL,
2052 false, /* Prefer constant pool. */
2053 arm_default_branch_cost,
2054 true, /* Prefer LDRD/STRD. */
2055 {true, true}, /* Prefer non short circuit. */
2056 &arm_default_vec_cost, /* Vectorizer costs. */
2057 false, /* Prefer Neon for 64-bits bitops. */
2058 true, true, /* Prefer 32-bit encodings. */
2059 true, /* Prefer Neon for stringops. */
2060 8, /* Maximum insns to inline memset. */
2061 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
2062 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2063 2 /* Issue rate. */
2066 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
2067 single cycle to execute, so materialising a constant that way costs two
2068 cycles. An LDR from the constant pool likewise takes two cycles to execute,
2069 but mildly increases pipelining opportunity (consecutive loads/stores can be
2070 pipelined together, saving one cycle), and may also improve icache
2071 utilisation. Hence we prefer the constant pool for such processors. */
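/* Illustration (not part of the original comment): the two sequences being
   compared above for materialising a 32-bit constant are roughly

       movw  r0, #:lower16:value   @ one cycle each on Cortex-M4
       movt  r0, #:upper16:value

   versus a single literal-pool load

       ldr   r0, =value            @ ~two cycles, plus a pool entry

   using the approximate cycle counts assumed in the comment above.  */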
2073 const struct tune_params arm_v7m_tune =
2075 arm_9e_rtx_costs,
2076 &v7m_extra_costs,
2077 NULL, /* Sched adj cost. */
2078 1, /* Constant limit. */
2079 2, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 true, /* Prefer constant pool. */
2082 arm_cortex_m_branch_cost,
2083 false, /* Prefer LDRD/STRD. */
2084 {false, false}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8, /* Maximum insns to inline memset. */
2090 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2091 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2092 1 /* Issue rate. */
2095 /* Cortex-M7 tuning. */
2097 const struct tune_params arm_cortex_m7_tune =
2099 arm_9e_rtx_costs,
2100 &v7m_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 0, /* Constant limit. */
2103 1, /* Max cond insns. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 true, /* Prefer constant pool. */
2106 arm_cortex_m7_branch_cost,
2107 false, /* Prefer LDRD/STRD. */
2108 {true, true}, /* Prefer non short circuit. */
2109 &arm_default_vec_cost, /* Vectorizer costs. */
2110 false, /* Prefer Neon for 64-bits bitops. */
2111 false, false, /* Prefer 32-bit encodings. */
2112 false, /* Prefer Neon for stringops. */
2113 8, /* Maximum insns to inline memset. */
2114 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2115 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2116 2 /* Issue rate. */
2119 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2120 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2121 const struct tune_params arm_v6m_tune =
2123 arm_9e_rtx_costs,
2124 NULL,
2125 NULL, /* Sched adj cost. */
2126 1, /* Constant limit. */
2127 5, /* Max cond insns. */
2128 ARM_PREFETCH_NOT_BENEFICIAL,
2129 false, /* Prefer constant pool. */
2130 arm_default_branch_cost,
2131 false, /* Prefer LDRD/STRD. */
2132 {false, false}, /* Prefer non short circuit. */
2133 &arm_default_vec_cost, /* Vectorizer costs. */
2134 false, /* Prefer Neon for 64-bits bitops. */
2135 false, false, /* Prefer 32-bit encodings. */
2136 false, /* Prefer Neon for stringops. */
2137 8, /* Maximum insns to inline memset. */
2138 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2139 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2140 1 /* Issue rate. */
2143 const struct tune_params arm_fa726te_tune =
2145 arm_9e_rtx_costs,
2146 NULL,
2147 fa726te_sched_adjust_cost,
2148 1, /* Constant limit. */
2149 5, /* Max cond insns. */
2150 ARM_PREFETCH_NOT_BENEFICIAL,
2151 true, /* Prefer constant pool. */
2152 arm_default_branch_cost,
2153 false, /* Prefer LDRD/STRD. */
2154 {true, true}, /* Prefer non short circuit. */
2155 &arm_default_vec_cost, /* Vectorizer costs. */
2156 false, /* Prefer Neon for 64-bits bitops. */
2157 false, false, /* Prefer 32-bit encodings. */
2158 false, /* Prefer Neon for stringops. */
2159 8, /* Maximum insns to inline memset. */
2160 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2161 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2162 2 /* Issue rate. */
2166 /* Not all of these give usefully different compilation alternatives,
2167 but there is no simple way of generalizing them. */
2168 static const struct processors all_cores[] =
2170 /* ARM Cores */
2171 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2172 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2173 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2174 #include "arm-cores.def"
2175 #undef ARM_CORE
2176 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2179 static const struct processors all_architectures[] =
2181 /* ARM Architectures */
2182 /* We don't specify tuning costs here as it will be figured out
2183 from the core. */
2185 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2186 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2187 #include "arm-arches.def"
2188 #undef ARM_ARCH
2189 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2193 /* These are populated as commandline arguments are processed, or NULL
2194 if not specified. */
2195 static const struct processors *arm_selected_arch;
2196 static const struct processors *arm_selected_cpu;
2197 static const struct processors *arm_selected_tune;
2199 /* The name of the preprocessor macro to define for this architecture. */
2201 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2203 /* Available values for -mfpu=. */
2205 static const struct arm_fpu_desc all_fpus[] =
2207 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2208 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2209 #include "arm-fpus.def"
2210 #undef ARM_FPU
2214 /* Supported TLS relocations. */
2216 enum tls_reloc {
2217 TLS_GD32,
2218 TLS_LDM32,
2219 TLS_LDO32,
2220 TLS_IE32,
2221 TLS_LE32,
2222 TLS_DESCSEQ /* GNU scheme */
2225 /* The maximum number of insns to be used when loading a constant. */
2226 inline static int
2227 arm_constant_limit (bool size_p)
2229 return size_p ? 1 : current_tune->constant_limit;
2232 /* Emit an insn that's a simple single-set. Both the operands must be known
2233 to be valid. */
2234 inline static rtx_insn *
2235 emit_set_insn (rtx x, rtx y)
2237 return emit_insn (gen_rtx_SET (x, y));
2240 /* Return the number of bits set in VALUE. */
2241 static unsigned
2242 bit_count (unsigned long value)
2244 unsigned long count = 0;
2246 while (value)
2248 count++;
2249 value &= value - 1; /* Clear the least-significant set bit. */
2252 return count;
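/* Worked example (illustrative): for value == 0x2C (binary 101100) the
   loop clears one set bit per iteration,
       101100 -> 101000 -> 100000 -> 0,
   so bit_count returns 3.  This is the classic Kernighan popcount.  */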
2255 typedef struct
2257 machine_mode mode;
2258 const char *name;
2259 } arm_fixed_mode_set;
2261 /* A small helper for setting fixed-point library libfuncs. */
2263 static void
2264 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2265 const char *funcname, const char *modename,
2266 int num_suffix)
2268 char buffer[50];
2270 if (num_suffix == 0)
2271 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2272 else
2273 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2275 set_optab_libfunc (optable, mode, buffer);
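/* Example of the names constructed above (illustrative): add_optab on QQmode
   with num_suffix == 3 yields "__gnu_addqq3", and neg_optab on SAmode with
   num_suffix == 2 yields "__gnu_negsa2", matching the __gnu_-prefixed
   fixed-point routines that libgcc provides for ARM.  */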
2278 static void
2279 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2280 machine_mode from, const char *funcname,
2281 const char *toname, const char *fromname)
2283 char buffer[50];
2284 const char *maybe_suffix_2 = "";
2286 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2287 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2288 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2289 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2290 maybe_suffix_2 = "2";
2292 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2293 maybe_suffix_2);
2295 set_conv_libfunc (optable, to, from, buffer);
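/* Example of the conversion names constructed above (illustrative):
   converting from SImode to SAmode with "fract" yields "__gnu_fractsisa",
   while a fixed-to-fixed conversion of matching signedness and class, such
   as QQmode to HQmode, picks up the "2" suffix and yields
   "__gnu_fractqqhq2", following the naming scheme in fixed-bit.h.  */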
2298 /* Set up library functions unique to ARM. */
2300 static void
2301 arm_init_libfuncs (void)
2303 /* For Linux, we have access to kernel support for atomic operations. */
2304 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2305 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2307 /* There are no special library functions unless we are using the
2308 ARM BPABI. */
2309 if (!TARGET_BPABI)
2310 return;
2312 /* The functions below are described in Section 4 of the "Run-Time
2313 ABI for the ARM architecture", Version 1.0. */
2315 /* Double-precision floating-point arithmetic. Table 2. */
2316 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2317 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2318 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2319 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2320 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2322 /* Double-precision comparisons. Table 3. */
2323 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2324 set_optab_libfunc (ne_optab, DFmode, NULL);
2325 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2326 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2327 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2328 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2329 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2331 /* Single-precision floating-point arithmetic. Table 4. */
2332 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2333 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2334 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2335 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2336 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2338 /* Single-precision comparisons. Table 5. */
2339 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2340 set_optab_libfunc (ne_optab, SFmode, NULL);
2341 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2342 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2343 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2344 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2345 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2347 /* Floating-point to integer conversions. Table 6. */
2348 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2349 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2350 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2351 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2352 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2353 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2354 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2355 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2357 /* Conversions between floating types. Table 7. */
2358 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2359 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2361 /* Integer to floating-point conversions. Table 8. */
2362 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2363 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2364 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2365 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2366 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2367 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2368 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2369 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2371 /* Long long. Table 9. */
2372 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2373 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2374 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2375 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2376 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2377 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2378 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2379 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2381 /* Integer (32/32->32) division. \S 4.3.1. */
2382 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2383 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2385 /* The divmod functions are designed so that they can be used for
2386 plain division, even though they return both the quotient and the
2387 remainder. The quotient is returned in the usual location (i.e.,
2388 r0 for SImode, {r0, r1} for DImode), just as would be expected
2389 for an ordinary division routine. Because the AAPCS calling
2390 conventions specify that all of { r0, r1, r2, r3 } are
2391 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2392 explicitly that those registers are clobbered by these
2393 routines. */
2394 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2395 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2397 /* For SImode division the ABI provides div-without-mod routines,
2398 which are faster. */
2399 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2400 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
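/* Illustration (based on the run-time ABI, not part of the original
   comment): __aeabi_idivmod returns the quotient in r0 and the remainder in
   r1, so when only the quotient is needed the compiler may call the same
   routine and simply ignore r1, while __aeabi_idiv returns just the
   quotient in r0.  */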
2402 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2403 divmod libcalls instead. */
2404 set_optab_libfunc (smod_optab, DImode, NULL);
2405 set_optab_libfunc (umod_optab, DImode, NULL);
2406 set_optab_libfunc (smod_optab, SImode, NULL);
2407 set_optab_libfunc (umod_optab, SImode, NULL);
2409 /* Half-precision float operations. The compiler handles all operations
2410 with NULL libfuncs by converting to SFmode. */
2411 switch (arm_fp16_format)
2413 case ARM_FP16_FORMAT_IEEE:
2414 case ARM_FP16_FORMAT_ALTERNATIVE:
2416 /* Conversions. */
2417 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2418 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2419 ? "__gnu_f2h_ieee"
2420 : "__gnu_f2h_alternative"));
2421 set_conv_libfunc (sext_optab, SFmode, HFmode,
2422 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2423 ? "__gnu_h2f_ieee"
2424 : "__gnu_h2f_alternative"));
2426 /* Arithmetic. */
2427 set_optab_libfunc (add_optab, HFmode, NULL);
2428 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2429 set_optab_libfunc (smul_optab, HFmode, NULL);
2430 set_optab_libfunc (neg_optab, HFmode, NULL);
2431 set_optab_libfunc (sub_optab, HFmode, NULL);
2433 /* Comparisons. */
2434 set_optab_libfunc (eq_optab, HFmode, NULL);
2435 set_optab_libfunc (ne_optab, HFmode, NULL);
2436 set_optab_libfunc (lt_optab, HFmode, NULL);
2437 set_optab_libfunc (le_optab, HFmode, NULL);
2438 set_optab_libfunc (ge_optab, HFmode, NULL);
2439 set_optab_libfunc (gt_optab, HFmode, NULL);
2440 set_optab_libfunc (unord_optab, HFmode, NULL);
2441 break;
2443 default:
2444 break;
2447 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2449 const arm_fixed_mode_set fixed_arith_modes[] =
2451 { QQmode, "qq" },
2452 { UQQmode, "uqq" },
2453 { HQmode, "hq" },
2454 { UHQmode, "uhq" },
2455 { SQmode, "sq" },
2456 { USQmode, "usq" },
2457 { DQmode, "dq" },
2458 { UDQmode, "udq" },
2459 { TQmode, "tq" },
2460 { UTQmode, "utq" },
2461 { HAmode, "ha" },
2462 { UHAmode, "uha" },
2463 { SAmode, "sa" },
2464 { USAmode, "usa" },
2465 { DAmode, "da" },
2466 { UDAmode, "uda" },
2467 { TAmode, "ta" },
2468 { UTAmode, "uta" }
2470 const arm_fixed_mode_set fixed_conv_modes[] =
2472 { QQmode, "qq" },
2473 { UQQmode, "uqq" },
2474 { HQmode, "hq" },
2475 { UHQmode, "uhq" },
2476 { SQmode, "sq" },
2477 { USQmode, "usq" },
2478 { DQmode, "dq" },
2479 { UDQmode, "udq" },
2480 { TQmode, "tq" },
2481 { UTQmode, "utq" },
2482 { HAmode, "ha" },
2483 { UHAmode, "uha" },
2484 { SAmode, "sa" },
2485 { USAmode, "usa" },
2486 { DAmode, "da" },
2487 { UDAmode, "uda" },
2488 { TAmode, "ta" },
2489 { UTAmode, "uta" },
2490 { QImode, "qi" },
2491 { HImode, "hi" },
2492 { SImode, "si" },
2493 { DImode, "di" },
2494 { TImode, "ti" },
2495 { SFmode, "sf" },
2496 { DFmode, "df" }
2498 unsigned int i, j;
2500 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2502 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2503 "add", fixed_arith_modes[i].name, 3);
2504 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2505 "ssadd", fixed_arith_modes[i].name, 3);
2506 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2507 "usadd", fixed_arith_modes[i].name, 3);
2508 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2509 "sub", fixed_arith_modes[i].name, 3);
2510 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2511 "sssub", fixed_arith_modes[i].name, 3);
2512 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2513 "ussub", fixed_arith_modes[i].name, 3);
2514 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2515 "mul", fixed_arith_modes[i].name, 3);
2516 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2517 "ssmul", fixed_arith_modes[i].name, 3);
2518 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2519 "usmul", fixed_arith_modes[i].name, 3);
2520 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2521 "div", fixed_arith_modes[i].name, 3);
2522 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2523 "udiv", fixed_arith_modes[i].name, 3);
2524 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2525 "ssdiv", fixed_arith_modes[i].name, 3);
2526 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2527 "usdiv", fixed_arith_modes[i].name, 3);
2528 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2529 "neg", fixed_arith_modes[i].name, 2);
2530 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2531 "ssneg", fixed_arith_modes[i].name, 2);
2532 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2533 "usneg", fixed_arith_modes[i].name, 2);
2534 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2535 "ashl", fixed_arith_modes[i].name, 3);
2536 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2537 "ashr", fixed_arith_modes[i].name, 3);
2538 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2539 "lshr", fixed_arith_modes[i].name, 3);
2540 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2541 "ssashl", fixed_arith_modes[i].name, 3);
2542 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2543 "usashl", fixed_arith_modes[i].name, 3);
2544 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2545 "cmp", fixed_arith_modes[i].name, 2);
2548 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2549 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2551 if (i == j
2552 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2553 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2554 continue;
2556 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2557 fixed_conv_modes[j].mode, "fract",
2558 fixed_conv_modes[i].name,
2559 fixed_conv_modes[j].name);
2560 arm_set_fixed_conv_libfunc (satfract_optab,
2561 fixed_conv_modes[i].mode,
2562 fixed_conv_modes[j].mode, "satfract",
2563 fixed_conv_modes[i].name,
2564 fixed_conv_modes[j].name);
2565 arm_set_fixed_conv_libfunc (fractuns_optab,
2566 fixed_conv_modes[i].mode,
2567 fixed_conv_modes[j].mode, "fractuns",
2568 fixed_conv_modes[i].name,
2569 fixed_conv_modes[j].name);
2570 arm_set_fixed_conv_libfunc (satfractuns_optab,
2571 fixed_conv_modes[i].mode,
2572 fixed_conv_modes[j].mode, "satfractuns",
2573 fixed_conv_modes[i].name,
2574 fixed_conv_modes[j].name);
2578 if (TARGET_AAPCS_BASED)
2579 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2582 /* On AAPCS systems, this is the "struct __va_list". */
2583 static GTY(()) tree va_list_type;
2585 /* Return the type to use as __builtin_va_list. */
2586 static tree
2587 arm_build_builtin_va_list (void)
2589 tree va_list_name;
2590 tree ap_field;
2592 if (!TARGET_AAPCS_BASED)
2593 return std_build_builtin_va_list ();
2595 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2596 defined as:
2598 struct __va_list
2600 void *__ap;
2603 The C Library ABI further reinforces this definition in \S
2604 4.1.
2606 We must follow this definition exactly. The structure tag
2607 name is visible in C++ mangled names, and thus forms a part
2608 of the ABI. The field name may be used by people who
2609 #include <stdarg.h>. */
2610 /* Create the type. */
2611 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2612 /* Give it the required name. */
2613 va_list_name = build_decl (BUILTINS_LOCATION,
2614 TYPE_DECL,
2615 get_identifier ("__va_list"),
2616 va_list_type);
2617 DECL_ARTIFICIAL (va_list_name) = 1;
2618 TYPE_NAME (va_list_type) = va_list_name;
2619 TYPE_STUB_DECL (va_list_type) = va_list_name;
2620 /* Create the __ap field. */
2621 ap_field = build_decl (BUILTINS_LOCATION,
2622 FIELD_DECL,
2623 get_identifier ("__ap"),
2624 ptr_type_node);
2625 DECL_ARTIFICIAL (ap_field) = 1;
2626 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2627 TYPE_FIELDS (va_list_type) = ap_field;
2628 /* Compute its layout. */
2629 layout_type (va_list_type);
2631 return va_list_type;
2634 /* Return an expression of type "void *" pointing to the next
2635 available argument in a variable-argument list. VALIST is the
2636 user-level va_list object, of type __builtin_va_list. */
2637 static tree
2638 arm_extract_valist_ptr (tree valist)
2640 if (TREE_TYPE (valist) == error_mark_node)
2641 return error_mark_node;
2643 /* On an AAPCS target, the pointer is stored within "struct
2644 va_list". */
2645 if (TARGET_AAPCS_BASED)
2647 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2648 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2649 valist, ap_field, NULL_TREE);
2652 return valist;
2655 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2656 static void
2657 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2659 valist = arm_extract_valist_ptr (valist);
2660 std_expand_builtin_va_start (valist, nextarg);
2663 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2664 static tree
2665 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2666 gimple_seq *post_p)
2668 valist = arm_extract_valist_ptr (valist);
2669 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2672 /* Fix up any incompatible options that the user has specified. */
2673 static void
2674 arm_option_override (void)
2676 arm_selected_arch = NULL;
2677 arm_selected_cpu = NULL;
2678 arm_selected_tune = NULL;
2680 if (global_options_set.x_arm_arch_option)
2681 arm_selected_arch = &all_architectures[arm_arch_option];
2683 if (global_options_set.x_arm_cpu_option)
2685 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2686 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2689 if (global_options_set.x_arm_tune_option)
2690 arm_selected_tune = &all_cores[(int) arm_tune_option];
2692 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2693 SUBTARGET_OVERRIDE_OPTIONS;
2694 #endif
2696 if (arm_selected_arch)
2698 if (arm_selected_cpu)
2700 /* Check for conflict between mcpu and march. */
2701 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2703 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2704 arm_selected_cpu->name, arm_selected_arch->name);
2705 /* -march wins for code generation.
2706 -mcpu wins for default tuning. */
2707 if (!arm_selected_tune)
2708 arm_selected_tune = arm_selected_cpu;
2710 arm_selected_cpu = arm_selected_arch;
2712 else
2713 /* -mcpu wins. */
2714 arm_selected_arch = NULL;
2716 else
2717 /* Pick a CPU based on the architecture. */
2718 arm_selected_cpu = arm_selected_arch;
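/* Illustrative example (assumed behaviour, not from the original source):
   "-march=armv7-a -mcpu=cortex-a8" describes compatible capability sets, so
   no warning is issued and -mcpu simply wins; "-march=armv4t -mcpu=cortex-a8"
   conflicts, so the warning above is emitted, armv4t wins for code
   generation and cortex-a8 still supplies the default tuning.  */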
2721 /* If the user did not specify a processor, choose one for them. */
2722 if (!arm_selected_cpu)
2724 const struct processors * sel;
2725 unsigned int sought;
2727 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2728 if (!arm_selected_cpu->name)
2730 #ifdef SUBTARGET_CPU_DEFAULT
2731 /* Use the subtarget default CPU if none was specified by
2732 configure. */
2733 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2734 #endif
2735 /* Default to ARM6. */
2736 if (!arm_selected_cpu->name)
2737 arm_selected_cpu = &all_cores[arm6];
2740 sel = arm_selected_cpu;
2741 insn_flags = sel->flags;
2743 /* Now check to see if the user has specified some command line
2744 switches that require certain abilities from the CPU. */
2745 sought = 0;
2747 if (TARGET_INTERWORK || TARGET_THUMB)
2749 sought |= (FL_THUMB | FL_MODE32);
2751 /* There are no ARM processors that support both APCS-26 and
2752 interworking. Therefore we force FL_MODE26 to be removed
2753 from insn_flags here (if it was set), so that the search
2754 below will always be able to find a compatible processor. */
2755 insn_flags &= ~FL_MODE26;
2758 if (sought != 0 && ((sought & insn_flags) != sought))
2760 /* Try to locate a CPU type that supports all of the abilities
2761 of the default CPU, plus the extra abilities requested by
2762 the user. */
2763 for (sel = all_cores; sel->name != NULL; sel++)
2764 if ((sel->flags & sought) == (sought | insn_flags))
2765 break;
2767 if (sel->name == NULL)
2769 unsigned current_bit_count = 0;
2770 const struct processors * best_fit = NULL;
2772 /* Ideally we would issue an error message here, saying
2773 that no CPU is both compatible with the default CPU
2774 and able to support the command line options specified
2775 by the programmer, and that they ought to use the
2776 -mcpu=<name> command line option to override the
2777 default CPU type.
2779 Instead, when no such cpu can be found, we scan the
2780 array again looking for a best match among the cores
2781 that do support the requested command line options,
2782 and fall back to that. */
2783 for (sel = all_cores; sel->name != NULL; sel++)
2784 if ((sel->flags & sought) == sought)
2786 unsigned count;
2788 count = bit_count (sel->flags & insn_flags);
2790 if (count >= current_bit_count)
2792 best_fit = sel;
2793 current_bit_count = count;
2797 gcc_assert (best_fit);
2798 sel = best_fit;
2801 arm_selected_cpu = sel;
2805 gcc_assert (arm_selected_cpu);
2806 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2807 if (!arm_selected_tune)
2808 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2810 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2811 insn_flags = arm_selected_cpu->flags;
2812 arm_base_arch = arm_selected_cpu->base_arch;
2814 arm_tune = arm_selected_tune->core;
2815 tune_flags = arm_selected_tune->flags;
2816 current_tune = arm_selected_tune->tune;
2818 /* Make sure that the processor choice does not conflict with any of the
2819 other command line choices. */
2820 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2821 error ("target CPU does not support ARM mode");
2823 /* BPABI targets use linker tricks to allow interworking on cores
2824 without thumb support. */
2825 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2827 warning (0, "target CPU does not support interworking" );
2828 target_flags &= ~MASK_INTERWORK;
2831 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2833 warning (0, "target CPU does not support THUMB instructions");
2834 target_flags &= ~MASK_THUMB;
2837 if (TARGET_APCS_FRAME && TARGET_THUMB)
2839 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2840 target_flags &= ~MASK_APCS_FRAME;
2843 /* Callee super interworking implies thumb interworking. Adding
2844 this to the flags here simplifies the logic elsewhere. */
2845 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2846 target_flags |= MASK_INTERWORK;
2848 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2849 from here where no function is being compiled currently. */
2850 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2851 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2853 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2854 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2856 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2858 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2859 target_flags |= MASK_APCS_FRAME;
2862 if (TARGET_POKE_FUNCTION_NAME)
2863 target_flags |= MASK_APCS_FRAME;
2865 if (TARGET_APCS_REENT && flag_pic)
2866 error ("-fpic and -mapcs-reent are incompatible");
2868 if (TARGET_APCS_REENT)
2869 warning (0, "APCS reentrant code not supported. Ignored");
2871 /* If this target is normally configured to use APCS frames, warn if they
2872 are turned off and debugging is turned on. */
2873 if (TARGET_ARM
2874 && write_symbols != NO_DEBUG
2875 && !TARGET_APCS_FRAME
2876 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2877 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879 if (TARGET_APCS_FLOAT)
2880 warning (0, "passing floating point arguments in fp regs not yet supported");
2882 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2883 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2884 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2885 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2886 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2887 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2888 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2889 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2890 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2891 arm_arch6m = arm_arch6 && !arm_arch_notm;
2892 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2893 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2894 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2895 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2896 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2898 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2899 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2900 thumb_code = TARGET_ARM == 0;
2901 thumb1_code = TARGET_THUMB1 != 0;
2902 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2903 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2904 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2905 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2906 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2907 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2908 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
2909 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2910 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2911 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2912 if (arm_restrict_it == 2)
2913 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2915 if (!TARGET_THUMB2)
2916 arm_restrict_it = 0;
2918 /* If we are not using the default (ARM mode) section anchor offset
2919 ranges, then set the correct ranges now. */
2920 if (TARGET_THUMB1)
2922 /* Thumb-1 LDR instructions cannot have negative offsets.
2923 Permissible positive offset ranges are 5-bit (for byte loads),
2924 6-bit (for halfword loads), or 7-bit (for word loads).
2925 Empirical results suggest a 7-bit anchor range gives the best
2926 overall code size. */
2927 targetm.min_anchor_offset = 0;
2928 targetm.max_anchor_offset = 127;
2930 else if (TARGET_THUMB2)
2932 /* The minimum is set such that the total size of the block
2933 for a particular anchor is 248 + 1 + 4095 bytes, which is
2934 divisible by eight, ensuring natural spacing of anchors. */
2935 targetm.min_anchor_offset = -248;
2936 targetm.max_anchor_offset = 4095;
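/* Arithmetic check (illustrative): 248 + 1 + 4095 == 4344 == 8 * 543, so
   the anchor block size described above is indeed divisible by eight.  */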
2939 /* V5 code we generate is completely interworking capable, so we turn off
2940 TARGET_INTERWORK here to avoid many tests later on. */
2942 /* XXX However, we must pass the right pre-processor defines to CPP
2943 or GLD can get confused. This is a hack. */
2944 if (TARGET_INTERWORK)
2945 arm_cpp_interwork = 1;
2947 if (arm_arch5)
2948 target_flags &= ~MASK_INTERWORK;
2950 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2951 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2953 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2954 error ("iwmmxt abi requires an iwmmxt capable cpu");
2956 if (!global_options_set.x_arm_fpu_index)
2958 const char *target_fpu_name;
2959 bool ok;
2961 #ifdef FPUTYPE_DEFAULT
2962 target_fpu_name = FPUTYPE_DEFAULT;
2963 #else
2964 target_fpu_name = "vfp";
2965 #endif
2967 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2968 CL_TARGET);
2969 gcc_assert (ok);
2972 arm_fpu_desc = &all_fpus[arm_fpu_index];
2974 switch (arm_fpu_desc->model)
2976 case ARM_FP_MODEL_VFP:
2977 arm_fpu_attr = FPU_VFP;
2978 break;
2980 default:
2981 gcc_unreachable();
2984 if (TARGET_AAPCS_BASED)
2986 if (TARGET_CALLER_INTERWORKING)
2987 error ("AAPCS does not support -mcaller-super-interworking");
2988 else
2989 if (TARGET_CALLEE_INTERWORKING)
2990 error ("AAPCS does not support -mcallee-super-interworking");
2993 /* iWMMXt and NEON are incompatible. */
2994 if (TARGET_IWMMXT && TARGET_NEON)
2995 error ("iWMMXt and NEON are incompatible");
2997 /* iWMMXt unsupported under Thumb mode. */
2998 if (TARGET_THUMB && TARGET_IWMMXT)
2999 error ("iWMMXt unsupported under Thumb mode");
3001 /* __fp16 support currently assumes the core has ldrh. */
3002 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3003 sorry ("__fp16 and no ldrh");
3005 /* If soft-float is specified then don't use FPU. */
3006 if (TARGET_SOFT_FLOAT)
3007 arm_fpu_attr = FPU_NONE;
3009 if (TARGET_AAPCS_BASED)
3011 if (arm_abi == ARM_ABI_IWMMXT)
3012 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3013 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3014 && TARGET_HARD_FLOAT
3015 && TARGET_VFP)
3016 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3017 else
3018 arm_pcs_default = ARM_PCS_AAPCS;
3020 else
3022 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3023 sorry ("-mfloat-abi=hard and VFP");
3025 if (arm_abi == ARM_ABI_APCS)
3026 arm_pcs_default = ARM_PCS_APCS;
3027 else
3028 arm_pcs_default = ARM_PCS_ATPCS;
3031 /* For arm2/3 there is no need to do any scheduling if we are doing
3032 software floating-point. */
3033 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3034 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3036 /* Use the cp15 method if it is available. */
3037 if (target_thread_pointer == TP_AUTO)
3039 if (arm_arch6k && !TARGET_THUMB1)
3040 target_thread_pointer = TP_CP15;
3041 else
3042 target_thread_pointer = TP_SOFT;
3045 if (TARGET_HARD_TP && TARGET_THUMB1)
3046 error ("can not use -mtp=cp15 with 16-bit Thumb");
3048 /* Override the default structure alignment for AAPCS ABI. */
3049 if (!global_options_set.x_arm_structure_size_boundary)
3051 if (TARGET_AAPCS_BASED)
3052 arm_structure_size_boundary = 8;
3054 else
3056 if (arm_structure_size_boundary != 8
3057 && arm_structure_size_boundary != 32
3058 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3060 if (ARM_DOUBLEWORD_ALIGN)
3061 warning (0,
3062 "structure size boundary can only be set to 8, 32 or 64");
3063 else
3064 warning (0, "structure size boundary can only be set to 8 or 32");
3065 arm_structure_size_boundary
3066 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3070 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3072 error ("RTP PIC is incompatible with Thumb");
3073 flag_pic = 0;
3076 /* If stack checking is disabled, we can use r10 as the PIC register,
3077 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3078 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3080 if (TARGET_VXWORKS_RTP)
3081 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3082 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3085 if (flag_pic && TARGET_VXWORKS_RTP)
3086 arm_pic_register = 9;
3088 if (arm_pic_register_string != NULL)
3090 int pic_register = decode_reg_name (arm_pic_register_string);
3092 if (!flag_pic)
3093 warning (0, "-mpic-register= is useless without -fpic");
3095 /* Prevent the user from choosing an obviously stupid PIC register. */
3096 else if (pic_register < 0 || call_used_regs[pic_register]
3097 || pic_register == HARD_FRAME_POINTER_REGNUM
3098 || pic_register == STACK_POINTER_REGNUM
3099 || pic_register >= PC_REGNUM
3100 || (TARGET_VXWORKS_RTP
3101 && (unsigned int) pic_register != arm_pic_register))
3102 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3103 else
3104 arm_pic_register = pic_register;
3107 if (TARGET_VXWORKS_RTP
3108 && !global_options_set.x_arm_pic_data_is_text_relative)
3109 arm_pic_data_is_text_relative = 0;
3111 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3112 if (fix_cm3_ldrd == 2)
3114 if (arm_selected_cpu->core == cortexm3)
3115 fix_cm3_ldrd = 1;
3116 else
3117 fix_cm3_ldrd = 0;
3120 /* Enable -munaligned-access by default for
3121 - all ARMv6 architecture-based processors
3122 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3123 - ARMv8 architecture-based processors.
3125 Disable -munaligned-access by default for
3126 - all pre-ARMv6 architecture-based processors
3127 - ARMv6-M architecture-based processors. */
3129 if (unaligned_access == 2)
3131 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3132 unaligned_access = 1;
3133 else
3134 unaligned_access = 0;
3136 else if (unaligned_access == 1
3137 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3139 warning (0, "target CPU does not support unaligned accesses");
3140 unaligned_access = 0;
3143 if (TARGET_THUMB1 && flag_schedule_insns)
3145 /* Don't warn since it's on by default in -O2. */
3146 flag_schedule_insns = 0;
3149 if (optimize_size)
3151 /* If optimizing for size, bump the number of instructions that we
3152 are prepared to conditionally execute (even on a StrongARM). */
3153 max_insns_skipped = 6;
3155 /* For THUMB2, we limit the conditional sequence to one IT block. */
3156 if (TARGET_THUMB2)
3157 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3159 else
3160 max_insns_skipped = current_tune->max_insns_skipped;
3162 /* Hot/Cold partitioning is not currently supported, since we can't
3163 handle literal pool placement in that case. */
3164 if (flag_reorder_blocks_and_partition)
3166 inform (input_location,
3167 "-freorder-blocks-and-partition not supported on this architecture");
3168 flag_reorder_blocks_and_partition = 0;
3169 flag_reorder_blocks = 1;
3172 if (flag_pic)
3173 /* Hoisting PIC address calculations more aggressively provides a small,
3174 but measurable, size reduction for PIC code. Therefore, we decrease
3175 the bar for unrestricted expression hoisting to the cost of PIC address
3176 calculation, which is 2 instructions. */
3177 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3178 global_options.x_param_values,
3179 global_options_set.x_param_values);
3181 /* ARM EABI defaults to strict volatile bitfields. */
3182 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3183 && abi_version_at_least(2))
3184 flag_strict_volatile_bitfields = 1;
3186 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3187 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3188 if (flag_prefetch_loop_arrays < 0
3189 && HAVE_prefetch
3190 && optimize >= 3
3191 && current_tune->num_prefetch_slots > 0)
3192 flag_prefetch_loop_arrays = 1;
3194 /* Set up parameters to be used in prefetching algorithm. Do not override the
3195 defaults unless we are tuning for a core we have researched values for. */
3196 if (current_tune->num_prefetch_slots > 0)
3197 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3198 current_tune->num_prefetch_slots,
3199 global_options.x_param_values,
3200 global_options_set.x_param_values);
3201 if (current_tune->l1_cache_line_size >= 0)
3202 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3203 current_tune->l1_cache_line_size,
3204 global_options.x_param_values,
3205 global_options_set.x_param_values);
3206 if (current_tune->l1_cache_size >= 0)
3207 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3208 current_tune->l1_cache_size,
3209 global_options.x_param_values,
3210 global_options_set.x_param_values);
3212 /* Use Neon rather than core registers to perform 64-bit
3213 operations. */
3214 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3215 if (use_neon_for_64bits == 1)
3216 prefer_neon_for_64bits = true;
3218 /* Use the alternative scheduling-pressure algorithm by default. */
3219 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3220 global_options.x_param_values,
3221 global_options_set.x_param_values);
3223 /* Look through the ready list and all of the queue for instructions
3224 relevant to the L2 auto-prefetcher. */
3225 int param_sched_autopref_queue_depth;
3226 if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3227 param_sched_autopref_queue_depth = -1;
3228 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3229 param_sched_autopref_queue_depth = 0;
3230 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3231 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3232 else
3233 gcc_unreachable ();
3234 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3235 param_sched_autopref_queue_depth,
3236 global_options.x_param_values,
3237 global_options_set.x_param_values);
3239 /* Disable shrink-wrap when optimizing function for size, since it tends to
3240 generate additional returns. */
3241 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3242 flag_shrink_wrap = false;
3243 /* TBD: Dwarf info for apcs frame is not handled yet. */
3244 if (TARGET_APCS_FRAME)
3245 flag_shrink_wrap = false;
3247 /* We only support -mslow-flash-data on armv7-m targets. */
3248 if (target_slow_flash_data
3249 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3250 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3251 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3253 /* Currently, for slow flash data, we just disable literal pools. */
3254 if (target_slow_flash_data)
3255 arm_disable_literal_pool = true;
3257 /* Thumb2 inline assembly code should always use unified syntax.
3258 This will apply to ARM and Thumb1 eventually. */
3259 if (TARGET_THUMB2)
3260 inline_asm_unified = 1;
3262 /* Disable scheduling fusion by default if it's not armv7 processor
3263 or doesn't prefer ldrd/strd. */
3264 if (flag_schedule_fusion == 2
3265 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3266 flag_schedule_fusion = 0;
3268 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3269 - epilogue_insns - does not accurately model the corresponding insns
3270 emitted in the asm file. In particular, see the comment in thumb_exit
3271 'Find out how many of the (return) argument registers we can corrupt'.
3272 As a consequence, the epilogue may clobber registers without fipa-ra
3273 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3274 TODO: Accurately model clobbers for epilogue_insns and reenable
3275 fipa-ra. */
3276 if (TARGET_THUMB1)
3277 flag_ipa_ra = 0;
3279 /* Register global variables with the garbage collector. */
3280 arm_add_gc_roots ();
3283 static void
3284 arm_add_gc_roots (void)
3286 gcc_obstack_init(&minipool_obstack);
3287 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3290 /* A table of known ARM exception types.
3291 For use with the interrupt function attribute. */
3293 typedef struct
3295 const char *const arg;
3296 const unsigned long return_value;
3298 isr_attribute_arg;
3300 static const isr_attribute_arg isr_attribute_args [] =
3302 { "IRQ", ARM_FT_ISR },
3303 { "irq", ARM_FT_ISR },
3304 { "FIQ", ARM_FT_FIQ },
3305 { "fiq", ARM_FT_FIQ },
3306 { "ABORT", ARM_FT_ISR },
3307 { "abort", ARM_FT_ISR },
3308 { "ABORT", ARM_FT_ISR },
3309 { "abort", ARM_FT_ISR },
3310 { "UNDEF", ARM_FT_EXCEPTION },
3311 { "undef", ARM_FT_EXCEPTION },
3312 { "SWI", ARM_FT_EXCEPTION },
3313 { "swi", ARM_FT_EXCEPTION },
3314 { NULL, ARM_FT_NORMAL }
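/* Usage example (illustrative, not from the original source): these strings
   are what users pass to the function attribute, e.g.

       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   which arm_isr_value below maps to ARM_FT_ISR.  */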
3317 /* Returns the (interrupt) function type of the current
3318 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3320 static unsigned long
3321 arm_isr_value (tree argument)
3323 const isr_attribute_arg * ptr;
3324 const char * arg;
3326 if (!arm_arch_notm)
3327 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3329 /* No argument - default to IRQ. */
3330 if (argument == NULL_TREE)
3331 return ARM_FT_ISR;
3333 /* Get the value of the argument. */
3334 if (TREE_VALUE (argument) == NULL_TREE
3335 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3336 return ARM_FT_UNKNOWN;
3338 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3340 /* Check it against the list of known arguments. */
3341 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3342 if (streq (arg, ptr->arg))
3343 return ptr->return_value;
3345 /* An unrecognized interrupt type. */
3346 return ARM_FT_UNKNOWN;
3349 /* Computes the type of the current function. */
3351 static unsigned long
3352 arm_compute_func_type (void)
3354 unsigned long type = ARM_FT_UNKNOWN;
3355 tree a;
3356 tree attr;
3358 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3360 /* Decide if the current function is volatile. Such functions
3361 never return, and many memory cycles can be saved by not storing
3362 register values that will never be needed again. This optimization
3363 was added to speed up context switching in a kernel application. */
3364 if (optimize > 0
3365 && (TREE_NOTHROW (current_function_decl)
3366 || !(flag_unwind_tables
3367 || (flag_exceptions
3368 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3369 && TREE_THIS_VOLATILE (current_function_decl))
3370 type |= ARM_FT_VOLATILE;
3372 if (cfun->static_chain_decl != NULL)
3373 type |= ARM_FT_NESTED;
3375 attr = DECL_ATTRIBUTES (current_function_decl);
3377 a = lookup_attribute ("naked", attr);
3378 if (a != NULL_TREE)
3379 type |= ARM_FT_NAKED;
3381 a = lookup_attribute ("isr", attr);
3382 if (a == NULL_TREE)
3383 a = lookup_attribute ("interrupt", attr);
3385 if (a == NULL_TREE)
3386 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3387 else
3388 type |= arm_isr_value (TREE_VALUE (a));
3390 return type;
3393 /* Returns the type of the current function. */
3395 unsigned long
3396 arm_current_func_type (void)
3398 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3399 cfun->machine->func_type = arm_compute_func_type ();
3401 return cfun->machine->func_type;
3404 bool
3405 arm_allocate_stack_slots_for_args (void)
3407 /* Naked functions should not allocate stack slots for arguments. */
3408 return !IS_NAKED (arm_current_func_type ());
3411 static bool
3412 arm_warn_func_return (tree decl)
3414 /* Naked functions are implemented entirely in assembly, including the
3415 return sequence, so suppress warnings about this. */
3416 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3420 /* Output assembler code for a block containing the constant parts
3421 of a trampoline, leaving space for the variable parts.
3423 On the ARM, (if r8 is the static chain regnum, and remembering that
3424 referencing pc adds an offset of 8) the trampoline looks like:
3425 ldr r8, [pc, #0]
3426 ldr pc, [pc]
3427 .word static chain value
3428 .word function's address
3429 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3431 static void
3432 arm_asm_trampoline_template (FILE *f)
3434 if (TARGET_ARM)
3436 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3437 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3439 else if (TARGET_THUMB2)
3441 /* The Thumb-2 trampoline is similar to the ARM implementation.
3442 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3443 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3444 STATIC_CHAIN_REGNUM, PC_REGNUM);
3445 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3447 else
3449 ASM_OUTPUT_ALIGN (f, 2);
3450 fprintf (f, "\t.code\t16\n");
3451 fprintf (f, ".Ltrampoline_start:\n");
3452 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3453 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3454 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3455 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3456 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3457 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3459 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3460 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3463 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3465 static void
3466 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3468 rtx fnaddr, mem, a_tramp;
3470 emit_block_move (m_tramp, assemble_trampoline_template (),
3471 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3473 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3474 emit_move_insn (mem, chain_value);
3476 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3477 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3478 emit_move_insn (mem, fnaddr);
3480 a_tramp = XEXP (m_tramp, 0);
3481 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3482 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3483 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
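/* Worked example (assumes TARGET_32BIT, so the chain and function address
   land at offsets 8 and 12 as in the code above): after arm_trampoline_init
   runs, the trampoline block is laid out roughly as

     offset  0:  ldr  STATIC_CHAIN, [pc, #0]   (ldr.w pair for Thumb-2)
     offset  4:  ldr  pc, [pc]
     offset  8:  <static chain value>          stored by the first move
     offset 12:  <target function address>     stored by the second move

   followed by the __clear_cache library call to keep the instruction
   cache coherent with the freshly written words.  */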
3486 /* Thumb trampolines should be entered in thumb mode, so set
3487 the bottom bit of the address. */
3489 static rtx
3490 arm_trampoline_adjust_address (rtx addr)
3492 if (TARGET_THUMB)
3493 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3494 NULL, 0, OPTAB_LIB_WIDEN);
3495 return addr;
3498 /* Return 1 if it is possible to return using a single instruction.
3499 If SIBLING is non-null, this is a test for a return before a sibling
3500 call. SIBLING is the call insn, so we can examine its register usage. */
3502 int
3503 use_return_insn (int iscond, rtx sibling)
3505 int regno;
3506 unsigned int func_type;
3507 unsigned long saved_int_regs;
3508 unsigned HOST_WIDE_INT stack_adjust;
3509 arm_stack_offsets *offsets;
3511 /* Never use a return instruction before reload has run. */
3512 if (!reload_completed)
3513 return 0;
3515 func_type = arm_current_func_type ();
3517 /* Naked, volatile and stack alignment functions need special
3518 consideration. */
3519 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3520 return 0;
3522 /* So do interrupt functions that use the frame pointer and Thumb
3523 interrupt functions. */
3524 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3525 return 0;
3527 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3528 && !optimize_function_for_size_p (cfun))
3529 return 0;
3531 offsets = arm_get_frame_offsets ();
3532 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3534 /* As do variadic functions. */
3535 if (crtl->args.pretend_args_size
3536 || cfun->machine->uses_anonymous_args
3537 /* Or if the function calls __builtin_eh_return () */
3538 || crtl->calls_eh_return
3539 /* Or if the function calls alloca */
3540 || cfun->calls_alloca
3541 /* Or if there is a stack adjustment. However, if the stack pointer
3542 is saved on the stack, we can use a pre-incrementing stack load. */
3543 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3544 && stack_adjust == 4)))
3545 return 0;
3547 saved_int_regs = offsets->saved_regs_mask;
3549 /* Unfortunately, the insn
3551 ldmib sp, {..., sp, ...}
3553 triggers a bug on most SA-110 based devices, such that the stack
3554 pointer won't be correctly restored if the instruction takes a
3555 page fault. We work around this problem by popping r3 along with
3556 the other registers, since that is never slower than executing
3557 another instruction.
3559 We test for !arm_arch5 here, because code for any architecture
3560 less than this could potentially be run on one of the buggy
3561 chips. */
3562 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3564 /* Validate that r3 is a call-clobbered register (always true in
3565 the default abi) ... */
3566 if (!call_used_regs[3])
3567 return 0;
3569 /* ... that it isn't being used for a return value ... */
3570 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3571 return 0;
3573 /* ... or for a tail-call argument ... */
3574 if (sibling)
3576 gcc_assert (CALL_P (sibling));
3578 if (find_regno_fusage (sibling, USE, 3))
3579 return 0;
3582 /* ... and that there are no call-saved registers in r0-r2
3583 (always true in the default ABI). */
3584 if (saved_int_regs & 0x7)
3585 return 0;
3588 /* Can't be done if interworking with Thumb, and any registers have been
3589 stacked. */
3590 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3591 return 0;
3593 /* On StrongARM, conditional returns are expensive if they aren't
3594 taken and multiple registers have been stacked. */
3595 if (iscond && arm_tune_strongarm)
3597 /* Conditional return when just the LR is stored is a simple
3598 conditional-load instruction, that's not expensive. */
3599 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3600 return 0;
3602 if (flag_pic
3603 && arm_pic_register != INVALID_REGNUM
3604 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3605 return 0;
3608 /* If there are saved registers but the LR isn't saved, then we need
3609 two instructions for the return. */
3610 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3611 return 0;
3613 /* Can't be done if any of the VFP regs are pushed,
3614 since this also requires an insn. */
3615 if (TARGET_HARD_FLOAT && TARGET_VFP)
3616 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3617 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3618 return 0;
3620 if (TARGET_REALLY_IWMMXT)
3621 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3622 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3623 return 0;
3625 return 1;
3628 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3629 shrink-wrapping if possible. This is the case if we need to emit a
3630 prologue, which we can test by looking at the offsets. */
3631 bool
3632 use_simple_return_p (void)
3634 arm_stack_offsets *offsets;
3636 offsets = arm_get_frame_offsets ();
3637 return offsets->outgoing_args != 0;
3640 /* Return TRUE if int I is a valid immediate ARM constant. */
3642 int
3643 const_ok_for_arm (HOST_WIDE_INT i)
3645 int lowbit;
3647 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3648 be all zero, or all one. */
3649 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3650 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3651 != ((~(unsigned HOST_WIDE_INT) 0)
3652 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3653 return FALSE;
3655 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3657 /* Fast return for 0 and small values. We must do this for zero, since
3658 the code below can't handle that one case. */
3659 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3660 return TRUE;
3662 /* Get the number of trailing zeros. */
3663 lowbit = ffs((int) i) - 1;
3665 /* Only even shifts are allowed in ARM mode so round down to the
3666 nearest even number. */
3667 if (TARGET_ARM)
3668 lowbit &= ~1;
3670 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3671 return TRUE;
3673 if (TARGET_ARM)
3675 /* Allow rotated constants in ARM mode. */
3676 if (lowbit <= 4
3677 && ((i & ~0xc000003f) == 0
3678 || (i & ~0xf000000f) == 0
3679 || (i & ~0xfc000003) == 0))
3680 return TRUE;
3682 else
3684 HOST_WIDE_INT v;
3686 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3687 v = i & 0xff;
3688 v |= v << 16;
3689 if (i == v || i == (v | (v << 8)))
3690 return TRUE;
3692 /* Allow repeated pattern 0xXY00XY00. */
3693 v = i & 0xff00;
3694 v |= v << 16;
3695 if (i == v)
3696 return TRUE;
3699 return FALSE;
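/* Worked examples for the rules above (illustrative, not exhaustive):

     0x000000FF  valid everywhere (fits in the low 8 bits).
     0x0000FF00  valid: 0xFF shifted left by 8, an even rotation in ARM mode.
     0x000001FE  invalid in ARM mode (would need an odd rotation of 0xFF),
                 but valid in Thumb-2, where any shift amount is allowed.
     0x00FF00FF  invalid in ARM mode, valid in Thumb-2 as the replicated
                 pattern 0x00XY00XY.
     0x12345678  invalid in both; arm_gen_constant below has to synthesize
                 it from several immediates.  */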
3702 /* Return true if I is a valid constant for the operation CODE. */
3703 int
3704 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3706 if (const_ok_for_arm (i))
3707 return 1;
3709 switch (code)
3711 case SET:
3712 /* See if we can use movw. */
3713 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3714 return 1;
3715 else
3716 /* Otherwise, try mvn. */
3717 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3719 case PLUS:
3720 /* See if we can use addw or subw. */
3721 if (TARGET_THUMB2
3722 && ((i & 0xfffff000) == 0
3723 || ((-i) & 0xfffff000) == 0))
3724 return 1;
3725 /* else fall through. */
3727 case COMPARE:
3728 case EQ:
3729 case NE:
3730 case GT:
3731 case LE:
3732 case LT:
3733 case GE:
3734 case GEU:
3735 case LTU:
3736 case GTU:
3737 case LEU:
3738 case UNORDERED:
3739 case ORDERED:
3740 case UNEQ:
3741 case UNGE:
3742 case UNLT:
3743 case UNGT:
3744 case UNLE:
3745 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3747 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3748 case XOR:
3749 return 0;
3751 case IOR:
3752 if (TARGET_THUMB2)
3753 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3754 return 0;
3756 case AND:
3757 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3759 default:
3760 gcc_unreachable ();
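/* Illustrative consequences of the cases above (the Thumb-2 examples
   assume arm_arch_thumb2 / TARGET_THUMB2):

     SET  0x00001234  ->  OK via movw, since the high 16 bits are zero.
     PLUS 0x00000FFF  ->  OK via addw/subw, since it fits in 12 bits.
     AND  0xFFFFFF00  ->  OK everywhere: ~0xFFFFFF00 == 0xFF is a valid
                          immediate, so the operation can use bic.
     IOR  0xFFFFFF00  ->  OK on Thumb-2 only, where orn is available.  */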
3764 /* Return true if I is a valid di mode constant for the operation CODE. */
3765 int
3766 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3768 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3769 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3770 rtx hi = GEN_INT (hi_val);
3771 rtx lo = GEN_INT (lo_val);
3773 if (TARGET_THUMB1)
3774 return 0;
3776 switch (code)
3778 case AND:
3779 case IOR:
3780 case XOR:
3781 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3782 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3783 case PLUS:
3784 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3786 default:
3787 return 0;
3791 /* Emit a sequence of insns to handle a large constant.
3792 CODE is the code of the operation required, it can be any of SET, PLUS,
3793 IOR, AND, XOR, MINUS;
3794 MODE is the mode in which the operation is being performed;
3795 VAL is the integer to operate on;
3796 SOURCE is the other operand (a register, or a null-pointer for SET);
3797 SUBTARGETS means it is safe to create scratch registers if that will
3798 either produce a simpler sequence, or we will want to cse the values.
3799 Return value is the number of insns emitted. */
3801 /* ??? Tweak this for thumb2. */
3802 int
3803 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3804 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3806 rtx cond;
3808 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3809 cond = COND_EXEC_TEST (PATTERN (insn));
3810 else
3811 cond = NULL_RTX;
3813 if (subtargets || code == SET
3814 || (REG_P (target) && REG_P (source)
3815 && REGNO (target) != REGNO (source)))
3817 /* After arm_reorg has been called, we can't fix up expensive
3818 constants by pushing them into memory so we must synthesize
3819 them in-line, regardless of the cost. This is only likely to
3820 be more costly on chips that have load delay slots and we are
3821 compiling without running the scheduler (so no splitting
3822 occurred before the final instruction emission).
3824 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3826 if (!cfun->machine->after_arm_reorg
3827 && !cond
3828 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3829 1, 0)
3830 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3831 + (code != SET))))
3833 if (code == SET)
3835 /* Currently SET is the only monadic value for CODE, all
3836 the rest are dyadic. */
3837 if (TARGET_USE_MOVT)
3838 arm_emit_movpair (target, GEN_INT (val));
3839 else
3840 emit_set_insn (target, GEN_INT (val));
3842 return 1;
3844 else
3846 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3848 if (TARGET_USE_MOVT)
3849 arm_emit_movpair (temp, GEN_INT (val));
3850 else
3851 emit_set_insn (temp, GEN_INT (val));
3853 /* For MINUS, the value is subtracted from, since we never
3854 have subtraction of a constant. */
3855 if (code == MINUS)
3856 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3857 else
3858 emit_set_insn (target,
3859 gen_rtx_fmt_ee (code, mode, source, temp));
3860 return 2;
3865 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3866 1);
3869 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3870 ARM/THUMB2 immediates, and add up to VAL.
3871 The function's return value gives the number of insns required. */
3872 static int
3873 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3874 struct four_ints *return_sequence)
3876 int best_consecutive_zeros = 0;
3877 int i;
3878 int best_start = 0;
3879 int insns1, insns2;
3880 struct four_ints tmp_sequence;
3882 /* If we aren't targeting ARM, the best place to start is always at
3883 the bottom, otherwise look more closely. */
3884 if (TARGET_ARM)
3886 for (i = 0; i < 32; i += 2)
3888 int consecutive_zeros = 0;
3890 if (!(val & (3 << i)))
3892 while ((i < 32) && !(val & (3 << i)))
3894 consecutive_zeros += 2;
3895 i += 2;
3897 if (consecutive_zeros > best_consecutive_zeros)
3899 best_consecutive_zeros = consecutive_zeros;
3900 best_start = i - consecutive_zeros;
3902 i -= 2;
3907 /* So long as it won't require any more insns to do so, it's
3908 desirable to emit a small constant (in bits 0...9) in the last
3909 insn. This way there is more chance that it can be combined with
3910 a later addressing insn to form a pre-indexed load or store
3911 operation. Consider:
3913 *((volatile int *)0xe0000100) = 1;
3914 *((volatile int *)0xe0000110) = 2;
3916 We want this to wind up as:
3918 mov rA, #0xe0000000
3919 mov rB, #1
3920 str rB, [rA, #0x100]
3921 mov rB, #2
3922 str rB, [rA, #0x110]
3924 rather than having to synthesize both large constants from scratch.
3926 Therefore, we calculate how many insns would be required to emit
3927 the constant starting from `best_start', and also starting from
3928 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3929 yield a shorter sequence, we may as well use zero. */
3930 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3931 if (best_start != 0
3932 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3934 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3935 if (insns2 <= insns1)
3937 *return_sequence = tmp_sequence;
3938 insns1 = insns2;
3942 return insns1;
3945 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3946 static int
3947 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3948 struct four_ints *return_sequence, int i)
3950 int remainder = val & 0xffffffff;
3951 int insns = 0;
3953 /* Try and find a way of doing the job in either two or three
3954 instructions.
3956 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3957 location. We start at position I. This may be the MSB, or
3958 optimal_immediate_sequence may have positioned it at the largest block
3959 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3960 wrapping around to the top of the word when we drop off the bottom.
3961 In the worst case this code should produce no more than four insns.
3963 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3964 constants, shifted to any arbitrary location. We should always start
3965 at the MSB. */
3966 do
3968 int end;
3969 unsigned int b1, b2, b3, b4;
3970 unsigned HOST_WIDE_INT result;
3971 int loc;
3973 gcc_assert (insns < 4);
3975 if (i <= 0)
3976 i += 32;
3978 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3979 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3981 loc = i;
3982 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3983 /* We can use addw/subw for the last 12 bits. */
3984 result = remainder;
3985 else
3987 /* Use an 8-bit shifted/rotated immediate. */
3988 end = i - 8;
3989 if (end < 0)
3990 end += 32;
3991 result = remainder & ((0x0ff << end)
3992 | ((i < end) ? (0xff >> (32 - end))
3993 : 0));
3994 i -= 8;
3997 else
3999 /* ARM allows rotates by a multiple of two. Thumb-2 allows
4000 arbitrary shifts. */
4001 i -= TARGET_ARM ? 2 : 1;
4002 continue;
4005 /* Next, see if we can do a better job with a thumb2 replicated
4006 constant.
4008 We do it this way around to catch the cases like 0x01F001E0 where
4009 two 8-bit immediates would work, but a replicated constant would
4010 make it worse.
4012 TODO: 16-bit constants that don't clear all the bits, but still win.
4013 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4014 if (TARGET_THUMB2)
4016 b1 = (remainder & 0xff000000) >> 24;
4017 b2 = (remainder & 0x00ff0000) >> 16;
4018 b3 = (remainder & 0x0000ff00) >> 8;
4019 b4 = remainder & 0xff;
4021 if (loc > 24)
4023 /* The 8-bit immediate already found clears b1 (and maybe b2),
4024 but must leave b3 and b4 alone. */
4026 /* First try to find a 32-bit replicated constant that clears
4027 almost everything. We can assume that we can't do it in one,
4028 or else we wouldn't be here. */
4029 unsigned int tmp = b1 & b2 & b3 & b4;
4030 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4031 + (tmp << 24);
4032 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4033 + (tmp == b3) + (tmp == b4);
4034 if (tmp
4035 && (matching_bytes >= 3
4036 || (matching_bytes == 2
4037 && const_ok_for_op (remainder & ~tmp2, code))))
4039 /* At least 3 of the bytes match, and the fourth has at
4040 least as many bits set, or two of the bytes match
4041 and it will only require one more insn to finish. */
4042 result = tmp2;
4043 i = tmp != b1 ? 32
4044 : tmp != b2 ? 24
4045 : tmp != b3 ? 16
4046 : 8;
4049 /* Second, try to find a 16-bit replicated constant that can
4050 leave three of the bytes clear. If b2 or b4 is already
4051 zero, then we can. If the 8-bit from above would not
4052 clear b2 anyway, then we still win. */
4053 else if (b1 == b3 && (!b2 || !b4
4054 || (remainder & 0x00ff0000 & ~result)))
4056 result = remainder & 0xff00ff00;
4057 i = 24;
4060 else if (loc > 16)
4062 /* The 8-bit immediate already found clears b2 (and maybe b3)
4063 and we don't get here unless b1 is already clear, but it will
4064 leave b4 unchanged. */
4066 /* If we can clear b2 and b4 at once, then we win, since the
4067 8-bits couldn't possibly reach that far. */
4068 if (b2 == b4)
4070 result = remainder & 0x00ff00ff;
4071 i = 16;
4076 return_sequence->i[insns++] = result;
4077 remainder &= ~result;
4079 if (code == SET || code == MINUS)
4080 code = PLUS;
4082 while (remainder);
4084 return insns;
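/* Worked example for the 0x01F001E0 case mentioned in the comment above:
   the two halves 0x01F00000 and 0x000001E0 are each a single shifted 8-bit
   immediate, so the plain two-immediate split wins.  Clearing a 16-bit
   replicated constant first (e.g. 0x01000100) would leave 0x00F000E0,
   which still needs two more immediates, for three instructions in
   total.  */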
4087 /* Emit an instruction with the indicated PATTERN. If COND is
4088 non-NULL, conditionalize the execution of the instruction on COND
4089 being true. */
4091 static void
4092 emit_constant_insn (rtx cond, rtx pattern)
4094 if (cond)
4095 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4096 emit_insn (pattern);
4099 /* As above, but extra parameter GENERATE which, if clear, suppresses
4100 RTL generation. */
4102 static int
4103 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4104 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4105 int generate)
4107 int can_invert = 0;
4108 int can_negate = 0;
4109 int final_invert = 0;
4110 int i;
4111 int set_sign_bit_copies = 0;
4112 int clear_sign_bit_copies = 0;
4113 int clear_zero_bit_copies = 0;
4114 int set_zero_bit_copies = 0;
4115 int insns = 0, neg_insns, inv_insns;
4116 unsigned HOST_WIDE_INT temp1, temp2;
4117 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4118 struct four_ints *immediates;
4119 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4121 /* Find out which operations are safe for a given CODE. Also do a quick
4122 check for degenerate cases; these can occur when DImode operations
4123 are split. */
4124 switch (code)
4126 case SET:
4127 can_invert = 1;
4128 break;
4130 case PLUS:
4131 can_negate = 1;
4132 break;
4134 case IOR:
4135 if (remainder == 0xffffffff)
4137 if (generate)
4138 emit_constant_insn (cond,
4139 gen_rtx_SET (target,
4140 GEN_INT (ARM_SIGN_EXTEND (val))));
4141 return 1;
4144 if (remainder == 0)
4146 if (reload_completed && rtx_equal_p (target, source))
4147 return 0;
4149 if (generate)
4150 emit_constant_insn (cond, gen_rtx_SET (target, source));
4151 return 1;
4153 break;
4155 case AND:
4156 if (remainder == 0)
4158 if (generate)
4159 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4160 return 1;
4162 if (remainder == 0xffffffff)
4164 if (reload_completed && rtx_equal_p (target, source))
4165 return 0;
4166 if (generate)
4167 emit_constant_insn (cond, gen_rtx_SET (target, source));
4168 return 1;
4170 can_invert = 1;
4171 break;
4173 case XOR:
4174 if (remainder == 0)
4176 if (reload_completed && rtx_equal_p (target, source))
4177 return 0;
4178 if (generate)
4179 emit_constant_insn (cond, gen_rtx_SET (target, source));
4180 return 1;
4183 if (remainder == 0xffffffff)
4185 if (generate)
4186 emit_constant_insn (cond,
4187 gen_rtx_SET (target,
4188 gen_rtx_NOT (mode, source)));
4189 return 1;
4191 final_invert = 1;
4192 break;
4194 case MINUS:
4195 /* We treat MINUS as (val - source), since (source - val) is always
4196 passed as (source + (-val)). */
4197 if (remainder == 0)
4199 if (generate)
4200 emit_constant_insn (cond,
4201 gen_rtx_SET (target,
4202 gen_rtx_NEG (mode, source)));
4203 return 1;
4205 if (const_ok_for_arm (val))
4207 if (generate)
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (target,
4210 gen_rtx_MINUS (mode, GEN_INT (val),
4211 source)));
4212 return 1;
4215 break;
4217 default:
4218 gcc_unreachable ();
4221 /* If we can do it in one insn get out quickly. */
4222 if (const_ok_for_op (val, code))
4224 if (generate)
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (target,
4227 (source
4228 ? gen_rtx_fmt_ee (code, mode, source,
4229 GEN_INT (val))
4230 : GEN_INT (val))));
4231 return 1;
4234 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4235 insn. */
4236 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4237 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4239 if (generate)
4241 if (mode == SImode && i == 16)
4242 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4243 smaller insn. */
4244 emit_constant_insn (cond,
4245 gen_zero_extendhisi2
4246 (target, gen_lowpart (HImode, source)));
4247 else
4248 /* Extz only supports SImode, but we can coerce the operands
4249 into that mode. */
4250 emit_constant_insn (cond,
4251 gen_extzv_t2 (gen_lowpart (SImode, target),
4252 gen_lowpart (SImode, source),
4253 GEN_INT (i), const0_rtx));
4256 return 1;
4259 /* Calculate a few attributes that may be useful for specific
4260 optimizations. */
4261 /* Count number of leading zeros. */
4262 for (i = 31; i >= 0; i--)
4264 if ((remainder & (1 << i)) == 0)
4265 clear_sign_bit_copies++;
4266 else
4267 break;
4270 /* Count number of leading 1's. */
4271 for (i = 31; i >= 0; i--)
4273 if ((remainder & (1 << i)) != 0)
4274 set_sign_bit_copies++;
4275 else
4276 break;
4279 /* Count number of trailing zero's. */
4280 for (i = 0; i <= 31; i++)
4282 if ((remainder & (1 << i)) == 0)
4283 clear_zero_bit_copies++;
4284 else
4285 break;
4288 /* Count number of trailing 1's. */
4289 for (i = 0; i <= 31; i++)
4291 if ((remainder & (1 << i)) != 0)
4292 set_zero_bit_copies++;
4293 else
4294 break;
4297 switch (code)
4299 case SET:
4300 /* See if we can do this by sign_extending a constant that is known
4301 to be negative. This is a good way of doing it, since the shift
4302 may well merge into a subsequent insn. */
4303 if (set_sign_bit_copies > 1)
4305 if (const_ok_for_arm
4306 (temp1 = ARM_SIGN_EXTEND (remainder
4307 << (set_sign_bit_copies - 1))))
4309 if (generate)
4311 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4312 emit_constant_insn (cond,
4313 gen_rtx_SET (new_src, GEN_INT (temp1)));
4314 emit_constant_insn (cond,
4315 gen_ashrsi3 (target, new_src,
4316 GEN_INT (set_sign_bit_copies - 1)));
4318 return 2;
4320 /* For an inverted constant, we will need to set the low bits,
4321 these will be shifted out of harm's way. */
4322 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4323 if (const_ok_for_arm (~temp1))
4325 if (generate)
4327 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4328 emit_constant_insn (cond,
4329 gen_rtx_SET (new_src, GEN_INT (temp1)));
4330 emit_constant_insn (cond,
4331 gen_ashrsi3 (target, new_src,
4332 GEN_INT (set_sign_bit_copies - 1)));
4334 return 2;
4338 /* See if we can calculate the value as the difference between two
4339 valid immediates. */
4340 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4342 int topshift = clear_sign_bit_copies & ~1;
4344 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4345 & (0xff000000 >> topshift));
4347 /* If temp1 is zero, then that means the 9 most significant
4348 bits of remainder were 1 and we've caused it to overflow.
4349 When topshift is 0 we don't need to do anything since we
4350 can borrow from 'bit 32'. */
4351 if (temp1 == 0 && topshift != 0)
4352 temp1 = 0x80000000 >> (topshift - 1);
4354 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4356 if (const_ok_for_arm (temp2))
4358 if (generate)
4360 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4361 emit_constant_insn (cond,
4362 gen_rtx_SET (new_src, GEN_INT (temp1)));
4363 emit_constant_insn (cond,
4364 gen_addsi3 (target, new_src,
4365 GEN_INT (-temp2)));
4368 return 2;
4372 /* See if we can generate this by setting the bottom (or the top)
4373 16 bits, and then shifting these into the other half of the
4374 word. We only look for the simplest cases, to do more would cost
4375 too much. Be careful, however, not to generate this when the
4376 alternative would take fewer insns. */
4377 if (val & 0xffff0000)
4379 temp1 = remainder & 0xffff0000;
4380 temp2 = remainder & 0x0000ffff;
4382 /* Overlaps outside this range are best done using other methods. */
4383 for (i = 9; i < 24; i++)
4385 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4386 && !const_ok_for_arm (temp2))
4388 rtx new_src = (subtargets
4389 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4390 : target);
4391 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4392 source, subtargets, generate);
4393 source = new_src;
4394 if (generate)
4395 emit_constant_insn
4396 (cond,
4397 gen_rtx_SET
4398 (target,
4399 gen_rtx_IOR (mode,
4400 gen_rtx_ASHIFT (mode, source,
4401 GEN_INT (i)),
4402 source)));
4403 return insns + 1;
4407 /* Don't duplicate cases already considered. */
4408 for (i = 17; i < 24; i++)
4410 if (((temp1 | (temp1 >> i)) == remainder)
4411 && !const_ok_for_arm (temp1))
4413 rtx new_src = (subtargets
4414 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4415 : target);
4416 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4417 source, subtargets, generate);
4418 source = new_src;
4419 if (generate)
4420 emit_constant_insn
4421 (cond,
4422 gen_rtx_SET (target,
4423 gen_rtx_IOR
4424 (mode,
4425 gen_rtx_LSHIFTRT (mode, source,
4426 GEN_INT (i)),
4427 source)));
4428 return insns + 1;
4432 break;
4434 case IOR:
4435 case XOR:
4436 /* If we have IOR or XOR, and the constant can be loaded in a
4437 single instruction, and we can find a temporary to put it in,
4438 then this can be done in two instructions instead of 3-4. */
4439 if (subtargets
4440 /* TARGET can't be NULL if SUBTARGETS is 0 */
4441 || (reload_completed && !reg_mentioned_p (target, source)))
4443 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4445 if (generate)
4447 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4449 emit_constant_insn (cond,
4450 gen_rtx_SET (sub, GEN_INT (val)));
4451 emit_constant_insn (cond,
4452 gen_rtx_SET (target,
4453 gen_rtx_fmt_ee (code, mode,
4454 source, sub)));
4456 return 2;
4460 if (code == XOR)
4461 break;
4463 /* Convert.
4464 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4465 followed by 0s, e.g. 0xfff00000)
4466 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4468 This can be done in 2 instructions by using shifts with mov or mvn.
4469 e.g. for
4470 x = x | 0xfff00000;
4471 we generate.
4472 mvn r0, r0, asl #12
4473 mvn r0, r0, lsr #12 */
4474 if (set_sign_bit_copies > 8
4475 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4477 if (generate)
4479 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (set_sign_bit_copies);
4482 emit_constant_insn
4483 (cond,
4484 gen_rtx_SET (sub,
4485 gen_rtx_NOT (mode,
4486 gen_rtx_ASHIFT (mode,
4487 source,
4488 shift))));
4489 emit_constant_insn
4490 (cond,
4491 gen_rtx_SET (target,
4492 gen_rtx_NOT (mode,
4493 gen_rtx_LSHIFTRT (mode, sub,
4494 shift))));
4496 return 2;
4499 /* Convert
4500 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4502 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4504 For example, r0 = r0 | 0xfff
4505 mvn r0, r0, lsr #12
4506 mvn r0, r0, asl #12
4509 if (set_zero_bit_copies > 8
4510 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4512 if (generate)
4514 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4515 rtx shift = GEN_INT (set_zero_bit_copies);
4517 emit_constant_insn
4518 (cond,
4519 gen_rtx_SET (sub,
4520 gen_rtx_NOT (mode,
4521 gen_rtx_LSHIFTRT (mode,
4522 source,
4523 shift))));
4524 emit_constant_insn
4525 (cond,
4526 gen_rtx_SET (target,
4527 gen_rtx_NOT (mode,
4528 gen_rtx_ASHIFT (mode, sub,
4529 shift))));
4531 return 2;
4534 /* This will never be reached for Thumb2 because orn is a valid
4535 instruction. This is for Thumb1 and the ARM 32 bit cases.
4537 x = y | constant (such that ~constant is a valid constant)
4538 Transform this to
4539 x = ~(~y & ~constant).
4541 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4543 if (generate)
4545 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4546 emit_constant_insn (cond,
4547 gen_rtx_SET (sub,
4548 gen_rtx_NOT (mode, source)));
4549 source = sub;
4550 if (subtargets)
4551 sub = gen_reg_rtx (mode);
4552 emit_constant_insn (cond,
4553 gen_rtx_SET (sub,
4554 gen_rtx_AND (mode, source,
4555 GEN_INT (temp1))));
4556 emit_constant_insn (cond,
4557 gen_rtx_SET (target,
4558 gen_rtx_NOT (mode, sub)));
4560 return 3;
4562 break;
4564 case AND:
4565 /* See if two shifts will do 2 or more insn's worth of work. */
4566 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4568 HOST_WIDE_INT shift_mask = ((0xffffffff
4569 << (32 - clear_sign_bit_copies))
4570 & 0xffffffff);
4572 if ((remainder | shift_mask) != 0xffffffff)
4574 HOST_WIDE_INT new_val
4575 = ARM_SIGN_EXTEND (remainder | shift_mask);
4577 if (generate)
4579 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4580 insns = arm_gen_constant (AND, SImode, cond, new_val,
4581 new_src, source, subtargets, 1);
4582 source = new_src;
4584 else
4586 rtx targ = subtargets ? NULL_RTX : target;
4587 insns = arm_gen_constant (AND, mode, cond, new_val,
4588 targ, source, subtargets, 0);
4592 if (generate)
4594 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4595 rtx shift = GEN_INT (clear_sign_bit_copies);
4597 emit_insn (gen_ashlsi3 (new_src, source, shift));
4598 emit_insn (gen_lshrsi3 (target, new_src, shift));
4601 return insns + 2;
4604 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4606 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4608 if ((remainder | shift_mask) != 0xffffffff)
4610 HOST_WIDE_INT new_val
4611 = ARM_SIGN_EXTEND (remainder | shift_mask);
4612 if (generate)
4614 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4616 insns = arm_gen_constant (AND, mode, cond, new_val,
4617 new_src, source, subtargets, 1);
4618 source = new_src;
4620 else
4622 rtx targ = subtargets ? NULL_RTX : target;
4624 insns = arm_gen_constant (AND, mode, cond, new_val,
4625 targ, source, subtargets, 0);
4629 if (generate)
4631 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4632 rtx shift = GEN_INT (clear_zero_bit_copies);
4634 emit_insn (gen_lshrsi3 (new_src, source, shift));
4635 emit_insn (gen_ashlsi3 (target, new_src, shift));
4638 return insns + 2;
4641 break;
4643 default:
4644 break;
4647 /* Calculate what the instruction sequences would be if we generated it
4648 normally, negated, or inverted. */
4649 if (code == AND)
4650 /* AND cannot be split into multiple insns, so invert and use BIC. */
4651 insns = 99;
4652 else
4653 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4655 if (can_negate)
4656 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4657 &neg_immediates);
4658 else
4659 neg_insns = 99;
4661 if (can_invert || final_invert)
4662 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4663 &inv_immediates);
4664 else
4665 inv_insns = 99;
4667 immediates = &pos_immediates;
4669 /* Is the negated immediate sequence more efficient? */
4670 if (neg_insns < insns && neg_insns <= inv_insns)
4672 insns = neg_insns;
4673 immediates = &neg_immediates;
4675 else
4676 can_negate = 0;
4678 /* Is the inverted immediate sequence more efficient?
4679 We must allow for an extra NOT instruction for XOR operations, although
4680 there is some chance that the final 'mvn' will get optimized later. */
4681 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4683 insns = inv_insns;
4684 immediates = &inv_immediates;
4686 else
4688 can_invert = 0;
4689 final_invert = 0;
4692 /* Now output the chosen sequence as instructions. */
4693 if (generate)
4695 for (i = 0; i < insns; i++)
4697 rtx new_src, temp1_rtx;
4699 temp1 = immediates->i[i];
4701 if (code == SET || code == MINUS)
4702 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4703 else if ((final_invert || i < (insns - 1)) && subtargets)
4704 new_src = gen_reg_rtx (mode);
4705 else
4706 new_src = target;
4708 if (can_invert)
4709 temp1 = ~temp1;
4710 else if (can_negate)
4711 temp1 = -temp1;
4713 temp1 = trunc_int_for_mode (temp1, mode);
4714 temp1_rtx = GEN_INT (temp1);
4716 if (code == SET)
4718 else if (code == MINUS)
4719 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4720 else
4721 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4723 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4724 source = new_src;
4726 if (code == SET)
4728 can_negate = can_invert;
4729 can_invert = 0;
4730 code = PLUS;
4732 else if (code == MINUS)
4733 code = PLUS;
4737 if (final_invert)
4739 if (generate)
4740 emit_constant_insn (cond, gen_rtx_SET (target,
4741 gen_rtx_NOT (mode, source)));
4742 insns++;
4745 return insns;
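/* Illustrative synthesis (the register name rD is hypothetical): a SET of
   0x12345678 has no single-instruction encoding, so one possible sequence
   emitted by the loop above is

     mov  rD, #0x12000000
     add  rD, rD, #0x00340000
     add  rD, rD, #0x00005600
     add  rD, rD, #0x00000078

   i.e. four insns here, which is also the return value.  The exact chunks
   come from optimal_immediate_sequence and may be grouped differently.  */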
4748 /* Canonicalize a comparison so that we are more likely to recognize it.
4749 This can be done for a few constant compares, where we can make the
4750 immediate value easier to load. */
4752 static void
4753 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4754 bool op0_preserve_value)
4756 machine_mode mode;
4757 unsigned HOST_WIDE_INT i, maxval;
4759 mode = GET_MODE (*op0);
4760 if (mode == VOIDmode)
4761 mode = GET_MODE (*op1);
4763 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4765 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4766 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4767 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4768 for GTU/LEU in Thumb mode. */
4769 if (mode == DImode)
4772 if (*code == GT || *code == LE
4773 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4775 /* Missing comparison. First try to use an available
4776 comparison. */
4777 if (CONST_INT_P (*op1))
4779 i = INTVAL (*op1);
4780 switch (*code)
4782 case GT:
4783 case LE:
4784 if (i != maxval
4785 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4787 *op1 = GEN_INT (i + 1);
4788 *code = *code == GT ? GE : LT;
4789 return;
4791 break;
4792 case GTU:
4793 case LEU:
4794 if (i != ~((unsigned HOST_WIDE_INT) 0)
4795 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4797 *op1 = GEN_INT (i + 1);
4798 *code = *code == GTU ? GEU : LTU;
4799 return;
4801 break;
4802 default:
4803 gcc_unreachable ();
4807 /* If that did not work, reverse the condition. */
4808 if (!op0_preserve_value)
4810 std::swap (*op0, *op1);
4811 *code = (int)swap_condition ((enum rtx_code)*code);
4814 return;
4817 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4818 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4819 to facilitate possible combining with a cmp into 'ands'. */
4820 if (mode == SImode
4821 && GET_CODE (*op0) == ZERO_EXTEND
4822 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4823 && GET_MODE (XEXP (*op0, 0)) == QImode
4824 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4825 && subreg_lowpart_p (XEXP (*op0, 0))
4826 && *op1 == const0_rtx)
4827 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4828 GEN_INT (255));
4830 /* Comparisons smaller than DImode. Only adjust comparisons against
4831 an out-of-range constant. */
4832 if (!CONST_INT_P (*op1)
4833 || const_ok_for_arm (INTVAL (*op1))
4834 || const_ok_for_arm (- INTVAL (*op1)))
4835 return;
4837 i = INTVAL (*op1);
4839 switch (*code)
4841 case EQ:
4842 case NE:
4843 return;
4845 case GT:
4846 case LE:
4847 if (i != maxval
4848 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4850 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4851 *code = *code == GT ? GE : LT;
4852 return;
4854 break;
4856 case GE:
4857 case LT:
4858 if (i != ~maxval
4859 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4861 *op1 = GEN_INT (i - 1);
4862 *code = *code == GE ? GT : LE;
4863 return;
4865 break;
4867 case GTU:
4868 case LEU:
4869 if (i != ~((unsigned HOST_WIDE_INT) 0)
4870 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4872 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4873 *code = *code == GTU ? GEU : LTU;
4874 return;
4876 break;
4878 case GEU:
4879 case LTU:
4880 if (i != 0
4881 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4883 *op1 = GEN_INT (i - 1);
4884 *code = *code == GEU ? GTU : LEU;
4885 return;
4887 break;
4889 default:
4890 gcc_unreachable ();
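/* Worked example of the adjustment above: for an SImode "x <= 0xFFF",
   the constant 0xFFF is not a valid immediate (and neither is -0xFFF),
   but 0x1000 is, so LE is rewritten as LT with *op1 = 0x1000; the
   equivalent comparison "x < 4096" then needs no constant synthesis.  */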
4895 /* Define how to find the value returned by a function. */
4897 static rtx
4898 arm_function_value(const_tree type, const_tree func,
4899 bool outgoing ATTRIBUTE_UNUSED)
4901 machine_mode mode;
4902 int unsignedp ATTRIBUTE_UNUSED;
4903 rtx r ATTRIBUTE_UNUSED;
4905 mode = TYPE_MODE (type);
4907 if (TARGET_AAPCS_BASED)
4908 return aapcs_allocate_return_reg (mode, type, func);
4910 /* Promote integer types. */
4911 if (INTEGRAL_TYPE_P (type))
4912 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4914 /* Promote small structs returned in a register to full-word size
4915 for big-endian AAPCS. */
4916 if (arm_return_in_msb (type))
4918 HOST_WIDE_INT size = int_size_in_bytes (type);
4919 if (size % UNITS_PER_WORD != 0)
4921 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4922 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4926 return arm_libcall_value_1 (mode);
4929 /* libcall hashtable helpers. */
4931 struct libcall_hasher : typed_noop_remove <rtx_def>
4933 typedef const rtx_def *value_type;
4934 typedef const rtx_def *compare_type;
4935 static inline hashval_t hash (const rtx_def *);
4936 static inline bool equal (const rtx_def *, const rtx_def *);
4937 static inline void remove (rtx_def *);
4940 inline bool
4941 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
4943 return rtx_equal_p (p1, p2);
4946 inline hashval_t
4947 libcall_hasher::hash (const rtx_def *p1)
4949 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4952 typedef hash_table<libcall_hasher> libcall_table_type;
4954 static void
4955 add_libcall (libcall_table_type *htab, rtx libcall)
4957 *htab->find_slot (libcall, INSERT) = libcall;
4960 static bool
4961 arm_libcall_uses_aapcs_base (const_rtx libcall)
4963 static bool init_done = false;
4964 static libcall_table_type *libcall_htab = NULL;
4966 if (!init_done)
4968 init_done = true;
4970 libcall_htab = new libcall_table_type (31);
4971 add_libcall (libcall_htab,
4972 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4973 add_libcall (libcall_htab,
4974 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4975 add_libcall (libcall_htab,
4976 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4977 add_libcall (libcall_htab,
4978 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4980 add_libcall (libcall_htab,
4981 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4982 add_libcall (libcall_htab,
4983 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4984 add_libcall (libcall_htab,
4985 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4986 add_libcall (libcall_htab,
4987 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4989 add_libcall (libcall_htab,
4990 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4991 add_libcall (libcall_htab,
4992 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4993 add_libcall (libcall_htab,
4994 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4995 add_libcall (libcall_htab,
4996 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4997 add_libcall (libcall_htab,
4998 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4999 add_libcall (libcall_htab,
5000 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5001 add_libcall (libcall_htab,
5002 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5003 add_libcall (libcall_htab,
5004 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5006 /* Values from double-precision helper functions are returned in core
5007 registers if the selected core only supports single-precision
5008 arithmetic, even if we are using the hard-float ABI. The same is
5009 true for single-precision helpers, but we will never be using the
5010 hard-float ABI on a CPU which doesn't support single-precision
5011 operations in hardware. */
5012 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5013 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5014 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5015 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5016 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5017 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5018 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5019 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5020 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5021 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5022 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5023 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5024 SFmode));
5025 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5026 DFmode));
5029 return libcall && libcall_htab->find (libcall) != NULL;
5032 static rtx
5033 arm_libcall_value_1 (machine_mode mode)
5035 if (TARGET_AAPCS_BASED)
5036 return aapcs_libcall_value (mode);
5037 else if (TARGET_IWMMXT_ABI
5038 && arm_vector_mode_supported_p (mode))
5039 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5040 else
5041 return gen_rtx_REG (mode, ARG_REGISTER (1));
5044 /* Define how to find the value returned by a library function
5045 assuming the value has mode MODE. */
5047 static rtx
5048 arm_libcall_value (machine_mode mode, const_rtx libcall)
5050 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5051 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5053 /* The following libcalls return their result in integer registers,
5054 even though they return a floating point value. */
5055 if (arm_libcall_uses_aapcs_base (libcall))
5056 return gen_rtx_REG (mode, ARG_REGISTER(1));
5060 return arm_libcall_value_1 (mode);
5063 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5065 static bool
5066 arm_function_value_regno_p (const unsigned int regno)
5068 if (regno == ARG_REGISTER (1)
5069 || (TARGET_32BIT
5070 && TARGET_AAPCS_BASED
5071 && TARGET_VFP
5072 && TARGET_HARD_FLOAT
5073 && regno == FIRST_VFP_REGNUM)
5074 || (TARGET_IWMMXT_ABI
5075 && regno == FIRST_IWMMXT_REGNUM))
5076 return true;
5078 return false;
5081 /* Determine the amount of memory needed to store the possible return
5082 registers of an untyped call. */
5083 int
5084 arm_apply_result_size (void)
5086 int size = 16;
5088 if (TARGET_32BIT)
5090 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5091 size += 32;
5092 if (TARGET_IWMMXT_ABI)
5093 size += 8;
5096 return size;
5099 /* Decide whether TYPE should be returned in memory (true)
5100 or in a register (false). FNTYPE is the type of the function making
5101 the call. */
5102 static bool
5103 arm_return_in_memory (const_tree type, const_tree fntype)
5105 HOST_WIDE_INT size;
5107 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5109 if (TARGET_AAPCS_BASED)
5111 /* Simple, non-aggregate types (ie not including vectors and
5112 complex) are always returned in a register (or registers).
5113 We don't care about which register here, so we can short-cut
5114 some of the detail. */
5115 if (!AGGREGATE_TYPE_P (type)
5116 && TREE_CODE (type) != VECTOR_TYPE
5117 && TREE_CODE (type) != COMPLEX_TYPE)
5118 return false;
5120 /* Any return value that is no larger than one word can be
5121 returned in r0. */
5122 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5123 return false;
5125 /* Check any available co-processors to see if they accept the
5126 type as a register candidate (VFP, for example, can return
5127 some aggregates in consecutive registers). These aren't
5128 available if the call is variadic. */
5129 if (aapcs_select_return_coproc (type, fntype) >= 0)
5130 return false;
5132 /* Vector values should be returned using ARM registers, not
5133 memory (unless they're over 16 bytes, which will break since
5134 we only have four call-clobbered registers to play with). */
5135 if (TREE_CODE (type) == VECTOR_TYPE)
5136 return (size < 0 || size > (4 * UNITS_PER_WORD));
5138 /* The rest go in memory. */
5139 return true;
5142 if (TREE_CODE (type) == VECTOR_TYPE)
5143 return (size < 0 || size > (4 * UNITS_PER_WORD));
5145 if (!AGGREGATE_TYPE_P (type) &&
5146 (TREE_CODE (type) != VECTOR_TYPE))
5147 /* All simple types are returned in registers. */
5148 return false;
5150 if (arm_abi != ARM_ABI_APCS)
5152 /* ATPCS and later return aggregate types in memory only if they are
5153 larger than a word (or are variable size). */
5154 return (size < 0 || size > UNITS_PER_WORD);
5157 /* For the arm-wince targets we choose to be compatible with Microsoft's
5158 ARM and Thumb compilers, which always return aggregates in memory. */
5159 #ifndef ARM_WINCE
5160 /* All structures/unions bigger than one word are returned in memory.
5161 Also catch the case where int_size_in_bytes returns -1. In this case
5162 the aggregate is either huge or of variable size, and in either case
5163 we will want to return it via memory and not in a register. */
5164 if (size < 0 || size > UNITS_PER_WORD)
5165 return true;
5167 if (TREE_CODE (type) == RECORD_TYPE)
5169 tree field;
5171 /* For a struct the APCS says that we only return in a register
5172 if the type is 'integer like' and every addressable element
5173 has an offset of zero. For practical purposes this means
5174 that the structure can have at most one non bit-field element
5175 and that this element must be the first one in the structure. */
5177 /* Find the first field, ignoring non FIELD_DECL things which will
5178 have been created by C++. */
5179 for (field = TYPE_FIELDS (type);
5180 field && TREE_CODE (field) != FIELD_DECL;
5181 field = DECL_CHAIN (field))
5182 continue;
5184 if (field == NULL)
5185 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5187 /* Check that the first field is valid for returning in a register. */
5189 /* ... Floats are not allowed */
5190 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5191 return true;
5193 /* ... Aggregates that are not themselves valid for returning in
5194 a register are not allowed. */
5195 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5196 return true;
5198 /* Now check the remaining fields, if any. Only bitfields are allowed,
5199 since they are not addressable. */
5200 for (field = DECL_CHAIN (field);
5201 field;
5202 field = DECL_CHAIN (field))
5204 if (TREE_CODE (field) != FIELD_DECL)
5205 continue;
5207 if (!DECL_BIT_FIELD_TYPE (field))
5208 return true;
5211 return false;
5214 if (TREE_CODE (type) == UNION_TYPE)
5216 tree field;
5218 /* Unions can be returned in registers if every element is
5219 integral, or can be returned in an integer register. */
5220 for (field = TYPE_FIELDS (type);
5221 field;
5222 field = DECL_CHAIN (field))
5224 if (TREE_CODE (field) != FIELD_DECL)
5225 continue;
5227 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5228 return true;
5230 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5231 return true;
5234 return false;
5236 #endif /* not ARM_WINCE */
5238 /* Return all other types in memory. */
5239 return true;
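/* Illustrative outcomes under the AAPCS rules above (types are
   hypothetical):

     int                      -> false (returned in r0).
     struct { int a; }        -> false (4 bytes, fits in one word).
     struct { int a, b; }     -> true  (8 bytes and no co-processor accepts
                                        it, so it is returned in memory).
     struct { float a, b; }   -> false under the VFP hard-float PCS, since
                                 aapcs_select_return_coproc accepts it as a
                                 homogeneous FP aggregate in s0/s1.  */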
5242 const struct pcs_attribute_arg
5244 const char *arg;
5245 enum arm_pcs value;
5246 } pcs_attribute_args[] =
5248 {"aapcs", ARM_PCS_AAPCS},
5249 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5250 #if 0
5251 /* We could recognize these, but changes would be needed elsewhere
5252 * to implement them. */
5253 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5254 {"atpcs", ARM_PCS_ATPCS},
5255 {"apcs", ARM_PCS_APCS},
5256 #endif
5257 {NULL, ARM_PCS_UNKNOWN}
5260 static enum arm_pcs
5261 arm_pcs_from_attribute (tree attr)
5263 const struct pcs_attribute_arg *ptr;
5264 const char *arg;
5266 /* Get the value of the argument. */
5267 if (TREE_VALUE (attr) == NULL_TREE
5268 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5269 return ARM_PCS_UNKNOWN;
5271 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5273 /* Check it against the list of known arguments. */
5274 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5275 if (streq (arg, ptr->arg))
5276 return ptr->value;
5278 /* An unrecognized PCS variant. */
5279 return ARM_PCS_UNKNOWN;
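/* Illustrative usage (the declaration is hypothetical): the table above
   corresponds to the "pcs" function attribute, e.g.

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   which requests the VFP calling-convention variant for this signature
   even when the translation unit's default is the base AAPCS.  */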
5282 /* Get the PCS variant to use for this call. TYPE is the function's type
5283 specification, DECL is the specific declaration. DECL may be null if
5284 the call could be indirect or if this is a library call. */
5285 static enum arm_pcs
5286 arm_get_pcs_model (const_tree type, const_tree decl)
5288 bool user_convention = false;
5289 enum arm_pcs user_pcs = arm_pcs_default;
5290 tree attr;
5292 gcc_assert (type);
5294 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5295 if (attr)
5297 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5298 user_convention = true;
5301 if (TARGET_AAPCS_BASED)
5303 /* Detect varargs functions. These always use the base rules
5304 (no argument is ever a candidate for a co-processor
5305 register). */
5306 bool base_rules = stdarg_p (type);
5308 if (user_convention)
5310 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5311 sorry ("non-AAPCS derived PCS variant");
5312 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5313 error ("variadic functions must use the base AAPCS variant");
5316 if (base_rules)
5317 return ARM_PCS_AAPCS;
5318 else if (user_convention)
5319 return user_pcs;
5320 else if (decl && flag_unit_at_a_time)
5322 /* Local functions never leak outside this compilation unit,
5323 so we are free to use whatever conventions are
5324 appropriate. */
5325 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5326 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5327 if (i && i->local)
5328 return ARM_PCS_AAPCS_LOCAL;
5331 else if (user_convention && user_pcs != arm_pcs_default)
5332 sorry ("PCS variant");
5334 /* For everything else we use the target's default. */
5335 return arm_pcs_default;
5339 static void
5340 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5341 const_tree fntype ATTRIBUTE_UNUSED,
5342 rtx libcall ATTRIBUTE_UNUSED,
5343 const_tree fndecl ATTRIBUTE_UNUSED)
5345 /* Record the unallocated VFP registers. */
5346 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5347 pcum->aapcs_vfp_reg_alloc = 0;
5350 /* Walk down the type tree of TYPE counting consecutive base elements.
5351 If *MODEP is VOIDmode, then set it to the first valid floating point
5352 type. If a non-floating point type is found, or if a floating point
5353 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5354 otherwise return the count in the sub-tree. */
5355 static int
5356 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5358 machine_mode mode;
5359 HOST_WIDE_INT size;
5361 switch (TREE_CODE (type))
5363 case REAL_TYPE:
5364 mode = TYPE_MODE (type);
5365 if (mode != DFmode && mode != SFmode)
5366 return -1;
5368 if (*modep == VOIDmode)
5369 *modep = mode;
5371 if (*modep == mode)
5372 return 1;
5374 break;
5376 case COMPLEX_TYPE:
5377 mode = TYPE_MODE (TREE_TYPE (type));
5378 if (mode != DFmode && mode != SFmode)
5379 return -1;
5381 if (*modep == VOIDmode)
5382 *modep = mode;
5384 if (*modep == mode)
5385 return 2;
5387 break;
5389 case VECTOR_TYPE:
5390 /* Use V2SImode and V4SImode as representatives of all 64-bit
5391 and 128-bit vector types, whether or not those modes are
5392 supported with the present options. */
5393 size = int_size_in_bytes (type);
5394 switch (size)
5396 case 8:
5397 mode = V2SImode;
5398 break;
5399 case 16:
5400 mode = V4SImode;
5401 break;
5402 default:
5403 return -1;
5406 if (*modep == VOIDmode)
5407 *modep = mode;
5409 /* Vector modes are considered to be opaque: two vectors are
5410 equivalent for the purposes of being homogeneous aggregates
5411 if they are the same size. */
5412 if (*modep == mode)
5413 return 1;
5415 break;
5417 case ARRAY_TYPE:
5419 int count;
5420 tree index = TYPE_DOMAIN (type);
5422 /* Can't handle incomplete types nor sizes that are not
5423 fixed. */
5424 if (!COMPLETE_TYPE_P (type)
5425 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5426 return -1;
5428 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5429 if (count == -1
5430 || !index
5431 || !TYPE_MAX_VALUE (index)
5432 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5433 || !TYPE_MIN_VALUE (index)
5434 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5435 || count < 0)
5436 return -1;
5438 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5439 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5441 /* There must be no padding. */
5442 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5443 return -1;
5445 return count;
5448 case RECORD_TYPE:
5450 int count = 0;
5451 int sub_count;
5452 tree field;
5454 /* Can't handle incomplete types or sizes that are not
5455 fixed. */
5456 if (!COMPLETE_TYPE_P (type)
5457 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5458 return -1;
5460 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5462 if (TREE_CODE (field) != FIELD_DECL)
5463 continue;
5465 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5466 if (sub_count < 0)
5467 return -1;
5468 count += sub_count;
5471 /* There must be no padding. */
5472 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5473 return -1;
5475 return count;
5478 case UNION_TYPE:
5479 case QUAL_UNION_TYPE:
5481 /* These aren't very interesting except in a degenerate case. */
5482 int count = 0;
5483 int sub_count;
5484 tree field;
5486 /* Can't handle incomplete types or sizes that are not
5487 fixed. */
5488 if (!COMPLETE_TYPE_P (type)
5489 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5490 return -1;
5492 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5494 if (TREE_CODE (field) != FIELD_DECL)
5495 continue;
5497 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5498 if (sub_count < 0)
5499 return -1;
5500 count = count > sub_count ? count : sub_count;
5503 /* There must be no padding. */
5504 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5505 return -1;
5507 return count;
5510 default:
5511 break;
5514 return -1;
5517 /* Return true if PCS_VARIANT should use VFP registers. */
5518 static bool
5519 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5521 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5523 static bool seen_thumb1_vfp = false;
5525 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5527 sorry ("Thumb-1 hard-float VFP ABI");
5528 /* sorry() is not immediately fatal, so only display this once. */
5529 seen_thumb1_vfp = true;
5532 return true;
5535 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5536 return false;
5538 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5539 (TARGET_VFP_DOUBLE || !is_double));
5542 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5543 suitable for passing or returning in VFP registers for the PCS
5544 variant selected. If it is, then *BASE_MODE is updated to contain
5545 a machine mode describing each element of the argument's type and
5546 *COUNT to hold the number of such elements. */
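/* For illustration, with the VFP PCS in effect:
     struct hfa { double d[2]; }   -> *base_mode = DFmode, *count = 2, true
     struct big { float f[5]; }    -> more than 4 elements, returns false
     DCmode (complex double)       -> *base_mode = DFmode, *count = 2, true  */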
5547 static bool
5548 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5549 machine_mode mode, const_tree type,
5550 machine_mode *base_mode, int *count)
5552 machine_mode new_mode = VOIDmode;
5554 /* If we have the type information, prefer that to working things
5555 out from the mode. */
5556 if (type)
5558 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5560 if (ag_count > 0 && ag_count <= 4)
5561 *count = ag_count;
5562 else
5563 return false;
5565 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5566 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5567 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5569 *count = 1;
5570 new_mode = mode;
5572 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5574 *count = 2;
5575 new_mode = (mode == DCmode ? DFmode : SFmode);
5577 else
5578 return false;
5581 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5582 return false;
5584 *base_mode = new_mode;
5585 return true;
5588 static bool
5589 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5590 machine_mode mode, const_tree type)
5592 int count ATTRIBUTE_UNUSED;
5593 machine_mode ag_mode ATTRIBUTE_UNUSED;
5595 if (!use_vfp_abi (pcs_variant, false))
5596 return false;
5597 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5598 &ag_mode, &count);
5601 static bool
5602 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5603 const_tree type)
5605 if (!use_vfp_abi (pcum->pcs_variant, false))
5606 return false;
5608 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5609 &pcum->aapcs_vfp_rmode,
5610 &pcum->aapcs_vfp_rcount);
5613 static bool
5614 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5615 const_tree type ATTRIBUTE_UNUSED)
5617 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5618 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5619 int regno;
5621 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5622 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5624 pcum->aapcs_vfp_reg_alloc = mask << regno;
5625 if (mode == BLKmode
5626 || (mode == TImode && ! TARGET_NEON)
5627 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5629 int i;
5630 int rcount = pcum->aapcs_vfp_rcount;
5631 int rshift = shift;
5632 machine_mode rmode = pcum->aapcs_vfp_rmode;
5633 rtx par;
5634 if (!TARGET_NEON)
5636 /* Avoid using unsupported vector modes. */
5637 if (rmode == V2SImode)
5638 rmode = DImode;
5639 else if (rmode == V4SImode)
5641 rmode = DImode;
5642 rcount *= 2;
5643 rshift /= 2;
5646 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5647 for (i = 0; i < rcount; i++)
5649 rtx tmp = gen_rtx_REG (rmode,
5650 FIRST_VFP_REGNUM + regno + i * rshift);
5651 tmp = gen_rtx_EXPR_LIST
5652 (VOIDmode, tmp,
5653 GEN_INT (i * GET_MODE_SIZE (rmode)));
5654 XVECEXP (par, 0, i) = tmp;
5657 pcum->aapcs_reg = par;
5659 else
5660 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5661 return true;
5663 return false;
5666 static rtx
5667 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5668 machine_mode mode,
5669 const_tree type ATTRIBUTE_UNUSED)
5671 if (!use_vfp_abi (pcs_variant, false))
5672 return NULL;
5674 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5676 int count;
5677 machine_mode ag_mode;
5678 int i;
5679 rtx par;
5680 int shift;
5682 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5683 &ag_mode, &count);
5685 if (!TARGET_NEON)
5687 if (ag_mode == V2SImode)
5688 ag_mode = DImode;
5689 else if (ag_mode == V4SImode)
5691 ag_mode = DImode;
5692 count *= 2;
5695 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5696 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5697 for (i = 0; i < count; i++)
5699 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5700 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5701 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5702 XVECEXP (par, 0, i) = tmp;
5705 return par;
5708 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5711 static void
5712 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5713 machine_mode mode ATTRIBUTE_UNUSED,
5714 const_tree type ATTRIBUTE_UNUSED)
5716 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5717 pcum->aapcs_vfp_reg_alloc = 0;
5718 return;
5721 #define AAPCS_CP(X) \
5723 aapcs_ ## X ## _cum_init, \
5724 aapcs_ ## X ## _is_call_candidate, \
5725 aapcs_ ## X ## _allocate, \
5726 aapcs_ ## X ## _is_return_candidate, \
5727 aapcs_ ## X ## _allocate_return_reg, \
5728 aapcs_ ## X ## _advance \
5731 /* Table of co-processors that can be used to pass arguments in
5732 registers. Ideally no argument should be a candidate for more than
5733 one co-processor table entry, but the table is processed in order
5734 and stops after the first match. If that entry then fails to put
5735 the argument into a co-processor register, the argument will go on
5736 the stack. */
5737 static struct
5739 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5740 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5742 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5743 BLKmode) is a candidate for this co-processor's registers; this
5744 function should ignore any position-dependent state in
5745 CUMULATIVE_ARGS and only use call-type dependent information. */
5746 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5748 /* Return true if the argument does get a co-processor register; it
5749 should set aapcs_reg to an RTX of the register allocated as is
5750 required for a return from FUNCTION_ARG. */
5751 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5753 /* Return true if a result of mode MODE (or type TYPE if MODE is
5754 BLKmode) can be returned in this co-processor's registers. */
5755 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5757 /* Allocate and return an RTX element to hold the return type of a
5758 call; this routine must not fail and will only be called if
5759 is_return_candidate returned true with the same parameters. */
5760 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5762 /* Finish processing this argument and prepare to start processing
5763 the next one. */
5764 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5765 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5767 AAPCS_CP(vfp)
5770 #undef AAPCS_CP
5772 static int
5773 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5774 const_tree type)
5776 int i;
5778 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5779 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5780 return i;
5782 return -1;
5785 static int
5786 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5788 /* We aren't passed a decl, so we can't check that a call is local.
5789 However, it isn't clear that that would be a win anyway, since it
5790 might limit some tail-calling opportunities. */
5791 enum arm_pcs pcs_variant;
5793 if (fntype)
5795 const_tree fndecl = NULL_TREE;
5797 if (TREE_CODE (fntype) == FUNCTION_DECL)
5799 fndecl = fntype;
5800 fntype = TREE_TYPE (fntype);
5803 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5805 else
5806 pcs_variant = arm_pcs_default;
5808 if (pcs_variant != ARM_PCS_AAPCS)
5810 int i;
5812 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5813 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5814 TYPE_MODE (type),
5815 type))
5816 return i;
5818 return -1;
5821 static rtx
5822 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5823 const_tree fntype)
5825 /* We aren't passed a decl, so we can't check that a call is local.
5826 However, it isn't clear that that would be a win anyway, since it
5827 might limit some tail-calling opportunities. */
5828 enum arm_pcs pcs_variant;
5829 int unsignedp ATTRIBUTE_UNUSED;
5831 if (fntype)
5833 const_tree fndecl = NULL_TREE;
5835 if (TREE_CODE (fntype) == FUNCTION_DECL)
5837 fndecl = fntype;
5838 fntype = TREE_TYPE (fntype);
5841 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5843 else
5844 pcs_variant = arm_pcs_default;
5846 /* Promote integer types. */
5847 if (type && INTEGRAL_TYPE_P (type))
5848 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5850 if (pcs_variant != ARM_PCS_AAPCS)
5852 int i;
5854 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5855 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5856 type))
5857 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5858 mode, type);
5861 /* Promotes small structs returned in a register to full-word size
5862 for big-endian AAPCS. */
5863 if (type && arm_return_in_msb (type))
5865 HOST_WIDE_INT size = int_size_in_bytes (type);
5866 if (size % UNITS_PER_WORD != 0)
5868 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5869 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5873 return gen_rtx_REG (mode, R0_REGNUM);
5876 static rtx
5877 aapcs_libcall_value (machine_mode mode)
5879 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5880 && GET_MODE_SIZE (mode) <= 4)
5881 mode = SImode;
5883 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5886 /* Lay out a function argument using the AAPCS rules. The rule
5887 numbers referred to here are those in the AAPCS. */
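/* Worked example (illustrative): with NCRN = 2 and nothing yet on the
   stack, a 24-byte structure needs 6 words; rule C5 below splits it,
   placing the first 8 bytes in r2/r3 (aapcs_partial = 8) and the
   remaining 16 bytes on the stack, after which NCRN = 4.  */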
5888 static void
5889 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5890 const_tree type, bool named)
5892 int nregs, nregs2;
5893 int ncrn;
5895 /* We only need to do this once per argument. */
5896 if (pcum->aapcs_arg_processed)
5897 return;
5899 pcum->aapcs_arg_processed = true;
5901 /* Special case: if named is false then we are handling an incoming
5902 anonymous argument which is on the stack. */
5903 if (!named)
5904 return;
5906 /* Is this a potential co-processor register candidate? */
5907 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5909 int slot = aapcs_select_call_coproc (pcum, mode, type);
5910 pcum->aapcs_cprc_slot = slot;
5912 /* We don't have to apply any of the rules from part B of the
5913 preparation phase; these are handled elsewhere in the
5914 compiler. */
5916 if (slot >= 0)
5918 /* A Co-processor register candidate goes either in its own
5919 class of registers or on the stack. */
5920 if (!pcum->aapcs_cprc_failed[slot])
5922 /* C1.cp - Try to allocate the argument to co-processor
5923 registers. */
5924 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5925 return;
5927 /* C2.cp - Put the argument on the stack and note that we
5928 can't assign any more candidates in this slot. We also
5929 need to note that we have allocated stack space, so that
5930 we won't later try to split a non-cprc candidate between
5931 core registers and the stack. */
5932 pcum->aapcs_cprc_failed[slot] = true;
5933 pcum->can_split = false;
5936 /* We didn't get a register, so this argument goes on the
5937 stack. */
5938 gcc_assert (pcum->can_split == false);
5939 return;
5943 /* C3 - For double-word aligned arguments, round the NCRN up to the
5944 next even number. */
5945 ncrn = pcum->aapcs_ncrn;
5946 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5947 ncrn++;
5949 nregs = ARM_NUM_REGS2(mode, type);
5951 /* Sigh, this test should really assert that nregs > 0, but a GCC
5952 extension allows empty structs and then gives them zero size; it
5953 then allows such a structure to be passed by value. For some of
5954 the code below we have to pretend that such an argument has
5955 non-zero size so that we 'locate' it correctly either in
5956 registers or on the stack. */
5957 gcc_assert (nregs >= 0);
5959 nregs2 = nregs ? nregs : 1;
5961 /* C4 - Argument fits entirely in core registers. */
5962 if (ncrn + nregs2 <= NUM_ARG_REGS)
5964 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5965 pcum->aapcs_next_ncrn = ncrn + nregs;
5966 return;
5969 /* C5 - Some core registers left and there are no arguments already
5970 on the stack: split this argument between the remaining core
5971 registers and the stack. */
5972 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5974 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5975 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5976 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5977 return;
5980 /* C6 - NCRN is set to 4. */
5981 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5983 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5984 return;
5987 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5988 for a call to a function whose data type is FNTYPE.
5989 For a library call, FNTYPE is NULL. */
5990 void
5991 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5992 rtx libname,
5993 tree fndecl ATTRIBUTE_UNUSED)
5995 /* Long call handling. */
5996 if (fntype)
5997 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5998 else
5999 pcum->pcs_variant = arm_pcs_default;
6001 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6003 if (arm_libcall_uses_aapcs_base (libname))
6004 pcum->pcs_variant = ARM_PCS_AAPCS;
6006 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6007 pcum->aapcs_reg = NULL_RTX;
6008 pcum->aapcs_partial = 0;
6009 pcum->aapcs_arg_processed = false;
6010 pcum->aapcs_cprc_slot = -1;
6011 pcum->can_split = true;
6013 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6015 int i;
6017 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6019 pcum->aapcs_cprc_failed[i] = false;
6020 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6023 return;
6026 /* Legacy ABIs */
6028 /* On the ARM, the offset starts at 0. */
6029 pcum->nregs = 0;
6030 pcum->iwmmxt_nregs = 0;
6031 pcum->can_split = true;
6033 /* Varargs vectors are treated the same as long long.
6034 named_count avoids having to change the way arm handles 'named'. */
6035 pcum->named_count = 0;
6036 pcum->nargs = 0;
6038 if (TARGET_REALLY_IWMMXT && fntype)
6040 tree fn_arg;
6042 for (fn_arg = TYPE_ARG_TYPES (fntype);
6043 fn_arg;
6044 fn_arg = TREE_CHAIN (fn_arg))
6045 pcum->named_count += 1;
6047 if (! pcum->named_count)
6048 pcum->named_count = INT_MAX;
6052 /* Return true if mode/type need doubleword alignment. */
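/* For example, DImode and DFmode have 64-bit alignment, which exceeds
   PARM_BOUNDARY (32 bits here), so they need doubleword alignment;
   SImode does not.  A type carrying __attribute__ ((aligned (8))) is
   also doubleword aligned.  */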
6053 static bool
6054 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6056 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6057 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6061 /* Determine where to put an argument to a function.
6062 Value is zero to push the argument on the stack,
6063 or a hard register in which to store the argument.
6065 MODE is the argument's machine mode.
6066 TYPE is the data type of the argument (as a tree).
6067 This is null for libcalls where that information may
6068 not be available.
6069 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6070 the preceding args and about the function being called.
6071 NAMED is nonzero if this argument is a named parameter
6072 (otherwise it is an extra parameter matching an ellipsis).
6074 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6075 other arguments are passed on the stack. If (NAMED == 0) (which happens
6076 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6077 defined), say it is passed on the stack (function_prologue will
6078 indeed make it go on the stack if necessary). */
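/* Illustrative AAPCS example: for f (int a, long long b, int c),
   a is placed in r0, b is 8-byte aligned so r1 is skipped and b
   occupies r2/r3, and c goes on the stack.  */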
6080 static rtx
6081 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6082 const_tree type, bool named)
6084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6085 int nregs;
6087 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6088 a call insn (op3 of a call_value insn). */
6089 if (mode == VOIDmode)
6090 return const0_rtx;
6092 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6094 aapcs_layout_arg (pcum, mode, type, named);
6095 return pcum->aapcs_reg;
6098 /* Varargs vectors are treated the same as long long.
6099 named_count avoids having to change the way arm handles 'named'. */
6100 if (TARGET_IWMMXT_ABI
6101 && arm_vector_mode_supported_p (mode)
6102 && pcum->named_count > pcum->nargs + 1)
6104 if (pcum->iwmmxt_nregs <= 9)
6105 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6106 else
6108 pcum->can_split = false;
6109 return NULL_RTX;
6113 /* Put doubleword aligned quantities in even register pairs. */
6114 if (pcum->nregs & 1
6115 && ARM_DOUBLEWORD_ALIGN
6116 && arm_needs_doubleword_align (mode, type))
6117 pcum->nregs++;
6119 /* Only allow splitting an arg between regs and memory if all preceding
6120 args were allocated to regs. For args passed by reference we only count
6121 the reference pointer. */
6122 if (pcum->can_split)
6123 nregs = 1;
6124 else
6125 nregs = ARM_NUM_REGS2 (mode, type);
6127 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6128 return NULL_RTX;
6130 return gen_rtx_REG (mode, pcum->nregs);
6133 static unsigned int
6134 arm_function_arg_boundary (machine_mode mode, const_tree type)
6136 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6137 ? DOUBLEWORD_ALIGNMENT
6138 : PARM_BOUNDARY);
6141 static int
6142 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6143 tree type, bool named)
6145 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6146 int nregs = pcum->nregs;
6148 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6150 aapcs_layout_arg (pcum, mode, type, named);
6151 return pcum->aapcs_partial;
6154 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6155 return 0;
6157 if (NUM_ARG_REGS > nregs
6158 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6159 && pcum->can_split)
6160 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6162 return 0;
6165 /* Update the data in PCUM to advance over an argument
6166 of mode MODE and data type TYPE.
6167 (TYPE is null for libcalls where that information may not be available.) */
6169 static void
6170 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6171 const_tree type, bool named)
6173 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6175 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6177 aapcs_layout_arg (pcum, mode, type, named);
6179 if (pcum->aapcs_cprc_slot >= 0)
6181 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6182 type);
6183 pcum->aapcs_cprc_slot = -1;
6186 /* Generic stuff. */
6187 pcum->aapcs_arg_processed = false;
6188 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6189 pcum->aapcs_reg = NULL_RTX;
6190 pcum->aapcs_partial = 0;
6192 else
6194 pcum->nargs += 1;
6195 if (arm_vector_mode_supported_p (mode)
6196 && pcum->named_count > pcum->nargs
6197 && TARGET_IWMMXT_ABI)
6198 pcum->iwmmxt_nregs += 1;
6199 else
6200 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6204 /* Variable sized types are passed by reference. This is a GCC
6205 extension to the ARM ABI. */
6207 static bool
6208 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6209 machine_mode mode ATTRIBUTE_UNUSED,
6210 const_tree type, bool named ATTRIBUTE_UNUSED)
6212 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6215 /* Encode the current state of the #pragma [no_]long_calls. */
6216 typedef enum
6218 OFF, /* No #pragma [no_]long_calls is in effect. */
6219 LONG, /* #pragma long_calls is in effect. */
6220 SHORT /* #pragma no_long_calls is in effect. */
6221 } arm_pragma_enum;
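/* Typical usage (illustrative):

     #pragma long_calls
     void far_away (void);      -- subsequent calls use a full 32-bit address
     #pragma long_calls_off

   See arm_pr_long_calls and friends below.  */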
6223 static arm_pragma_enum arm_pragma_long_calls = OFF;
6225 void
6226 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6228 arm_pragma_long_calls = LONG;
6231 void
6232 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6234 arm_pragma_long_calls = SHORT;
6237 void
6238 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6240 arm_pragma_long_calls = OFF;
6243 /* Handle an attribute requiring a FUNCTION_DECL;
6244 arguments as in struct attribute_spec.handler. */
6245 static tree
6246 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6247 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6249 if (TREE_CODE (*node) != FUNCTION_DECL)
6251 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6252 name);
6253 *no_add_attrs = true;
6256 return NULL_TREE;
6259 /* Handle an "interrupt" or "isr" attribute;
6260 arguments as in struct attribute_spec.handler. */
6261 static tree
6262 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6263 bool *no_add_attrs)
6265 if (DECL_P (*node))
6267 if (TREE_CODE (*node) != FUNCTION_DECL)
6269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6270 name);
6271 *no_add_attrs = true;
6273 /* FIXME: the argument, if any, is checked for type attributes;
6274 should it be checked for decl ones? */
6276 else
6278 if (TREE_CODE (*node) == FUNCTION_TYPE
6279 || TREE_CODE (*node) == METHOD_TYPE)
6281 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6283 warning (OPT_Wattributes, "%qE attribute ignored",
6284 name);
6285 *no_add_attrs = true;
6288 else if (TREE_CODE (*node) == POINTER_TYPE
6289 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6290 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6291 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6293 *node = build_variant_type_copy (*node);
6294 TREE_TYPE (*node) = build_type_attribute_variant
6295 (TREE_TYPE (*node),
6296 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6297 *no_add_attrs = true;
6299 else
6301 /* Possibly pass this attribute on from the type to a decl. */
6302 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6303 | (int) ATTR_FLAG_FUNCTION_NEXT
6304 | (int) ATTR_FLAG_ARRAY_NEXT))
6306 *no_add_attrs = true;
6307 return tree_cons (name, args, NULL_TREE);
6309 else
6311 warning (OPT_Wattributes, "%qE attribute ignored",
6312 name);
6317 return NULL_TREE;
6320 /* Handle a "pcs" attribute; arguments as in struct
6321 attribute_spec.handler. */
6322 static tree
6323 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6324 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6326 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6328 warning (OPT_Wattributes, "%qE attribute ignored", name);
6329 *no_add_attrs = true;
6331 return NULL_TREE;
6334 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6335 /* Handle the "notshared" attribute. This attribute is another way of
6336 requesting hidden visibility. ARM's compiler supports
6337 "__declspec(notshared)"; we support the same thing via an
6338 attribute. */
6340 static tree
6341 arm_handle_notshared_attribute (tree *node,
6342 tree name ATTRIBUTE_UNUSED,
6343 tree args ATTRIBUTE_UNUSED,
6344 int flags ATTRIBUTE_UNUSED,
6345 bool *no_add_attrs)
6347 tree decl = TYPE_NAME (*node);
6349 if (decl)
6351 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6352 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6353 *no_add_attrs = false;
6355 return NULL_TREE;
6357 #endif
6359 /* Return 0 if the attributes for two types are incompatible, 1 if they
6360 are compatible, and 2 if they are nearly compatible (which causes a
6361 warning to be generated). */
6362 static int
6363 arm_comp_type_attributes (const_tree type1, const_tree type2)
6365 int l1, l2, s1, s2;
6367 /* Check for mismatch of non-default calling convention. */
6368 if (TREE_CODE (type1) != FUNCTION_TYPE)
6369 return 1;
6371 /* Check for mismatched call attributes. */
6372 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6373 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6374 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6375 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6377 /* Only bother to check if an attribute is defined. */
6378 if (l1 | l2 | s1 | s2)
6380 /* If one type has an attribute, the other must have the same attribute. */
6381 if ((l1 != l2) || (s1 != s2))
6382 return 0;
6384 /* Disallow mixed attributes. */
6385 if ((l1 & s2) || (l2 & s1))
6386 return 0;
6389 /* Check for mismatched ISR attribute. */
6390 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6391 if (! l1)
6392 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6393 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6394 if (! l2)
6395 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6396 if (l1 != l2)
6397 return 0;
6399 return 1;
6402 /* Assigns default attributes to newly defined type. This is used to
6403 set short_call/long_call attributes for function types of
6404 functions defined inside corresponding #pragma scopes. */
6405 static void
6406 arm_set_default_type_attributes (tree type)
6408 /* Add __attribute__ ((long_call)) to all functions, when
6409 inside #pragma long_calls or __attribute__ ((short_call)),
6410 when inside #pragma no_long_calls. */
6411 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6413 tree type_attr_list, attr_name;
6414 type_attr_list = TYPE_ATTRIBUTES (type);
6416 if (arm_pragma_long_calls == LONG)
6417 attr_name = get_identifier ("long_call");
6418 else if (arm_pragma_long_calls == SHORT)
6419 attr_name = get_identifier ("short_call");
6420 else
6421 return;
6423 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6424 TYPE_ATTRIBUTES (type) = type_attr_list;
6428 /* Return true if DECL is known to be linked into section SECTION. */
6430 static bool
6431 arm_function_in_section_p (tree decl, section *section)
6433 /* We can only be certain about the prevailing symbol definition. */
6434 if (!decl_binds_to_current_def_p (decl))
6435 return false;
6437 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6438 if (!DECL_SECTION_NAME (decl))
6440 /* Make sure that we will not create a unique section for DECL. */
6441 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6442 return false;
6445 return function_section (decl) == section;
6448 /* Return nonzero if a 32-bit "long_call" should be generated for
6449 a call from the current function to DECL. We generate a long_call
6450 if the function:
6452 a. has an __attribute__ ((long_call))
6453 or b. is within the scope of a #pragma long_calls
6454 or c. the -mlong-calls command line switch has been specified
6456 However we do not generate a long call if the function:
6458 d. has an __attribute__ ((short_call))
6459 or e. is inside the scope of a #pragma no_long_calls
6460 or f. is defined in the same section as the current function. */
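/* For example (illustrative), either of the following forces a long call
   to far_func from code compiled without -mlong-calls:

     extern void far_func (void) __attribute__ ((long_call));

     #pragma long_calls
     extern void far_func (void);
     #pragma long_calls_off
*/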
6462 bool
6463 arm_is_long_call_p (tree decl)
6465 tree attrs;
6467 if (!decl)
6468 return TARGET_LONG_CALLS;
6470 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6471 if (lookup_attribute ("short_call", attrs))
6472 return false;
6474 /* For "f", be conservative, and only cater for cases in which the
6475 whole of the current function is placed in the same section. */
6476 if (!flag_reorder_blocks_and_partition
6477 && TREE_CODE (decl) == FUNCTION_DECL
6478 && arm_function_in_section_p (decl, current_function_section ()))
6479 return false;
6481 if (lookup_attribute ("long_call", attrs))
6482 return true;
6484 return TARGET_LONG_CALLS;
6487 /* Return nonzero if it is ok to make a tail-call to DECL. */
6488 static bool
6489 arm_function_ok_for_sibcall (tree decl, tree exp)
6491 unsigned long func_type;
6493 if (cfun->machine->sibcall_blocked)
6494 return false;
6496 /* Never tailcall something if we are generating code for Thumb-1. */
6497 if (TARGET_THUMB1)
6498 return false;
6500 /* The PIC register is live on entry to VxWorks PLT entries, so we
6501 must make the call before restoring the PIC register. */
6502 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6503 return false;
6505 /* If we are interworking and the function is not declared static
6506 then we can't tail-call it unless we know that it exists in this
6507 compilation unit (since it might be a Thumb routine). */
6508 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6509 && !TREE_ASM_WRITTEN (decl))
6510 return false;
6512 func_type = arm_current_func_type ();
6513 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6514 if (IS_INTERRUPT (func_type))
6515 return false;
6517 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6519 /* Check that the return value locations are the same. For
6520 example that we aren't returning a value from the sibling in
6521 a VFP register but then need to transfer it to a core
6522 register. */
6523 rtx a, b;
6525 a = arm_function_value (TREE_TYPE (exp), decl, false);
6526 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6527 cfun->decl, false);
6528 if (!rtx_equal_p (a, b))
6529 return false;
6532 /* Never tailcall if function may be called with a misaligned SP. */
6533 if (IS_STACKALIGN (func_type))
6534 return false;
6536 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6537 references should become a NOP. Don't convert such calls into
6538 sibling calls. */
6539 if (TARGET_AAPCS_BASED
6540 && arm_abi == ARM_ABI_AAPCS
6541 && decl
6542 && DECL_WEAK (decl))
6543 return false;
6545 /* Everything else is ok. */
6546 return true;
6550 /* Addressing mode support functions. */
6552 /* Return nonzero if X is a legitimate immediate operand when compiling
6553 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
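/* For example, (const_int 42) and (label_ref N) are legitimate here,
   while (symbol_ref "foo") or (const (plus (symbol_ref "foo")
   (const_int 4))) are not, since they would need a GOT load.  */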
6555 legitimate_pic_operand_p (rtx x)
6557 if (GET_CODE (x) == SYMBOL_REF
6558 || (GET_CODE (x) == CONST
6559 && GET_CODE (XEXP (x, 0)) == PLUS
6560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6561 return 0;
6563 return 1;
6566 /* Record that the current function needs a PIC register. Initialize
6567 cfun->machine->pic_reg if we have not already done so. */
6569 static void
6570 require_pic_register (void)
6572 /* A lot of the logic here is made obscure by the fact that this
6573 routine gets called as part of the rtx cost estimation process.
6574 We don't want those calls to affect any assumptions about the real
6575 function; and further, we can't call entry_of_function() until we
6576 start the real expansion process. */
6577 if (!crtl->uses_pic_offset_table)
6579 gcc_assert (can_create_pseudo_p ());
6580 if (arm_pic_register != INVALID_REGNUM
6581 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6583 if (!cfun->machine->pic_reg)
6584 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6586 /* Play games to avoid marking the function as needing pic
6587 if we are being called as part of the cost-estimation
6588 process. */
6589 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6590 crtl->uses_pic_offset_table = 1;
6592 else
6594 rtx_insn *seq, *insn;
6596 if (!cfun->machine->pic_reg)
6597 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6599 /* Play games to avoid marking the function as needing pic
6600 if we are being called as part of the cost-estimation
6601 process. */
6602 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6604 crtl->uses_pic_offset_table = 1;
6605 start_sequence ();
6607 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6608 && arm_pic_register > LAST_LO_REGNUM)
6609 emit_move_insn (cfun->machine->pic_reg,
6610 gen_rtx_REG (Pmode, arm_pic_register));
6611 else
6612 arm_load_pic_register (0UL);
6614 seq = get_insns ();
6615 end_sequence ();
6617 for (insn = seq; insn; insn = NEXT_INSN (insn))
6618 if (INSN_P (insn))
6619 INSN_LOCATION (insn) = prologue_location;
6621 /* We can be called during expansion of PHI nodes, where
6622 we can't yet emit instructions directly in the final
6623 insn stream. Queue the insns on the entry edge, they will
6624 be committed after everything else is expanded. */
6625 insert_insn_on_edge (seq,
6626 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6633 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6635 if (GET_CODE (orig) == SYMBOL_REF
6636 || GET_CODE (orig) == LABEL_REF)
6638 rtx insn;
6640 if (reg == 0)
6642 gcc_assert (can_create_pseudo_p ());
6643 reg = gen_reg_rtx (Pmode);
6646 /* VxWorks does not impose a fixed gap between segments; the run-time
6647 gap can be different from the object-file gap. We therefore can't
6648 use GOTOFF unless we are absolutely sure that the symbol is in the
6649 same segment as the GOT. Unfortunately, the flexibility of linker
6650 scripts means that we can't be sure of that in general, so assume
6651 that GOTOFF is never valid on VxWorks. */
6652 if ((GET_CODE (orig) == LABEL_REF
6653 || (GET_CODE (orig) == SYMBOL_REF &&
6654 SYMBOL_REF_LOCAL_P (orig)))
6655 && NEED_GOT_RELOC
6656 && arm_pic_data_is_text_relative)
6657 insn = arm_pic_static_addr (orig, reg);
6658 else
6660 rtx pat;
6661 rtx mem;
6663 /* If this function doesn't have a pic register, create one now. */
6664 require_pic_register ();
6666 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6668 /* Make the MEM as close to a constant as possible. */
6669 mem = SET_SRC (pat);
6670 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6671 MEM_READONLY_P (mem) = 1;
6672 MEM_NOTRAP_P (mem) = 1;
6674 insn = emit_insn (pat);
6677 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6678 by loop. */
6679 set_unique_reg_note (insn, REG_EQUAL, orig);
6681 return reg;
6683 else if (GET_CODE (orig) == CONST)
6685 rtx base, offset;
6687 if (GET_CODE (XEXP (orig, 0)) == PLUS
6688 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6689 return orig;
6691 /* Handle the case where we have: const (UNSPEC_TLS). */
6692 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6693 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6694 return orig;
6696 /* Handle the case where we have:
6697 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6698 CONST_INT. */
6699 if (GET_CODE (XEXP (orig, 0)) == PLUS
6700 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6701 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6703 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6704 return orig;
6707 if (reg == 0)
6709 gcc_assert (can_create_pseudo_p ());
6710 reg = gen_reg_rtx (Pmode);
6713 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6715 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6716 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6717 base == reg ? 0 : reg);
6719 if (CONST_INT_P (offset))
6721 /* The base register doesn't really matter, we only want to
6722 test the index for the appropriate mode. */
6723 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6725 gcc_assert (can_create_pseudo_p ());
6726 offset = force_reg (Pmode, offset);
6729 if (CONST_INT_P (offset))
6730 return plus_constant (Pmode, base, INTVAL (offset));
6733 if (GET_MODE_SIZE (mode) > 4
6734 && (GET_MODE_CLASS (mode) == MODE_INT
6735 || TARGET_SOFT_FLOAT))
6737 emit_insn (gen_addsi3 (reg, base, offset));
6738 return reg;
6741 return gen_rtx_PLUS (Pmode, base, offset);
6744 return orig;
6748 /* Find a spare register to use during the prolog of a function. */
6750 static int
6751 thumb_find_work_register (unsigned long pushed_regs_mask)
6753 int reg;
6755 /* Check the argument registers first as these are call-used. The
6756 register allocation order means that sometimes r3 might be used
6757 but earlier argument registers might not, so check them all. */
6758 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6759 if (!df_regs_ever_live_p (reg))
6760 return reg;
6762 /* Before going on to check the call-saved registers we can try a couple
6763 more ways of deducing that r3 is available. The first is when we are
6764 pushing anonymous arguments onto the stack and we have less than 4
6765 registers worth of fixed arguments(*). In this case r3 will be part of
6766 the variable argument list and so we can be sure that it will be
6767 pushed right at the start of the function. Hence it will be available
6768 for the rest of the prologue.
6769 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6770 if (cfun->machine->uses_anonymous_args
6771 && crtl->args.pretend_args_size > 0)
6772 return LAST_ARG_REGNUM;
6774 /* The other case is when we have fixed arguments but less than 4 registers
6775 worth. In this case r3 might be used in the body of the function, but
6776 it is not being used to convey an argument into the function. In theory
6777 we could just check crtl->args.size to see how many bytes are
6778 being passed in argument registers, but it seems that it is unreliable.
6779 Sometimes it will have the value 0 when in fact arguments are being
6780 passed. (See testcase execute/20021111-1.c for an example). So we also
6781 check the args_info.nregs field as well. The problem with this field is
6782 that it makes no allowances for arguments that are passed to the
6783 function but which are not used. Hence we could miss an opportunity
6784 when a function has an unused argument in r3. But it is better to be
6785 safe than to be sorry. */
6786 if (! cfun->machine->uses_anonymous_args
6787 && crtl->args.size >= 0
6788 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6789 && (TARGET_AAPCS_BASED
6790 ? crtl->args.info.aapcs_ncrn < 4
6791 : crtl->args.info.nregs < 4))
6792 return LAST_ARG_REGNUM;
6794 /* Otherwise look for a call-saved register that is going to be pushed. */
6795 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6796 if (pushed_regs_mask & (1 << reg))
6797 return reg;
6799 if (TARGET_THUMB2)
6801 /* Thumb-2 can use high regs. */
6802 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6803 if (pushed_regs_mask & (1 << reg))
6804 return reg;
6806 /* Something went wrong - thumb_compute_save_reg_mask()
6807 should have arranged for a suitable register to be pushed. */
6808 gcc_unreachable ();
6811 static GTY(()) int pic_labelno;
6813 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6814 low register. */
6816 void
6817 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6819 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6821 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6822 return;
6824 gcc_assert (flag_pic);
6826 pic_reg = cfun->machine->pic_reg;
6827 if (TARGET_VXWORKS_RTP)
6829 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6830 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6831 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6833 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6835 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6836 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6838 else
6840 /* We use an UNSPEC rather than a LABEL_REF because this label
6841 never appears in the code stream. */
6843 labelno = GEN_INT (pic_labelno++);
6844 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6845 l1 = gen_rtx_CONST (VOIDmode, l1);
6847 /* On the ARM the PC register contains 'dot + 8' at the time of the
6848 addition, on the Thumb it is 'dot + 4'. */
6849 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6850 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6851 UNSPEC_GOTSYM_OFF);
6852 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6854 if (TARGET_32BIT)
6856 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6858 else /* TARGET_THUMB1 */
6860 if (arm_pic_register != INVALID_REGNUM
6861 && REGNO (pic_reg) > LAST_LO_REGNUM)
6863 /* We will have pushed the pic register, so we should always be
6864 able to find a work register. */
6865 pic_tmp = gen_rtx_REG (SImode,
6866 thumb_find_work_register (saved_regs));
6867 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6868 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6869 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6871 else if (arm_pic_register != INVALID_REGNUM
6872 && arm_pic_register > LAST_LO_REGNUM
6873 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6875 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6876 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6877 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6879 else
6880 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6884 /* Need to emit this whether or not we obey regdecls,
6885 since setjmp/longjmp can cause life info to screw up. */
6886 emit_use (pic_reg);
6889 /* Generate code to load the address of a static var when flag_pic is set. */
6890 static rtx
6891 arm_pic_static_addr (rtx orig, rtx reg)
6893 rtx l1, labelno, offset_rtx, insn;
6895 gcc_assert (flag_pic);
6897 /* We use an UNSPEC rather than a LABEL_REF because this label
6898 never appears in the code stream. */
6899 labelno = GEN_INT (pic_labelno++);
6900 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6901 l1 = gen_rtx_CONST (VOIDmode, l1);
6903 /* On the ARM the PC register contains 'dot + 8' at the time of the
6904 addition, on the Thumb it is 'dot + 4'. */
6905 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6906 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6907 UNSPEC_SYMBOL_OFFSET);
6908 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6910 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6911 return insn;
6914 /* Return nonzero if X is valid as an ARM state addressing register. */
6915 static int
6916 arm_address_register_rtx_p (rtx x, int strict_p)
6918 int regno;
6920 if (!REG_P (x))
6921 return 0;
6923 regno = REGNO (x);
6925 if (strict_p)
6926 return ARM_REGNO_OK_FOR_BASE_P (regno);
6928 return (regno <= LAST_ARM_REGNUM
6929 || regno >= FIRST_PSEUDO_REGISTER
6930 || regno == FRAME_POINTER_REGNUM
6931 || regno == ARG_POINTER_REGNUM);
6934 /* Return TRUE if this rtx is the difference of a symbol and a label,
6935 and will reduce to a PC-relative relocation in the object file.
6936 Expressions like this can be left alone when generating PIC, rather
6937 than forced through the GOT. */
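/* e.g. (minus (symbol_ref "sym") (label_ref L42)) is such a difference
   and resolves to a PC-relative offset at assembly time.  */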
6938 static int
6939 pcrel_constant_p (rtx x)
6941 if (GET_CODE (x) == MINUS)
6942 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6944 return FALSE;
6947 /* Return true if X will surely end up in an index register after next
6948 splitting pass. */
6949 static bool
6950 will_be_in_index_register (const_rtx x)
6952 /* arm.md: calculate_pic_address will split this into a register. */
6953 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6956 /* Return nonzero if X is a valid ARM state address operand. */
6958 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6959 int strict_p)
6961 bool use_ldrd;
6962 enum rtx_code code = GET_CODE (x);
6964 if (arm_address_register_rtx_p (x, strict_p))
6965 return 1;
6967 use_ldrd = (TARGET_LDRD
6968 && (mode == DImode
6969 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6971 if (code == POST_INC || code == PRE_DEC
6972 || ((code == PRE_INC || code == POST_DEC)
6973 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6974 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6976 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6977 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6978 && GET_CODE (XEXP (x, 1)) == PLUS
6979 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6981 rtx addend = XEXP (XEXP (x, 1), 1);
6983 /* Don't allow ldrd post increment by register because it's hard
6984 to fixup invalid register choices. */
6985 if (use_ldrd
6986 && GET_CODE (x) == POST_MODIFY
6987 && REG_P (addend))
6988 return 0;
6990 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6991 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6994 /* After reload constants split into minipools will have addresses
6995 from a LABEL_REF. */
6996 else if (reload_completed
6997 && (code == LABEL_REF
6998 || (code == CONST
6999 && GET_CODE (XEXP (x, 0)) == PLUS
7000 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7001 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7002 return 1;
7004 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7005 return 0;
7007 else if (code == PLUS)
7009 rtx xop0 = XEXP (x, 0);
7010 rtx xop1 = XEXP (x, 1);
7012 return ((arm_address_register_rtx_p (xop0, strict_p)
7013 && ((CONST_INT_P (xop1)
7014 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7015 || (!strict_p && will_be_in_index_register (xop1))))
7016 || (arm_address_register_rtx_p (xop1, strict_p)
7017 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7020 #if 0
7021 /* Reload currently can't handle MINUS, so disable this for now */
7022 else if (GET_CODE (x) == MINUS)
7024 rtx xop0 = XEXP (x, 0);
7025 rtx xop1 = XEXP (x, 1);
7027 return (arm_address_register_rtx_p (xop0, strict_p)
7028 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7030 #endif
7032 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7033 && code == SYMBOL_REF
7034 && CONSTANT_POOL_ADDRESS_P (x)
7035 && ! (flag_pic
7036 && symbol_mentioned_p (get_pool_constant (x))
7037 && ! pcrel_constant_p (get_pool_constant (x))))
7038 return 1;
7040 return 0;
7043 /* Return nonzero if X is a valid Thumb-2 address operand. */
7044 static int
7045 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7047 bool use_ldrd;
7048 enum rtx_code code = GET_CODE (x);
7050 if (arm_address_register_rtx_p (x, strict_p))
7051 return 1;
7053 use_ldrd = (TARGET_LDRD
7054 && (mode == DImode
7055 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7057 if (code == POST_INC || code == PRE_DEC
7058 || ((code == PRE_INC || code == POST_DEC)
7059 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7060 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7062 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7063 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7064 && GET_CODE (XEXP (x, 1)) == PLUS
7065 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7067 /* Thumb-2 only has autoincrement by constant. */
7068 rtx addend = XEXP (XEXP (x, 1), 1);
7069 HOST_WIDE_INT offset;
7071 if (!CONST_INT_P (addend))
7072 return 0;
7074 offset = INTVAL(addend);
7075 if (GET_MODE_SIZE (mode) <= 4)
7076 return (offset > -256 && offset < 256);
7078 return (use_ldrd && offset > -1024 && offset < 1024
7079 && (offset & 3) == 0);
7082 /* After reload constants split into minipools will have addresses
7083 from a LABEL_REF. */
7084 else if (reload_completed
7085 && (code == LABEL_REF
7086 || (code == CONST
7087 && GET_CODE (XEXP (x, 0)) == PLUS
7088 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7089 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7090 return 1;
7092 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7093 return 0;
7095 else if (code == PLUS)
7097 rtx xop0 = XEXP (x, 0);
7098 rtx xop1 = XEXP (x, 1);
7100 return ((arm_address_register_rtx_p (xop0, strict_p)
7101 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7102 || (!strict_p && will_be_in_index_register (xop1))))
7103 || (arm_address_register_rtx_p (xop1, strict_p)
7104 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7107 /* Normally we can assign constant values to target registers without
7108 the help of the constant pool. But there are cases where we have to
7109 use the constant pool, such as:
7110 1) assigning a label to a register.
7111 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7113 Constant pool access in format:
7114 (set (reg r0) (mem (symbol_ref (".LC0"))))
7115 will cause the use of literal pool (later in function arm_reorg).
7116 So here we mark such format as an invalid format, then the compiler
7117 will adjust it into:
7118 (set (reg r0) (symbol_ref (".LC0")))
7119 (set (reg r0) (mem (reg r0))).
7120 No extra register is required, and (mem (reg r0)) won't cause the use
7121 of literal pools. */
7122 else if (arm_disable_literal_pool && code == SYMBOL_REF
7123 && CONSTANT_POOL_ADDRESS_P (x))
7124 return 0;
7126 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7127 && code == SYMBOL_REF
7128 && CONSTANT_POOL_ADDRESS_P (x)
7129 && ! (flag_pic
7130 && symbol_mentioned_p (get_pool_constant (x))
7131 && ! pcrel_constant_p (get_pool_constant (x))))
7132 return 1;
7134 return 0;
7137 /* Return nonzero if INDEX is valid for an address index operand in
7138 ARM state. */
7139 static int
7140 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7141 int strict_p)
7143 HOST_WIDE_INT range;
7144 enum rtx_code code = GET_CODE (index);
7146 /* Standard coprocessor addressing modes. */
7147 if (TARGET_HARD_FLOAT
7148 && TARGET_VFP
7149 && (mode == SFmode || mode == DFmode))
7150 return (code == CONST_INT && INTVAL (index) < 1024
7151 && INTVAL (index) > -1024
7152 && (INTVAL (index) & 3) == 0);
7154 /* For quad modes, we restrict the constant offset to be slightly less
7155 than what the instruction format permits. We do this because for
7156 quad mode moves, we will actually decompose them into two separate
7157 double-mode reads or writes. INDEX must therefore be a valid
7158 (double-mode) offset and so should INDEX+8. */
7159 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7160 return (code == CONST_INT
7161 && INTVAL (index) < 1016
7162 && INTVAL (index) > -1024
7163 && (INTVAL (index) & 3) == 0);
7165 /* We have no such constraint on double mode offsets, so we permit the
7166 full range of the instruction format. */
7167 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7168 return (code == CONST_INT
7169 && INTVAL (index) < 1024
7170 && INTVAL (index) > -1024
7171 && (INTVAL (index) & 3) == 0);
7173 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7174 return (code == CONST_INT
7175 && INTVAL (index) < 1024
7176 && INTVAL (index) > -1024
7177 && (INTVAL (index) & 3) == 0);
7179 if (arm_address_register_rtx_p (index, strict_p)
7180 && (GET_MODE_SIZE (mode) <= 4))
7181 return 1;
7183 if (mode == DImode || mode == DFmode)
7185 if (code == CONST_INT)
7187 HOST_WIDE_INT val = INTVAL (index);
7189 if (TARGET_LDRD)
7190 return val > -256 && val < 256;
7191 else
7192 return val > -4096 && val < 4092;
7195 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7198 if (GET_MODE_SIZE (mode) <= 4
7199 && ! (arm_arch4
7200 && (mode == HImode
7201 || mode == HFmode
7202 || (mode == QImode && outer == SIGN_EXTEND))))
7204 if (code == MULT)
7206 rtx xiop0 = XEXP (index, 0);
7207 rtx xiop1 = XEXP (index, 1);
7209 return ((arm_address_register_rtx_p (xiop0, strict_p)
7210 && power_of_two_operand (xiop1, SImode))
7211 || (arm_address_register_rtx_p (xiop1, strict_p)
7212 && power_of_two_operand (xiop0, SImode)));
7214 else if (code == LSHIFTRT || code == ASHIFTRT
7215 || code == ASHIFT || code == ROTATERT)
7217 rtx op = XEXP (index, 1);
7219 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7220 && CONST_INT_P (op)
7221 && INTVAL (op) > 0
7222 && INTVAL (op) <= 31);
7226 /* For ARM v4 we may be doing a sign-extend operation during the
7227 load. */
7228 if (arm_arch4)
7230 if (mode == HImode
7231 || mode == HFmode
7232 || (outer == SIGN_EXTEND && mode == QImode))
7233 range = 256;
7234 else
7235 range = 4096;
7237 else
7238 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7240 return (code == CONST_INT
7241 && INTVAL (index) < range
7242 && INTVAL (index) > -range);
7245 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7246 index operand, i.e. 1, 2, 4 or 8. */
7247 static bool
7248 thumb2_index_mul_operand (rtx op)
7250 HOST_WIDE_INT val;
7252 if (!CONST_INT_P (op))
7253 return false;
7255 val = INTVAL(op);
7256 return (val == 1 || val == 2 || val == 4 || val == 8);
7259 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7260 static int
7261 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7263 enum rtx_code code = GET_CODE (index);
7265 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7266 /* Standard coprocessor addressing modes. */
7267 if (TARGET_HARD_FLOAT
7268 && TARGET_VFP
7269 && (mode == SFmode || mode == DFmode))
7270 return (code == CONST_INT && INTVAL (index) < 1024
7271 /* Thumb-2 allows only > -256 index range for its core register
7272 load/stores. Since we allow SF/DF in core registers, we have
7273 to use the intersection between -256~4096 (core) and -1024~1024
7274 (coprocessor). */
7275 && INTVAL (index) > -256
7276 && (INTVAL (index) & 3) == 0);
7278 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7280 /* For DImode assume values will usually live in core regs
7281 and only allow LDRD addressing modes. */
7282 if (!TARGET_LDRD || mode != DImode)
7283 return (code == CONST_INT
7284 && INTVAL (index) < 1024
7285 && INTVAL (index) > -1024
7286 && (INTVAL (index) & 3) == 0);
7289 /* For quad modes, we restrict the constant offset to be slightly less
7290 than what the instruction format permits. We do this because for
7291 quad mode moves, we will actually decompose them into two separate
7292 double-mode reads or writes. INDEX must therefore be a valid
7293 (double-mode) offset and so should INDEX+8. */
7294 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7295 return (code == CONST_INT
7296 && INTVAL (index) < 1016
7297 && INTVAL (index) > -1024
7298 && (INTVAL (index) & 3) == 0);
7300 /* We have no such constraint on double mode offsets, so we permit the
7301 full range of the instruction format. */
7302 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7303 return (code == CONST_INT
7304 && INTVAL (index) < 1024
7305 && INTVAL (index) > -1024
7306 && (INTVAL (index) & 3) == 0);
7308 if (arm_address_register_rtx_p (index, strict_p)
7309 && (GET_MODE_SIZE (mode) <= 4))
7310 return 1;
7312 if (mode == DImode || mode == DFmode)
7314 if (code == CONST_INT)
7316 HOST_WIDE_INT val = INTVAL (index);
7317 /* ??? Can we assume ldrd for thumb2? */
7318 /* Thumb-2 ldrd only has reg+const addressing modes. */
7319 /* ldrd supports offsets of +-1020.
7320 However the ldr fallback does not. */
7321 return val > -256 && val < 256 && (val & 3) == 0;
7323 else
7324 return 0;
7327 if (code == MULT)
7329 rtx xiop0 = XEXP (index, 0);
7330 rtx xiop1 = XEXP (index, 1);
7332 return ((arm_address_register_rtx_p (xiop0, strict_p)
7333 && thumb2_index_mul_operand (xiop1))
7334 || (arm_address_register_rtx_p (xiop1, strict_p)
7335 && thumb2_index_mul_operand (xiop0)));
7337 else if (code == ASHIFT)
7339 rtx op = XEXP (index, 1);
7341 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7342 && CONST_INT_P (op)
7343 && INTVAL (op) > 0
7344 && INTVAL (op) <= 3);
7347 return (code == CONST_INT
7348 && INTVAL (index) < 4096
7349 && INTVAL (index) > -256);
7352 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7353 static int
7354 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7356 int regno;
7358 if (!REG_P (x))
7359 return 0;
7361 regno = REGNO (x);
7363 if (strict_p)
7364 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7366 return (regno <= LAST_LO_REGNUM
7367 || regno > LAST_VIRTUAL_REGISTER
7368 || regno == FRAME_POINTER_REGNUM
7369 || (GET_MODE_SIZE (mode) >= 4
7370 && (regno == STACK_POINTER_REGNUM
7371 || regno >= FIRST_PSEUDO_REGISTER
7372 || x == hard_frame_pointer_rtx
7373 || x == arg_pointer_rtx)));
7376 /* Return nonzero if x is a legitimate index register. This is the case
7377 for any base register that can access a QImode object. */
7378 inline static int
7379 thumb1_index_register_rtx_p (rtx x, int strict_p)
7381 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7384 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7386 The AP may be eliminated to either the SP or the FP, so we use the
7387 least common denominator, i.e. SImode, and offsets from 0 to 64.
7389 ??? Verify whether the above is the right approach.
7391 ??? Also, the FP may be eliminated to the SP, so perhaps that
7392 needs special handling also.
7394 ??? Look at how the mips16 port solves this problem. It probably uses
7395 better ways to solve some of these problems.
7397 Although it is not incorrect, we don't accept QImode and HImode
7398 addresses based on the frame pointer or arg pointer until the
7399 reload pass starts. This is so that eliminating such addresses
7400 into stack based ones won't produce impossible code. */
7401 int
7402 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7404 /* ??? Not clear if this is right. Experiment. */
7405 if (GET_MODE_SIZE (mode) < 4
7406 && !(reload_in_progress || reload_completed)
7407 && (reg_mentioned_p (frame_pointer_rtx, x)
7408 || reg_mentioned_p (arg_pointer_rtx, x)
7409 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7410 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7411 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7412 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7413 return 0;
7415 /* Accept any base register. SP only in SImode or larger. */
7416 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7417 return 1;
7419 /* This is PC relative data before arm_reorg runs. */
7420 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7421 && GET_CODE (x) == SYMBOL_REF
7422 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7423 return 1;
7425 /* This is PC relative data after arm_reorg runs. */
7426 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7427 && reload_completed
7428 && (GET_CODE (x) == LABEL_REF
7429 || (GET_CODE (x) == CONST
7430 && GET_CODE (XEXP (x, 0)) == PLUS
7431 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7432 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7433 return 1;
7435 /* Post-inc indexing only supported for SImode and larger. */
7436 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7437 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7438 return 1;
7440 else if (GET_CODE (x) == PLUS)
7442 /* REG+REG address can be any two index registers. */
7443 /* We disallow FRAME+REG addressing since we know that FRAME
7444 will be replaced with STACK, and SP relative addressing only
7445 permits SP+OFFSET. */
7446 if (GET_MODE_SIZE (mode) <= 4
7447 && XEXP (x, 0) != frame_pointer_rtx
7448 && XEXP (x, 1) != frame_pointer_rtx
7449 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7450 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7451 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7452 return 1;
7454 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7455 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7456 || XEXP (x, 0) == arg_pointer_rtx)
7457 && CONST_INT_P (XEXP (x, 1))
7458 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7459 return 1;
7461 /* REG+const has a 10-bit offset for SP, but only SImode and
7462 larger are supported. */
7463 /* ??? Should probably check for DI/DFmode overflow here
7464 just like GO_IF_LEGITIMATE_OFFSET does. */
7465 else if (REG_P (XEXP (x, 0))
7466 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7467 && GET_MODE_SIZE (mode) >= 4
7468 && CONST_INT_P (XEXP (x, 1))
7469 && INTVAL (XEXP (x, 1)) >= 0
7470 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7471 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7472 return 1;
7474 else if (REG_P (XEXP (x, 0))
7475 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7476 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7477 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7478 && REGNO (XEXP (x, 0))
7479 <= LAST_VIRTUAL_POINTER_REGISTER))
7480 && GET_MODE_SIZE (mode) >= 4
7481 && CONST_INT_P (XEXP (x, 1))
7482 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7483 return 1;
7486 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7487 && GET_MODE_SIZE (mode) == 4
7488 && GET_CODE (x) == SYMBOL_REF
7489 && CONSTANT_POOL_ADDRESS_P (x)
7490 && ! (flag_pic
7491 && symbol_mentioned_p (get_pool_constant (x))
7492 && ! pcrel_constant_p (get_pool_constant (x))))
7493 return 1;
7495 return 0;
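A condensed editorial recap (inferred from the code above, not a quotation from the source) of the Thumb-1 address forms accepted:

/* - any valid base register (SP only for SImode or wider);
   - PC-relative constant pool or label references, for 4-byte or wider
     accesses (plus HFmode after reload);
   - POST_INC of an index register, for SImode or wider;
   - REG + REG, for accesses of at most 4 bytes;
   - REG + small scaled constant, per thumb_legitimate_offset_p below;
   - SP/FP/AP/virtual pointer + word-aligned constant, SImode or wider.  */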
7498 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7499 instruction of mode MODE. */
7500 int
7501 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7503 switch (GET_MODE_SIZE (mode))
7505 case 1:
7506 return val >= 0 && val < 32;
7508 case 2:
7509 return val >= 0 && val < 64 && (val & 1) == 0;
7511 default:
7512 return (val >= 0
7513 && (val + GET_MODE_SIZE (mode)) <= 128
7514 && (val & 3) == 0);
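A hedged illustration (not part of arm.c; the macro below is a hypothetical helper, not a GCC macro): the limits in the switch above correspond to the Thumb-1 5-bit scaled immediate offsets.

/* Largest offset accepted for a given access size: byte 0..31, halfword
   0..62 (even), otherwise 0..(128 - SIZE) in multiples of 4, e.g. 124
   for SImode and 120 for DImode.  */
#define EXAMPLE_THUMB1_MAX_OFFSET(SIZE) \
  ((SIZE) == 1 ? 31 : (SIZE) == 2 ? 62 : 128 - (SIZE))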
7518 bool
7519 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7521 if (TARGET_ARM)
7522 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7523 else if (TARGET_THUMB2)
7524 return thumb2_legitimate_address_p (mode, x, strict_p);
7525 else /* if (TARGET_THUMB1) */
7526 return thumb1_legitimate_address_p (mode, x, strict_p);
7529 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7531 Given an rtx X being reloaded into a reg required to be
7532 in class CLASS, return the class of reg to actually use.
7533 In general this is just CLASS, but for the Thumb core registers and
7534 immediate constants we prefer a LO_REGS class or a subset. */
7536 static reg_class_t
7537 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7539 if (TARGET_32BIT)
7540 return rclass;
7541 else
7543 if (rclass == GENERAL_REGS)
7544 return LO_REGS;
7545 else
7546 return rclass;
7550 /* Build the SYMBOL_REF for __tls_get_addr. */
7552 static GTY(()) rtx tls_get_addr_libfunc;
7554 static rtx
7555 get_tls_get_addr (void)
7557 if (!tls_get_addr_libfunc)
7558 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7559 return tls_get_addr_libfunc;
7563 arm_load_tp (rtx target)
7565 if (!target)
7566 target = gen_reg_rtx (SImode);
7568 if (TARGET_HARD_TP)
7570 /* Can return in any reg. */
7571 emit_insn (gen_load_tp_hard (target));
7573 else
7575 /* Always returned in r0. Immediately copy the result into a pseudo,
7576 otherwise other uses of r0 (e.g. setting up function arguments) may
7577 clobber the value. */
7579 rtx tmp;
7581 emit_insn (gen_load_tp_soft ());
7583 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7584 emit_move_insn (target, tmp);
7586 return target;
7589 static rtx
7590 load_tls_operand (rtx x, rtx reg)
7592 rtx tmp;
7594 if (reg == NULL_RTX)
7595 reg = gen_reg_rtx (SImode);
7597 tmp = gen_rtx_CONST (SImode, x);
7599 emit_move_insn (reg, tmp);
7601 return reg;
7604 static rtx
7605 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7607 rtx insns, label, labelno, sum;
7609 gcc_assert (reloc != TLS_DESCSEQ);
7610 start_sequence ();
7612 labelno = GEN_INT (pic_labelno++);
7613 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7614 label = gen_rtx_CONST (VOIDmode, label);
7616 sum = gen_rtx_UNSPEC (Pmode,
7617 gen_rtvec (4, x, GEN_INT (reloc), label,
7618 GEN_INT (TARGET_ARM ? 8 : 4)),
7619 UNSPEC_TLS);
7620 reg = load_tls_operand (sum, reg);
7622 if (TARGET_ARM)
7623 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7624 else
7625 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7627 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7628 LCT_PURE, /* LCT_CONST? */
7629 Pmode, 1, reg, Pmode);
7631 insns = get_insns ();
7632 end_sequence ();
7634 return insns;
7637 static rtx
7638 arm_tls_descseq_addr (rtx x, rtx reg)
7640 rtx labelno = GEN_INT (pic_labelno++);
7641 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7642 rtx sum = gen_rtx_UNSPEC (Pmode,
7643 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7644 gen_rtx_CONST (VOIDmode, label),
7645 GEN_INT (!TARGET_ARM)),
7646 UNSPEC_TLS);
7647 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7649 emit_insn (gen_tlscall (x, labelno));
7650 if (!reg)
7651 reg = gen_reg_rtx (SImode);
7652 else
7653 gcc_assert (REGNO (reg) != R0_REGNUM);
7655 emit_move_insn (reg, reg0);
7657 return reg;
7660 rtx
7661 legitimize_tls_address (rtx x, rtx reg)
7663 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7664 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7666 switch (model)
7668 case TLS_MODEL_GLOBAL_DYNAMIC:
7669 if (TARGET_GNU2_TLS)
7671 reg = arm_tls_descseq_addr (x, reg);
7673 tp = arm_load_tp (NULL_RTX);
7675 dest = gen_rtx_PLUS (Pmode, tp, reg);
7677 else
7679 /* Original scheme */
7680 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7681 dest = gen_reg_rtx (Pmode);
7682 emit_libcall_block (insns, dest, ret, x);
7684 return dest;
7686 case TLS_MODEL_LOCAL_DYNAMIC:
7687 if (TARGET_GNU2_TLS)
7689 reg = arm_tls_descseq_addr (x, reg);
7691 tp = arm_load_tp (NULL_RTX);
7693 dest = gen_rtx_PLUS (Pmode, tp, reg);
7695 else
7697 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7699 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7700 share the LDM result with other LD model accesses. */
7701 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7702 UNSPEC_TLS);
7703 dest = gen_reg_rtx (Pmode);
7704 emit_libcall_block (insns, dest, ret, eqv);
7706 /* Load the addend. */
7707 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7708 GEN_INT (TLS_LDO32)),
7709 UNSPEC_TLS);
7710 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7711 dest = gen_rtx_PLUS (Pmode, dest, addend);
7713 return dest;
7715 case TLS_MODEL_INITIAL_EXEC:
7716 labelno = GEN_INT (pic_labelno++);
7717 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7718 label = gen_rtx_CONST (VOIDmode, label);
7719 sum = gen_rtx_UNSPEC (Pmode,
7720 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7721 GEN_INT (TARGET_ARM ? 8 : 4)),
7722 UNSPEC_TLS);
7723 reg = load_tls_operand (sum, reg);
7725 if (TARGET_ARM)
7726 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7727 else if (TARGET_THUMB2)
7728 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7729 else
7731 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7732 emit_move_insn (reg, gen_const_mem (SImode, reg));
7735 tp = arm_load_tp (NULL_RTX);
7737 return gen_rtx_PLUS (Pmode, tp, reg);
7739 case TLS_MODEL_LOCAL_EXEC:
7740 tp = arm_load_tp (NULL_RTX);
7742 reg = gen_rtx_UNSPEC (Pmode,
7743 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7744 UNSPEC_TLS);
7745 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7747 return gen_rtx_PLUS (Pmode, tp, reg);
7749 default:
7750 abort ();
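For readers unfamiliar with the TLS access models, here is an editorial summary (an inference from the code above, not a comment from the source) of what the generated sequences compute:

/* - Global/local dynamic with TARGET_GNU2_TLS: run the TLS descriptor
     sequence (tlscall) and add the thread pointer to its result.
   - Global dynamic (classic): call __tls_get_addr on a TLS_GD32 operand;
     the call returns the variable's address directly.
   - Local dynamic (classic): call __tls_get_addr once per module
     (TLS_LDM32), shared between accesses via the libcall block, then add
     the per-symbol TLS_LDO32 addend.
   - Initial exec: load the thread-pointer-relative offset from the GOT
     (TLS_IE32) and add the thread pointer.
   - Local exec: add the link-time TLS_LE32 offset to the thread pointer.  */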
7754 /* Try machine-dependent ways of modifying an illegitimate address
7755 to be legitimate. If we find one, return the new, valid address. */
7756 rtx
7757 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7759 if (arm_tls_referenced_p (x))
7761 rtx addend = NULL;
7763 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7765 addend = XEXP (XEXP (x, 0), 1);
7766 x = XEXP (XEXP (x, 0), 0);
7769 if (GET_CODE (x) != SYMBOL_REF)
7770 return x;
7772 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7774 x = legitimize_tls_address (x, NULL_RTX);
7776 if (addend)
7778 x = gen_rtx_PLUS (SImode, x, addend);
7779 orig_x = x;
7781 else
7782 return x;
7785 if (!TARGET_ARM)
7787 /* TODO: legitimize_address for Thumb2. */
7788 if (TARGET_THUMB2)
7789 return x;
7790 return thumb_legitimize_address (x, orig_x, mode);
7793 if (GET_CODE (x) == PLUS)
7795 rtx xop0 = XEXP (x, 0);
7796 rtx xop1 = XEXP (x, 1);
7798 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7799 xop0 = force_reg (SImode, xop0);
7801 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7802 && !symbol_mentioned_p (xop1))
7803 xop1 = force_reg (SImode, xop1);
7805 if (ARM_BASE_REGISTER_RTX_P (xop0)
7806 && CONST_INT_P (xop1))
7808 HOST_WIDE_INT n, low_n;
7809 rtx base_reg, val;
7810 n = INTVAL (xop1);
7812 /* VFP addressing modes actually allow greater offsets, but for
7813 now we just stick with the lowest common denominator. */
7814 if (mode == DImode
7815 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7817 low_n = n & 0x0f;
7818 n &= ~0x0f;
7819 if (low_n > 4)
7821 n += 16;
7822 low_n -= 16;
7825 else
7827 low_n = ((mode) == TImode ? 0
7828 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7829 n -= low_n;
7832 base_reg = gen_reg_rtx (SImode);
7833 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7834 emit_move_insn (base_reg, val);
7835 x = plus_constant (Pmode, base_reg, low_n);
7837 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7838 x = gen_rtx_PLUS (SImode, xop0, xop1);
7841 /* XXX We don't allow MINUS any more -- see comment in
7842 arm_legitimate_address_outer_p (). */
7843 else if (GET_CODE (x) == MINUS)
7845 rtx xop0 = XEXP (x, 0);
7846 rtx xop1 = XEXP (x, 1);
7848 if (CONSTANT_P (xop0))
7849 xop0 = force_reg (SImode, xop0);
7851 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7852 xop1 = force_reg (SImode, xop1);
7854 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7855 x = gen_rtx_MINUS (SImode, xop0, xop1);
7858 /* Make sure to take full advantage of the pre-indexed addressing mode
7859 with absolute addresses, which often allows the base register to be
7860 factored out across multiple adjacent memory references and may even
7861 allow the minipool to be avoided entirely. */
7862 else if (CONST_INT_P (x) && optimize > 0)
7864 unsigned int bits;
7865 HOST_WIDE_INT mask, base, index;
7866 rtx base_reg;
7868 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7869 use an 8-bit index. So let's use a 12-bit index for SImode only and
7870 hope that arm_gen_constant will enable ldrb to use more bits. */
7871 bits = (mode == SImode) ? 12 : 8;
7872 mask = (1 << bits) - 1;
7873 base = INTVAL (x) & ~mask;
7874 index = INTVAL (x) & mask;
7875 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7877 /* It'll most probably be more efficient to generate the base
7878 with more bits set and use a negative index instead. */
7879 base |= mask;
7880 index -= mask;
7882 base_reg = force_reg (SImode, GEN_INT (base));
7883 x = plus_constant (Pmode, base_reg, index);
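The base/index split above can be restated as a standalone sketch (editorial, not part of arm.c; the function name is hypothetical, although bit_count is this file's own helper). For example, with 12 offset bits, 0x0ffffff4 first splits into base 0x0ffff000 + index 0xff4; since that base has 16 bits set, it is rewritten as base 0x0fffffff (a cheap MVN-style constant) with index -11.

static void
example_split_absolute_address (HOST_WIDE_INT x, unsigned int bits,
                                HOST_WIDE_INT *base, HOST_WIDE_INT *index)
{
  HOST_WIDE_INT mask = (1 << bits) - 1;

  *base = x & ~mask;    /* High part, to be loaded into a register.  */
  *index = x & mask;    /* Low part, used as the addressing offset.  */
  if (bit_count (*base & 0xffffffff) > (32 - bits) / 2)
    {
      /* A base with more bits set is often cheaper to synthesize;
         compensate with a negative index so base + index is unchanged.  */
      *base |= mask;
      *index -= mask;
    }
}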
7886 if (flag_pic)
7888 /* We need to find and carefully transform any SYMBOL and LABEL
7889 references; so go back to the original address expression. */
7890 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7892 if (new_x != orig_x)
7893 x = new_x;
7896 return x;
7900 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7901 to be legitimate. If we find one, return the new, valid address. */
7902 rtx
7903 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7905 if (GET_CODE (x) == PLUS
7906 && CONST_INT_P (XEXP (x, 1))
7907 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7908 || INTVAL (XEXP (x, 1)) < 0))
7910 rtx xop0 = XEXP (x, 0);
7911 rtx xop1 = XEXP (x, 1);
7912 HOST_WIDE_INT offset = INTVAL (xop1);
7914 /* Try and fold the offset into a biasing of the base register and
7915 then offsetting that. Don't do this when optimizing for space
7916 since it can cause too many CSEs. */
7917 if (optimize_size && offset >= 0
7918 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7920 HOST_WIDE_INT delta;
7922 if (offset >= 256)
7923 delta = offset - (256 - GET_MODE_SIZE (mode));
7924 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7925 delta = 31 * GET_MODE_SIZE (mode);
7926 else
7927 delta = offset & (~31 * GET_MODE_SIZE (mode));
7929 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7930 NULL_RTX);
7931 x = plus_constant (Pmode, xop0, delta);
7933 else if (offset < 0 && offset > -256)
7934 /* Small negative offsets are best done with a subtract before the
7935 dereference, since forcing these into a register normally takes two
7936 instructions. */
7937 x = force_operand (x, NULL_RTX);
7938 else
7940 /* For the remaining cases, force the constant into a register. */
7941 xop1 = force_reg (SImode, xop1);
7942 x = gen_rtx_PLUS (SImode, xop0, xop1);
7945 else if (GET_CODE (x) == PLUS
7946 && s_register_operand (XEXP (x, 1), SImode)
7947 && !s_register_operand (XEXP (x, 0), SImode))
7949 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7951 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7954 if (flag_pic)
7956 /* We need to find and carefully transform any SYMBOL and LABEL
7957 references; so go back to the original address expression. */
7958 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7960 if (new_x != orig_x)
7961 x = new_x;
7964 return x;
7967 /* Return TRUE if X contains any TLS symbol references. */
7969 bool
7970 arm_tls_referenced_p (rtx x)
7972 if (! TARGET_HAVE_TLS)
7973 return false;
7975 subrtx_iterator::array_type array;
7976 FOR_EACH_SUBRTX (iter, array, x, ALL)
7978 const_rtx x = *iter;
7979 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
7980 return true;
7982 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7983 TLS offsets, not real symbol references. */
7984 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7985 iter.skip_subrtxes ();
7987 return false;
7990 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7992 On the ARM, allow any integer (invalid ones are removed later by insn
7993 patterns), nice doubles and symbol_refs which refer to the function's
7994 constant pool XXX.
7996 When generating PIC, allow anything. */
7998 static bool
7999 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8001 return flag_pic || !label_mentioned_p (x);
8004 static bool
8005 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8007 return (CONST_INT_P (x)
8008 || CONST_DOUBLE_P (x)
8009 || CONSTANT_ADDRESS_P (x)
8010 || flag_pic);
8013 static bool
8014 arm_legitimate_constant_p (machine_mode mode, rtx x)
8016 return (!arm_cannot_force_const_mem (mode, x)
8017 && (TARGET_32BIT
8018 ? arm_legitimate_constant_p_1 (mode, x)
8019 : thumb_legitimate_constant_p (mode, x)));
8022 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8024 static bool
8025 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8027 rtx base, offset;
8029 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8031 split_const (x, &base, &offset);
8032 if (GET_CODE (base) == SYMBOL_REF
8033 && !offset_within_block_p (base, INTVAL (offset)))
8034 return true;
8036 return arm_tls_referenced_p (x);
8039 #define REG_OR_SUBREG_REG(X) \
8040 (REG_P (X) \
8041 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8043 #define REG_OR_SUBREG_RTX(X) \
8044 (REG_P (X) ? (X) : SUBREG_REG (X))
8046 static inline int
8047 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8049 machine_mode mode = GET_MODE (x);
8050 int total, words;
8052 switch (code)
8054 case ASHIFT:
8055 case ASHIFTRT:
8056 case LSHIFTRT:
8057 case ROTATERT:
8058 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8060 case PLUS:
8061 case MINUS:
8062 case COMPARE:
8063 case NEG:
8064 case NOT:
8065 return COSTS_N_INSNS (1);
8067 case MULT:
8068 if (CONST_INT_P (XEXP (x, 1)))
8070 int cycles = 0;
8071 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8073 while (i)
8075 i >>= 2;
8076 cycles++;
8078 return COSTS_N_INSNS (2) + cycles;
8080 return COSTS_N_INSNS (1) + 16;
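An editorial note (an inference, not stated in the source): the loop above appears to model an early-terminating multiplier that consumes two bits of the constant multiplier per cycle.

/* For example, a multiplier of 0x3c shifts 0x3c -> 0xf -> 0x3 -> 0 in
   three iterations, giving a cost of COSTS_N_INSNS (2) + 3.  */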
8082 case SET:
8083 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8084 the mode. */
8085 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8086 return (COSTS_N_INSNS (words)
8087 + 4 * ((MEM_P (SET_SRC (x)))
8088 + MEM_P (SET_DEST (x))));
8090 case CONST_INT:
8091 if (outer == SET)
8093 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8094 return 0;
8095 if (thumb_shiftable_const (INTVAL (x)))
8096 return COSTS_N_INSNS (2);
8097 return COSTS_N_INSNS (3);
8099 else if ((outer == PLUS || outer == COMPARE)
8100 && INTVAL (x) < 256 && INTVAL (x) > -256)
8101 return 0;
8102 else if ((outer == IOR || outer == XOR || outer == AND)
8103 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8104 return COSTS_N_INSNS (1);
8105 else if (outer == AND)
8107 int i;
8108 /* This duplicates the tests in the andsi3 expander. */
8109 for (i = 9; i <= 31; i++)
8110 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8111 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8112 return COSTS_N_INSNS (2);
8114 else if (outer == ASHIFT || outer == ASHIFTRT
8115 || outer == LSHIFTRT)
8116 return 0;
8117 return COSTS_N_INSNS (2);
8119 case CONST:
8120 case CONST_DOUBLE:
8121 case LABEL_REF:
8122 case SYMBOL_REF:
8123 return COSTS_N_INSNS (3);
8125 case UDIV:
8126 case UMOD:
8127 case DIV:
8128 case MOD:
8129 return 100;
8131 case TRUNCATE:
8132 return 99;
8134 case AND:
8135 case XOR:
8136 case IOR:
8137 /* XXX guess. */
8138 return 8;
8140 case MEM:
8141 /* XXX another guess. */
8142 /* Memory costs quite a lot for the first word, but subsequent words
8143 load at the equivalent of a single insn each. */
8144 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8145 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8146 ? 4 : 0));
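A quick worked example of the formula above (editorial, not from the source):

/* With 4-byte words, a DImode load costs 10 + 4 * ((8 - 1) / 4) = 14,
   or 18 if it reads from the constant pool.  */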
8148 case IF_THEN_ELSE:
8149 /* XXX a guess. */
8150 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8151 return 14;
8152 return 2;
8154 case SIGN_EXTEND:
8155 case ZERO_EXTEND:
8156 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8157 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8159 if (mode == SImode)
8160 return total;
8162 if (arm_arch6)
8163 return total + COSTS_N_INSNS (1);
8165 /* Assume a two-shift sequence. Increase the cost slightly so
8166 we prefer actual shifts over an extend operation. */
8167 return total + 1 + COSTS_N_INSNS (2);
8169 default:
8170 return 99;
8174 static inline bool
8175 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8177 machine_mode mode = GET_MODE (x);
8178 enum rtx_code subcode;
8179 rtx operand;
8180 enum rtx_code code = GET_CODE (x);
8181 *total = 0;
8183 switch (code)
8185 case MEM:
8186 /* Memory costs quite a lot for the first word, but subsequent words
8187 load at the equivalent of a single insn each. */
8188 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8189 return true;
8191 case DIV:
8192 case MOD:
8193 case UDIV:
8194 case UMOD:
8195 if (TARGET_HARD_FLOAT && mode == SFmode)
8196 *total = COSTS_N_INSNS (2);
8197 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8198 *total = COSTS_N_INSNS (4);
8199 else
8200 *total = COSTS_N_INSNS (20);
8201 return false;
8203 case ROTATE:
8204 if (REG_P (XEXP (x, 1)))
8205 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8206 else if (!CONST_INT_P (XEXP (x, 1)))
8207 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8209 /* Fall through */
8210 case ROTATERT:
8211 if (mode != SImode)
8213 *total += COSTS_N_INSNS (4);
8214 return true;
8217 /* Fall through */
8218 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8219 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8220 if (mode == DImode)
8222 *total += COSTS_N_INSNS (3);
8223 return true;
8226 *total += COSTS_N_INSNS (1);
8227 /* Increase the cost of complex shifts because they aren't any faster,
8228 and reduce dual issue opportunities. */
8229 if (arm_tune_cortex_a9
8230 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8231 ++*total;
8233 return true;
8235 case MINUS:
8236 if (mode == DImode)
8238 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8239 if (CONST_INT_P (XEXP (x, 0))
8240 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8242 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8243 return true;
8246 if (CONST_INT_P (XEXP (x, 1))
8247 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8249 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8250 return true;
8253 return false;
8256 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8258 if (TARGET_HARD_FLOAT
8259 && (mode == SFmode
8260 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8262 *total = COSTS_N_INSNS (1);
8263 if (CONST_DOUBLE_P (XEXP (x, 0))
8264 && arm_const_double_rtx (XEXP (x, 0)))
8266 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8267 return true;
8270 if (CONST_DOUBLE_P (XEXP (x, 1))
8271 && arm_const_double_rtx (XEXP (x, 1)))
8273 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8274 return true;
8277 return false;
8279 *total = COSTS_N_INSNS (20);
8280 return false;
8283 *total = COSTS_N_INSNS (1);
8284 if (CONST_INT_P (XEXP (x, 0))
8285 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8287 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8288 return true;
8291 subcode = GET_CODE (XEXP (x, 1));
8292 if (subcode == ASHIFT || subcode == ASHIFTRT
8293 || subcode == LSHIFTRT
8294 || subcode == ROTATE || subcode == ROTATERT)
8296 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8297 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8298 return true;
8301 /* A shift as a part of RSB costs no more than RSB itself. */
8302 if (GET_CODE (XEXP (x, 0)) == MULT
8303 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8305 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8306 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8307 return true;
8310 if (subcode == MULT
8311 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8313 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8314 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8315 return true;
8318 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8319 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8321 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8322 if (REG_P (XEXP (XEXP (x, 1), 0))
8323 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8324 *total += COSTS_N_INSNS (1);
8326 return true;
8329 /* Fall through */
8331 case PLUS:
8332 if (code == PLUS && arm_arch6 && mode == SImode
8333 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8334 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8336 *total = COSTS_N_INSNS (1);
8337 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8338 0, speed);
8339 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8340 return true;
8343 /* MLA: All arguments must be registers. We filter out
8344 multiplication by a power of two, so that we fall through to
8345 the code below. */
8346 if (GET_CODE (XEXP (x, 0)) == MULT
8347 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8349 /* The cost comes from the cost of the multiply. */
8350 return false;
8353 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8355 if (TARGET_HARD_FLOAT
8356 && (mode == SFmode
8357 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8359 *total = COSTS_N_INSNS (1);
8360 if (CONST_DOUBLE_P (XEXP (x, 1))
8361 && arm_const_double_rtx (XEXP (x, 1)))
8363 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8364 return true;
8367 return false;
8370 *total = COSTS_N_INSNS (20);
8371 return false;
8374 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8375 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8377 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8378 if (REG_P (XEXP (XEXP (x, 0), 0))
8379 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8380 *total += COSTS_N_INSNS (1);
8381 return true;
8384 /* Fall through */
8386 case AND: case XOR: case IOR:
8388 /* Normally the frame registers will be split into reg+const during
8389 reload, so it is a bad idea to combine them with other instructions,
8390 since then they might not be moved outside of loops. As a compromise
8391 we allow integration with ops that have a constant as their second
8392 operand. */
8393 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8394 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8395 && !CONST_INT_P (XEXP (x, 1)))
8396 *total = COSTS_N_INSNS (1);
8398 if (mode == DImode)
8400 *total += COSTS_N_INSNS (2);
8401 if (CONST_INT_P (XEXP (x, 1))
8402 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8404 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8405 return true;
8408 return false;
8411 *total += COSTS_N_INSNS (1);
8412 if (CONST_INT_P (XEXP (x, 1))
8413 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8415 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8416 return true;
8418 subcode = GET_CODE (XEXP (x, 0));
8419 if (subcode == ASHIFT || subcode == ASHIFTRT
8420 || subcode == LSHIFTRT
8421 || subcode == ROTATE || subcode == ROTATERT)
8423 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8424 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8425 return true;
8428 if (subcode == MULT
8429 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8431 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8433 return true;
8436 if (subcode == UMIN || subcode == UMAX
8437 || subcode == SMIN || subcode == SMAX)
8439 *total = COSTS_N_INSNS (3);
8440 return true;
8443 return false;
8445 case MULT:
8446 /* This should have been handled by the CPU specific routines. */
8447 gcc_unreachable ();
8449 case TRUNCATE:
8450 if (arm_arch3m && mode == SImode
8451 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8452 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8453 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8454 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8455 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8456 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8458 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8459 return true;
8461 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8462 return false;
8464 case NEG:
8465 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8467 if (TARGET_HARD_FLOAT
8468 && (mode == SFmode
8469 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8471 *total = COSTS_N_INSNS (1);
8472 return false;
8474 *total = COSTS_N_INSNS (2);
8475 return false;
8478 /* Fall through */
8479 case NOT:
8480 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8481 if (mode == SImode && code == NOT)
8483 subcode = GET_CODE (XEXP (x, 0));
8484 if (subcode == ASHIFT || subcode == ASHIFTRT
8485 || subcode == LSHIFTRT
8486 || subcode == ROTATE || subcode == ROTATERT
8487 || (subcode == MULT
8488 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8490 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8491 /* Register shifts cost an extra cycle. */
8492 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8493 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8494 subcode, 1, speed);
8495 return true;
8499 return false;
8501 case IF_THEN_ELSE:
8502 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8504 *total = COSTS_N_INSNS (4);
8505 return true;
8508 operand = XEXP (x, 0);
8510 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8511 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8512 && REG_P (XEXP (operand, 0))
8513 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8514 *total += COSTS_N_INSNS (1);
8515 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8516 + rtx_cost (XEXP (x, 2), code, 2, speed));
8517 return true;
8519 case NE:
8520 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8522 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8523 return true;
8525 goto scc_insn;
8527 case GE:
8528 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8529 && mode == SImode && XEXP (x, 1) == const0_rtx)
8531 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8532 return true;
8534 goto scc_insn;
8536 case LT:
8537 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8538 && mode == SImode && XEXP (x, 1) == const0_rtx)
8540 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8541 return true;
8543 goto scc_insn;
8545 case EQ:
8546 case GT:
8547 case LE:
8548 case GEU:
8549 case LTU:
8550 case GTU:
8551 case LEU:
8552 case UNORDERED:
8553 case ORDERED:
8554 case UNEQ:
8555 case UNGE:
8556 case UNLT:
8557 case UNGT:
8558 case UNLE:
8559 scc_insn:
8560 /* SCC insns. In the case where the comparison has already been
8561 performed, then they cost 2 instructions. Otherwise they need
8562 an additional comparison before them. */
8563 *total = COSTS_N_INSNS (2);
8564 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8566 return true;
8569 /* Fall through */
8570 case COMPARE:
8571 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8573 *total = 0;
8574 return true;
8577 *total += COSTS_N_INSNS (1);
8578 if (CONST_INT_P (XEXP (x, 1))
8579 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8581 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8582 return true;
8585 subcode = GET_CODE (XEXP (x, 0));
8586 if (subcode == ASHIFT || subcode == ASHIFTRT
8587 || subcode == LSHIFTRT
8588 || subcode == ROTATE || subcode == ROTATERT)
8590 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8591 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8592 return true;
8595 if (subcode == MULT
8596 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8598 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8599 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8600 return true;
8603 return false;
8605 case UMIN:
8606 case UMAX:
8607 case SMIN:
8608 case SMAX:
8609 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8610 if (!CONST_INT_P (XEXP (x, 1))
8611 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8612 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8613 return true;
8615 case ABS:
8616 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8618 if (TARGET_HARD_FLOAT
8619 && (mode == SFmode
8620 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8622 *total = COSTS_N_INSNS (1);
8623 return false;
8625 *total = COSTS_N_INSNS (20);
8626 return false;
8628 *total = COSTS_N_INSNS (1);
8629 if (mode == DImode)
8630 *total += COSTS_N_INSNS (3);
8631 return false;
8633 case SIGN_EXTEND:
8634 case ZERO_EXTEND:
8635 *total = 0;
8636 if (GET_MODE_CLASS (mode) == MODE_INT)
8638 rtx op = XEXP (x, 0);
8639 machine_mode opmode = GET_MODE (op);
8641 if (mode == DImode)
8642 *total += COSTS_N_INSNS (1);
8644 if (opmode != SImode)
8646 if (MEM_P (op))
8648 /* If !arm_arch4, we use one of the extendhisi2_mem
8649 or movhi_bytes patterns for HImode. For a QImode
8650 sign extension, we first zero-extend from memory
8651 and then perform a shift sequence. */
8652 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8653 *total += COSTS_N_INSNS (2);
8655 else if (arm_arch6)
8656 *total += COSTS_N_INSNS (1);
8658 /* We don't have the necessary insn, so we need to perform some
8659 other operation. */
8660 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8661 /* An and with constant 255. */
8662 *total += COSTS_N_INSNS (1);
8663 else
8664 /* A shift sequence. Increase costs slightly to avoid
8665 combining two shifts into an extend operation. */
8666 *total += COSTS_N_INSNS (2) + 1;
8669 return false;
8672 switch (GET_MODE (XEXP (x, 0)))
8674 case V8QImode:
8675 case V4HImode:
8676 case V2SImode:
8677 case V4QImode:
8678 case V2HImode:
8679 *total = COSTS_N_INSNS (1);
8680 return false;
8682 default:
8683 gcc_unreachable ();
8685 gcc_unreachable ();
8687 case ZERO_EXTRACT:
8688 case SIGN_EXTRACT:
8689 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8690 return true;
8692 case CONST_INT:
8693 if (const_ok_for_arm (INTVAL (x))
8694 || const_ok_for_arm (~INTVAL (x)))
8695 *total = COSTS_N_INSNS (1);
8696 else
8697 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8698 INTVAL (x), NULL_RTX,
8699 NULL_RTX, 0, 0));
8700 return true;
8702 case CONST:
8703 case LABEL_REF:
8704 case SYMBOL_REF:
8705 *total = COSTS_N_INSNS (3);
8706 return true;
8708 case HIGH:
8709 *total = COSTS_N_INSNS (1);
8710 return true;
8712 case LO_SUM:
8713 *total = COSTS_N_INSNS (1);
8714 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8715 return true;
8717 case CONST_DOUBLE:
8718 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8719 && (mode == SFmode || !TARGET_VFP_SINGLE))
8720 *total = COSTS_N_INSNS (1);
8721 else
8722 *total = COSTS_N_INSNS (4);
8723 return true;
8725 case SET:
8726 /* The vec_extract patterns accept memory operands that require an
8727 address reload. Account for the cost of that reload to give the
8728 auto-inc-dec pass an incentive to try to replace them. */
8729 if (TARGET_NEON && MEM_P (SET_DEST (x))
8730 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8732 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8733 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8734 *total += COSTS_N_INSNS (1);
8735 return true;
8737 /* Likewise for the vec_set patterns. */
8738 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8739 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8740 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8742 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8743 *total = rtx_cost (mem, code, 0, speed);
8744 if (!neon_vector_mem_operand (mem, 2, true))
8745 *total += COSTS_N_INSNS (1);
8746 return true;
8748 return false;
8750 case UNSPEC:
8751 /* We cost this as high as our memory costs to allow this to
8752 be hoisted from loops. */
8753 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8755 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8757 return true;
8759 case CONST_VECTOR:
8760 if (TARGET_NEON
8761 && TARGET_HARD_FLOAT
8762 && outer == SET
8763 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8764 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8765 *total = COSTS_N_INSNS (1);
8766 else
8767 *total = COSTS_N_INSNS (4);
8768 return true;
8770 default:
8771 *total = COSTS_N_INSNS (4);
8772 return false;
8776 /* Estimates the size cost of thumb1 instructions.
8777 For now most of the code is copied from thumb1_rtx_costs. We need more
8778 fine-grained tuning when we have more related test cases. */
8779 static inline int
8780 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8782 machine_mode mode = GET_MODE (x);
8783 int words;
8785 switch (code)
8787 case ASHIFT:
8788 case ASHIFTRT:
8789 case LSHIFTRT:
8790 case ROTATERT:
8791 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8793 case PLUS:
8794 case MINUS:
8795 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8796 defined by RTL expansion, especially for the expansion of
8797 multiplication. */
8798 if ((GET_CODE (XEXP (x, 0)) == MULT
8799 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8800 || (GET_CODE (XEXP (x, 1)) == MULT
8801 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8802 return COSTS_N_INSNS (2);
8803 /* Deliberately fall through for normal RTX. */
8804 case COMPARE:
8805 case NEG:
8806 case NOT:
8807 return COSTS_N_INSNS (1);
8809 case MULT:
8810 if (CONST_INT_P (XEXP (x, 1)))
8812 /* The Thumb-1 mul instruction can't operate on a constant; we must
8813 load it into a register first. */
8814 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8815 /* For targets that have a very small, high-latency multiply unit,
8816 we prefer to synthesize the multiply with up to 5 instructions,
8817 giving a good balance between size and performance. */
8818 if (arm_arch6m && arm_m_profile_small_mul)
8819 return COSTS_N_INSNS (5);
8820 else
8821 return COSTS_N_INSNS (1) + const_size;
8823 return COSTS_N_INSNS (1);
8825 case SET:
8826 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8827 the mode. */
8828 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8829 return COSTS_N_INSNS (words)
8830 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8831 || satisfies_constraint_K (SET_SRC (x))
8832 /* thumb1_movdi_insn. */
8833 || ((words > 1) && MEM_P (SET_SRC (x))));
8835 case CONST_INT:
8836 if (outer == SET)
8838 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8839 return COSTS_N_INSNS (1);
8840 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8841 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8842 return COSTS_N_INSNS (2);
8843 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8844 if (thumb_shiftable_const (INTVAL (x)))
8845 return COSTS_N_INSNS (2);
8846 return COSTS_N_INSNS (3);
8848 else if ((outer == PLUS || outer == COMPARE)
8849 && INTVAL (x) < 256 && INTVAL (x) > -256)
8850 return 0;
8851 else if ((outer == IOR || outer == XOR || outer == AND)
8852 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8853 return COSTS_N_INSNS (1);
8854 else if (outer == AND)
8856 int i;
8857 /* This duplicates the tests in the andsi3 expander. */
8858 for (i = 9; i <= 31; i++)
8859 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8860 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8861 return COSTS_N_INSNS (2);
8863 else if (outer == ASHIFT || outer == ASHIFTRT
8864 || outer == LSHIFTRT)
8865 return 0;
8866 return COSTS_N_INSNS (2);
8868 case CONST:
8869 case CONST_DOUBLE:
8870 case LABEL_REF:
8871 case SYMBOL_REF:
8872 return COSTS_N_INSNS (3);
8874 case UDIV:
8875 case UMOD:
8876 case DIV:
8877 case MOD:
8878 return 100;
8880 case TRUNCATE:
8881 return 99;
8883 case AND:
8884 case XOR:
8885 case IOR:
8886 return COSTS_N_INSNS (1);
8888 case MEM:
8889 return (COSTS_N_INSNS (1)
8890 + COSTS_N_INSNS (1)
8891 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8892 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8893 ? COSTS_N_INSNS (1) : 0));
8895 case IF_THEN_ELSE:
8896 /* XXX a guess. */
8897 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8898 return 14;
8899 return 2;
8901 case ZERO_EXTEND:
8902 /* XXX still guessing. */
8903 switch (GET_MODE (XEXP (x, 0)))
8905 case QImode:
8906 return (1 + (mode == DImode ? 4 : 0)
8907 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8909 case HImode:
8910 return (4 + (mode == DImode ? 4 : 0)
8911 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8913 case SImode:
8914 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8916 default:
8917 return 99;
8920 default:
8921 return 99;
8925 /* RTX costs when optimizing for size. */
8926 static bool
8927 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8928 int *total)
8930 machine_mode mode = GET_MODE (x);
8931 if (TARGET_THUMB1)
8933 *total = thumb1_size_rtx_costs (x, code, outer_code);
8934 return true;
8937 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8938 switch (code)
8940 case MEM:
8941 /* A memory access costs 1 insn if the mode is small, or the address is
8942 a single register, otherwise it costs one insn per word. */
8943 if (REG_P (XEXP (x, 0)))
8944 *total = COSTS_N_INSNS (1);
8945 else if (flag_pic
8946 && GET_CODE (XEXP (x, 0)) == PLUS
8947 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8948 /* This will be split into two instructions.
8949 See arm.md:calculate_pic_address. */
8950 *total = COSTS_N_INSNS (2);
8951 else
8952 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8953 return true;
8955 case DIV:
8956 case MOD:
8957 case UDIV:
8958 case UMOD:
8959 /* Needs a libcall, so it costs about this. */
8960 *total = COSTS_N_INSNS (2);
8961 return false;
8963 case ROTATE:
8964 if (mode == SImode && REG_P (XEXP (x, 1)))
8966 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8967 return true;
8969 /* Fall through */
8970 case ROTATERT:
8971 case ASHIFT:
8972 case LSHIFTRT:
8973 case ASHIFTRT:
8974 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8976 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8977 return true;
8979 else if (mode == SImode)
8981 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8982 /* Slightly disparage register shifts, but not by much. */
8983 if (!CONST_INT_P (XEXP (x, 1)))
8984 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8985 return true;
8988 /* Needs a libcall. */
8989 *total = COSTS_N_INSNS (2);
8990 return false;
8992 case MINUS:
8993 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8994 && (mode == SFmode || !TARGET_VFP_SINGLE))
8996 *total = COSTS_N_INSNS (1);
8997 return false;
9000 if (mode == SImode)
9002 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9003 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9005 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9006 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9007 || subcode1 == ROTATE || subcode1 == ROTATERT
9008 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9009 || subcode1 == ASHIFTRT)
9011 /* It's just the cost of the two operands. */
9012 *total = 0;
9013 return false;
9016 *total = COSTS_N_INSNS (1);
9017 return false;
9020 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9021 return false;
9023 case PLUS:
9024 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9025 && (mode == SFmode || !TARGET_VFP_SINGLE))
9027 *total = COSTS_N_INSNS (1);
9028 return false;
9031 /* A shift as a part of ADD costs nothing. */
9032 if (GET_CODE (XEXP (x, 0)) == MULT
9033 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9035 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9037 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9038 return true;
9041 /* Fall through */
9042 case AND: case XOR: case IOR:
9043 if (mode == SImode)
9045 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9047 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9048 || subcode == LSHIFTRT || subcode == ASHIFTRT
9049 || (code == AND && subcode == NOT))
9051 /* It's just the cost of the two operands. */
9052 *total = 0;
9053 return false;
9057 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9058 return false;
9060 case MULT:
9061 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9062 return false;
9064 case NEG:
9065 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9066 && (mode == SFmode || !TARGET_VFP_SINGLE))
9068 *total = COSTS_N_INSNS (1);
9069 return false;
9072 /* Fall through */
9073 case NOT:
9074 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9076 return false;
9078 case IF_THEN_ELSE:
9079 *total = 0;
9080 return false;
9082 case COMPARE:
9083 if (cc_register (XEXP (x, 0), VOIDmode))
9084 *total = 0;
9085 else
9086 *total = COSTS_N_INSNS (1);
9087 return false;
9089 case ABS:
9090 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9091 && (mode == SFmode || !TARGET_VFP_SINGLE))
9092 *total = COSTS_N_INSNS (1);
9093 else
9094 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9095 return false;
9097 case SIGN_EXTEND:
9098 case ZERO_EXTEND:
9099 return arm_rtx_costs_1 (x, outer_code, total, 0);
9101 case CONST_INT:
9102 if (const_ok_for_arm (INTVAL (x)))
9103 /* A multiplication by a constant requires another instruction
9104 to load the constant to a register. */
9105 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9106 ? 1 : 0);
9107 else if (const_ok_for_arm (~INTVAL (x)))
9108 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9109 else if (const_ok_for_arm (-INTVAL (x)))
9111 if (outer_code == COMPARE || outer_code == PLUS
9112 || outer_code == MINUS)
9113 *total = 0;
9114 else
9115 *total = COSTS_N_INSNS (1);
9117 else
9118 *total = COSTS_N_INSNS (2);
9119 return true;
9121 case CONST:
9122 case LABEL_REF:
9123 case SYMBOL_REF:
9124 *total = COSTS_N_INSNS (2);
9125 return true;
9127 case CONST_DOUBLE:
9128 *total = COSTS_N_INSNS (4);
9129 return true;
9131 case CONST_VECTOR:
9132 if (TARGET_NEON
9133 && TARGET_HARD_FLOAT
9134 && outer_code == SET
9135 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9136 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9137 *total = COSTS_N_INSNS (1);
9138 else
9139 *total = COSTS_N_INSNS (4);
9140 return true;
9142 case HIGH:
9143 case LO_SUM:
9144 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9145 cost of these slightly. */
9146 *total = COSTS_N_INSNS (1) + 1;
9147 return true;
9149 case SET:
9150 return false;
9152 default:
9153 if (mode != VOIDmode)
9154 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9155 else
9156 *total = COSTS_N_INSNS (4); /* Who knows? */
9157 return false;
9161 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9162 operand, then return the operand that is being shifted. If the shift
9163 is not by a constant, then set *SHIFT_REG to the shift-amount register.
9164 Return NULL if OP is not a shifter operand. */
9165 static rtx
9166 shifter_op_p (rtx op, rtx *shift_reg)
9168 enum rtx_code code = GET_CODE (op);
9170 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9171 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9172 return XEXP (op, 0);
9173 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9174 return XEXP (op, 0);
9175 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9176 || code == ASHIFTRT)
9178 if (!CONST_INT_P (XEXP (op, 1)))
9179 *shift_reg = XEXP (op, 1);
9180 return XEXP (op, 0);
9183 return NULL;
9186 static bool
9187 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9189 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9190 gcc_assert (GET_CODE (x) == UNSPEC);
9192 switch (XINT (x, 1))
9194 case UNSPEC_UNALIGNED_LOAD:
9195 /* We can only do unaligned loads into the integer unit, and we can't
9196 use LDM or LDRD. */
9197 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9198 if (speed_p)
9199 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9200 + extra_cost->ldst.load_unaligned);
9202 #ifdef NOT_YET
9203 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9204 ADDR_SPACE_GENERIC, speed_p);
9205 #endif
9206 return true;
9208 case UNSPEC_UNALIGNED_STORE:
9209 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9210 if (speed_p)
9211 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9212 + extra_cost->ldst.store_unaligned);
9214 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9215 #ifdef NOT_YET
9216 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9217 ADDR_SPACE_GENERIC, speed_p);
9218 #endif
9219 return true;
9221 case UNSPEC_VRINTZ:
9222 case UNSPEC_VRINTP:
9223 case UNSPEC_VRINTM:
9224 case UNSPEC_VRINTR:
9225 case UNSPEC_VRINTX:
9226 case UNSPEC_VRINTA:
9227 *cost = COSTS_N_INSNS (1);
9228 if (speed_p)
9229 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9231 return true;
9232 default:
9233 *cost = COSTS_N_INSNS (2);
9234 break;
9236 return false;
9239 /* Cost of a libcall. We assume one insn per argument, an amount for the
9240 call (one insn for -Os) and then one for processing the result. */
9241 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
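A worked expansion of the macro above (editorial, not from the source); note that it picks up speed_p from the function it is used in.

/* LIBCALL_COST (2) is COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20) when
   optimizing for speed, and COSTS_N_INSNS (2 + 2) = COSTS_N_INSNS (4)
   at -Os: two argument moves, the call, and the result move.  */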
9243 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9244 do \
9246 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9247 if (shift_op != NULL \
9248 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9250 if (shift_reg) \
9252 if (speed_p) \
9253 *cost += extra_cost->alu.arith_shift_reg; \
9254 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9256 else if (speed_p) \
9257 *cost += extra_cost->alu.arith_shift; \
9259 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9260 + rtx_cost (XEXP (x, 1 - IDX), \
9261 OP, 1, speed_p)); \
9262 return true; \
9265 while (0);
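An editorial note on the macro above (an inference from its body, not a statement from the source): it is invoked from the narrow-mode PLUS/MINUS cost handling; when operand IDX of X is a left shift (an ASHIFT, or a MULT by a power of two, as recognized by shifter_op_p and arm_rtx_shift_left_p), it adds the arithmetic-with-shift cost plus the costs of the shifted operand and of the other operand, then returns from the enclosing case.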
9267 /* RTX costs. Make an estimate of the cost of executing the operation
9268 X, which is contained within an operation with code OUTER_CODE.
9269 SPEED_P indicates whether the cost desired is the performance cost,
9270 or the size cost. The estimate is stored in COST and the return
9271 value is TRUE if the cost calculation is final, or FALSE if the
9272 caller should recurse through the operands of X to add additional
9273 costs.
9275 We currently make no attempt to model the size savings of Thumb-2
9276 16-bit instructions. At the normal points in compilation where
9277 this code is called we have no measure of whether the condition
9278 flags are live or not, and thus no realistic way to determine what
9279 the size will eventually be. */
9280 static bool
9281 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9282 const struct cpu_cost_table *extra_cost,
9283 int *cost, bool speed_p)
9285 machine_mode mode = GET_MODE (x);
9287 if (TARGET_THUMB1)
9289 if (speed_p)
9290 *cost = thumb1_rtx_costs (x, code, outer_code);
9291 else
9292 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9293 return true;
9296 switch (code)
9298 case SET:
9299 *cost = 0;
9300 /* SET RTXs don't have a mode so we get it from the destination. */
9301 mode = GET_MODE (SET_DEST (x));
9303 if (REG_P (SET_SRC (x))
9304 && REG_P (SET_DEST (x)))
9306 /* Assume that most copies can be done with a single insn,
9307 unless we don't have HW FP, in which case everything
9308 larger than word mode will require two insns. */
9309 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9310 && GET_MODE_SIZE (mode) > 4)
9311 || mode == DImode)
9312 ? 2 : 1);
9313 /* Conditional register moves can be encoded
9314 in 16 bits in Thumb mode. */
9315 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9316 *cost >>= 1;
9318 return true;
9321 if (CONST_INT_P (SET_SRC (x)))
9323 /* Handle CONST_INT here, since the value doesn't have a mode
9324 and we would otherwise be unable to work out the true cost. */
9325 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9326 outer_code = SET;
9327 /* Slightly lower the cost of setting a core reg to a constant.
9328 This helps break up chains and allows for better scheduling. */
9329 if (REG_P (SET_DEST (x))
9330 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9331 *cost -= 1;
9332 x = SET_SRC (x);
9333 /* Immediate moves with an immediate in the range [0, 255] can be
9334 encoded in 16 bits in Thumb mode. */
9335 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9336 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9337 *cost >>= 1;
9338 goto const_int_cost;
9341 return false;
9343 case MEM:
9344 /* A memory access costs 1 insn if the mode is small, or the address is
9345 a single register, otherwise it costs one insn per word. */
9346 if (REG_P (XEXP (x, 0)))
9347 *cost = COSTS_N_INSNS (1);
9348 else if (flag_pic
9349 && GET_CODE (XEXP (x, 0)) == PLUS
9350 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9351 /* This will be split into two instructions.
9352 See arm.md:calculate_pic_address. */
9353 *cost = COSTS_N_INSNS (2);
9354 else
9355 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9357 /* For speed optimizations, add the costs of the address and
9358 accessing memory. */
9359 if (speed_p)
9360 #ifdef NOT_YET
9361 *cost += (extra_cost->ldst.load
9362 + arm_address_cost (XEXP (x, 0), mode,
9363 ADDR_SPACE_GENERIC, speed_p));
9364 #else
9365 *cost += extra_cost->ldst.load;
9366 #endif
9367 return true;
9369 case PARALLEL:
9371 /* Calculations of LDM costs are complex. We assume an initial cost
9372 (ldm_1st) which will load the number of registers mentioned in
9373 ldm_regs_per_insn_1st registers; then each additional
9374 ldm_regs_per_insn_subsequent registers cost one more insn. The
9375 formula for N regs is thus:
9377 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9378 + ldm_regs_per_insn_subsequent - 1)
9379 / ldm_regs_per_insn_subsequent).
9381 Additional costs may also be added for addressing. A similar
9382 formula is used for STM. */
9384 bool is_ldm = load_multiple_operation (x, SImode);
9385 bool is_stm = store_multiple_operation (x, SImode);
9387 *cost = COSTS_N_INSNS (1);
9389 if (is_ldm || is_stm)
9391 if (speed_p)
9393 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9394 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9395 ? extra_cost->ldst.ldm_regs_per_insn_1st
9396 : extra_cost->ldst.stm_regs_per_insn_1st;
9397 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9398 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9399 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9401 *cost += regs_per_insn_1st
9402 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9403 + regs_per_insn_sub - 1)
9404 / regs_per_insn_sub);
9405 return true;
9409 return false;
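The ceiling-division part of the LDM/STM formula above can be sketched standalone (editorial, not part of arm.c; the function name and the tuning values in the example are hypothetical).

static int
example_extra_ldm_insns (int nregs, int regs_per_insn_1st,
                         int regs_per_insn_sub)
{
  int remaining = nregs > regs_per_insn_1st ? nregs - regs_per_insn_1st : 0;

  /* Ceiling division: each additional insn transfers regs_per_insn_sub
     more registers.  E.g. 6 registers with 2 in the first insn and 2 per
     subsequent insn needs (4 + 1) / 2 = 2 extra insns.  */
  return (remaining + regs_per_insn_sub - 1) / regs_per_insn_sub;
}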
9411 case DIV:
9412 case UDIV:
9413 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9414 && (mode == SFmode || !TARGET_VFP_SINGLE))
9415 *cost = COSTS_N_INSNS (speed_p
9416 ? extra_cost->fp[mode != SFmode].div : 1);
9417 else if (mode == SImode && TARGET_IDIV)
9418 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9419 else
9420 *cost = LIBCALL_COST (2);
9421 return false; /* All arguments must be in registers. */
9423 case MOD:
9424 case UMOD:
9425 *cost = LIBCALL_COST (2);
9426 return false; /* All arguments must be in registers. */
9428 case ROTATE:
9429 if (mode == SImode && REG_P (XEXP (x, 1)))
9431 *cost = (COSTS_N_INSNS (2)
9432 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9433 if (speed_p)
9434 *cost += extra_cost->alu.shift_reg;
9435 return true;
9437 /* Fall through */
9438 case ROTATERT:
9439 case ASHIFT:
9440 case LSHIFTRT:
9441 case ASHIFTRT:
9442 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9444 *cost = (COSTS_N_INSNS (3)
9445 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9446 if (speed_p)
9447 *cost += 2 * extra_cost->alu.shift;
9448 return true;
9450 else if (mode == SImode)
9452 *cost = (COSTS_N_INSNS (1)
9453 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9454 /* Slightly disparage register shifts at -Os, but not by much. */
9455 if (!CONST_INT_P (XEXP (x, 1)))
9456 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9457 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9458 return true;
9460 else if (GET_MODE_CLASS (mode) == MODE_INT
9461 && GET_MODE_SIZE (mode) < 4)
9463 if (code == ASHIFT)
9465 *cost = (COSTS_N_INSNS (1)
9466 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9467 /* Slightly disparage register shifts at -Os, but not by
9468 much. */
9469 if (!CONST_INT_P (XEXP (x, 1)))
9470 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9471 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9473 else if (code == LSHIFTRT || code == ASHIFTRT)
9475 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9477 /* Can use SBFX/UBFX. */
9478 *cost = COSTS_N_INSNS (1);
9479 if (speed_p)
9480 *cost += extra_cost->alu.bfx;
9481 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9483 else
9485 *cost = COSTS_N_INSNS (2);
9486 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9487 if (speed_p)
9489 if (CONST_INT_P (XEXP (x, 1)))
9490 *cost += 2 * extra_cost->alu.shift;
9491 else
9492 *cost += (extra_cost->alu.shift
9493 + extra_cost->alu.shift_reg);
9495 else
9496 /* Slightly disparage register shifts. */
9497 *cost += !CONST_INT_P (XEXP (x, 1));
9500 else /* Rotates. */
9502 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9503 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9504 if (speed_p)
9506 if (CONST_INT_P (XEXP (x, 1)))
9507 *cost += (2 * extra_cost->alu.shift
9508 + extra_cost->alu.log_shift);
9509 else
9510 *cost += (extra_cost->alu.shift
9511 + extra_cost->alu.shift_reg
9512 + extra_cost->alu.log_shift_reg);
9515 return true;
9518 *cost = LIBCALL_COST (2);
9519 return false;
9521 case BSWAP:
9522 if (arm_arch6)
9524 if (mode == SImode)
9526 *cost = COSTS_N_INSNS (1);
9527 if (speed_p)
9528 *cost += extra_cost->alu.rev;
9530 return false;
9533 else
9535 /* No rev instruction available. Look at arm_legacy_rev
9536 and thumb_legacy_rev for the form of RTL used then. */
9537 if (TARGET_THUMB)
9539 *cost = COSTS_N_INSNS (10);
9541 if (speed_p)
9543 *cost += 6 * extra_cost->alu.shift;
9544 *cost += 3 * extra_cost->alu.logical;
9547 else
9549 *cost = COSTS_N_INSNS (5);
9551 if (speed_p)
9553 *cost += 2 * extra_cost->alu.shift;
9554 *cost += extra_cost->alu.arith_shift;
9555 *cost += 2 * extra_cost->alu.logical;
9558 return true;
9560 return false;
9562 case MINUS:
9563 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9564 && (mode == SFmode || !TARGET_VFP_SINGLE))
9566 *cost = COSTS_N_INSNS (1);
9567 if (GET_CODE (XEXP (x, 0)) == MULT
9568 || GET_CODE (XEXP (x, 1)) == MULT)
9570 rtx mul_op0, mul_op1, sub_op;
9572 if (speed_p)
9573 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9575 if (GET_CODE (XEXP (x, 0)) == MULT)
9577 mul_op0 = XEXP (XEXP (x, 0), 0);
9578 mul_op1 = XEXP (XEXP (x, 0), 1);
9579 sub_op = XEXP (x, 1);
9581 else
9583 mul_op0 = XEXP (XEXP (x, 1), 0);
9584 mul_op1 = XEXP (XEXP (x, 1), 1);
9585 sub_op = XEXP (x, 0);
9588 /* The first operand of the multiply may be optionally
9589 negated. */
9590 if (GET_CODE (mul_op0) == NEG)
9591 mul_op0 = XEXP (mul_op0, 0);
9593 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9594 + rtx_cost (mul_op1, code, 0, speed_p)
9595 + rtx_cost (sub_op, code, 0, speed_p));
9597 return true;
9600 if (speed_p)
9601 *cost += extra_cost->fp[mode != SFmode].addsub;
9602 return false;
9605 if (mode == SImode)
9607 rtx shift_by_reg = NULL;
9608 rtx shift_op;
9609 rtx non_shift_op;
9611 *cost = COSTS_N_INSNS (1);
9613 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9614 if (shift_op == NULL)
9616 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9617 non_shift_op = XEXP (x, 0);
9619 else
9620 non_shift_op = XEXP (x, 1);
9622 if (shift_op != NULL)
9624 if (shift_by_reg != NULL)
9626 if (speed_p)
9627 *cost += extra_cost->alu.arith_shift_reg;
9628 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9630 else if (speed_p)
9631 *cost += extra_cost->alu.arith_shift;
9633 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9634 + rtx_cost (non_shift_op, code, 0, speed_p));
9635 return true;
9638 if (arm_arch_thumb2
9639 && GET_CODE (XEXP (x, 1)) == MULT)
9641 /* MLS. */
9642 if (speed_p)
9643 *cost += extra_cost->mult[0].add;
9644 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9645 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9646 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9647 return true;
9650 if (CONST_INT_P (XEXP (x, 0)))
9652 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9653 INTVAL (XEXP (x, 0)), NULL_RTX,
9654 NULL_RTX, 1, 0);
9655 *cost = COSTS_N_INSNS (insns);
9656 if (speed_p)
9657 *cost += insns * extra_cost->alu.arith;
9658 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9659 return true;
9661 else if (speed_p)
9662 *cost += extra_cost->alu.arith;
9664 return false;
9667 if (GET_MODE_CLASS (mode) == MODE_INT
9668 && GET_MODE_SIZE (mode) < 4)
9670 rtx shift_op, shift_reg;
9671 shift_reg = NULL;
9673 /* We check both sides of the MINUS for shifter operands since,
9674 unlike PLUS, it's not commutative. */
9676 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9677 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9679 /* Slightly disparage, as we might need to widen the result. */
9680 *cost = 1 + COSTS_N_INSNS (1);
9681 if (speed_p)
9682 *cost += extra_cost->alu.arith;
9684 if (CONST_INT_P (XEXP (x, 0)))
9686 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9687 return true;
9690 return false;
9693 if (mode == DImode)
9695 *cost = COSTS_N_INSNS (2);
9697 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9699 rtx op1 = XEXP (x, 1);
9701 if (speed_p)
9702 *cost += 2 * extra_cost->alu.arith;
9704 if (GET_CODE (op1) == ZERO_EXTEND)
9705 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9706 else
9707 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9708 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9709 0, speed_p);
9710 return true;
9712 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9714 if (speed_p)
9715 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9716 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9717 0, speed_p)
9718 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9719 return true;
9721 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9722 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9724 if (speed_p)
9725 *cost += (extra_cost->alu.arith
9726 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9727 ? extra_cost->alu.arith
9728 : extra_cost->alu.arith_shift));
9729 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9730 + rtx_cost (XEXP (XEXP (x, 1), 0),
9731 GET_CODE (XEXP (x, 1)), 0, speed_p));
9732 return true;
9735 if (speed_p)
9736 *cost += 2 * extra_cost->alu.arith;
9737 return false;
9740 /* Vector mode? */
9742 *cost = LIBCALL_COST (2);
9743 return false;
9745 case PLUS:
9746 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9747 && (mode == SFmode || !TARGET_VFP_SINGLE))
9749 *cost = COSTS_N_INSNS (1);
9750 if (GET_CODE (XEXP (x, 0)) == MULT)
9752 rtx mul_op0, mul_op1, add_op;
9754 if (speed_p)
9755 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9757 mul_op0 = XEXP (XEXP (x, 0), 0);
9758 mul_op1 = XEXP (XEXP (x, 0), 1);
9759 add_op = XEXP (x, 1);
9761 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9762 + rtx_cost (mul_op1, code, 0, speed_p)
9763 + rtx_cost (add_op, code, 0, speed_p));
9765 return true;
9768 if (speed_p)
9769 *cost += extra_cost->fp[mode != SFmode].addsub;
9770 return false;
9772 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9774 *cost = LIBCALL_COST (2);
9775 return false;
9778 /* Narrow modes can be synthesized in SImode, but the range
9779 of useful sub-operations is limited. Check for shift operations
9780 on one of the operands. Only left shifts can be used in the
9781 narrow modes. */
9782 if (GET_MODE_CLASS (mode) == MODE_INT
9783 && GET_MODE_SIZE (mode) < 4)
9785 rtx shift_op, shift_reg;
9786 shift_reg = NULL;
9788 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9790 if (CONST_INT_P (XEXP (x, 1)))
9792 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9793 INTVAL (XEXP (x, 1)), NULL_RTX,
9794 NULL_RTX, 1, 0);
9795 *cost = COSTS_N_INSNS (insns);
9796 if (speed_p)
9797 *cost += insns * extra_cost->alu.arith;
9798 /* Slightly penalize a narrow operation as the result may
9799 need widening. */
9800 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9801 return true;
9804 /* Slightly penalize a narrow operation as the result may
9805 need widening. */
9806 *cost = 1 + COSTS_N_INSNS (1);
9807 if (speed_p)
9808 *cost += extra_cost->alu.arith;
9810 return false;
9813 if (mode == SImode)
9815 rtx shift_op, shift_reg;
9817 *cost = COSTS_N_INSNS (1);
9818 if (TARGET_INT_SIMD
9819 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9820 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9822 /* UXTA[BH] or SXTA[BH]. */
9823 if (speed_p)
9824 *cost += extra_cost->alu.extend_arith;
9825 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9826 speed_p)
9827 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9828 return true;
9831 shift_reg = NULL;
9832 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9833 if (shift_op != NULL)
9835 if (shift_reg)
9837 if (speed_p)
9838 *cost += extra_cost->alu.arith_shift_reg;
9839 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9841 else if (speed_p)
9842 *cost += extra_cost->alu.arith_shift;
9844 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9845 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9846 return true;
9848 if (GET_CODE (XEXP (x, 0)) == MULT)
9850 rtx mul_op = XEXP (x, 0);
9852 *cost = COSTS_N_INSNS (1);
9854 if (TARGET_DSP_MULTIPLY
9855 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9856 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9857 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9858 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9859 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9860 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9861 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9862 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9863 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9864 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9865 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9866 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9867 == 16))))))
9869 /* SMLA[BT][BT]. */
9870 if (speed_p)
9871 *cost += extra_cost->mult[0].extend_add;
9872 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9873 SIGN_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9875 SIGN_EXTEND, 0, speed_p)
9876 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9877 return true;
9880 if (speed_p)
9881 *cost += extra_cost->mult[0].add;
9882 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9883 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9884 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9885 return true;
9887 if (CONST_INT_P (XEXP (x, 1)))
9889 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9890 INTVAL (XEXP (x, 1)), NULL_RTX,
9891 NULL_RTX, 1, 0);
9892 *cost = COSTS_N_INSNS (insns);
9893 if (speed_p)
9894 *cost += insns * extra_cost->alu.arith;
9895 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9896 return true;
9898 else if (speed_p)
9899 *cost += extra_cost->alu.arith;
9901 return false;
9904 if (mode == DImode)
9906 if (arm_arch3m
9907 && GET_CODE (XEXP (x, 0)) == MULT
9908 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9909 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9910 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9911 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9913 *cost = COSTS_N_INSNS (1);
9914 if (speed_p)
9915 *cost += extra_cost->mult[1].extend_add;
9916 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9917 ZERO_EXTEND, 0, speed_p)
9918 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9919 ZERO_EXTEND, 0, speed_p)
9920 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9921 return true;
9924 *cost = COSTS_N_INSNS (2);
9926 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9927 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9929 if (speed_p)
9930 *cost += (extra_cost->alu.arith
9931 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9932 ? extra_cost->alu.arith
9933 : extra_cost->alu.arith_shift));
9935 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9936 speed_p)
9937 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9938 return true;
9941 if (speed_p)
9942 *cost += 2 * extra_cost->alu.arith;
9943 return false;
9946 /* Vector mode? */
9947 *cost = LIBCALL_COST (2);
9948 return false;
9949 case IOR:
9950 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9952 *cost = COSTS_N_INSNS (1);
9953 if (speed_p)
9954 *cost += extra_cost->alu.rev;
9956 return true;
9958 /* Fall through. */
9959 case AND: case XOR:
9960 if (mode == SImode)
9962 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9963 rtx op0 = XEXP (x, 0);
9964 rtx shift_op, shift_reg;
9966 *cost = COSTS_N_INSNS (1);
9968 if (subcode == NOT
9969 && (code == AND
9970 || (code == IOR && TARGET_THUMB2)))
9971 op0 = XEXP (op0, 0);
9973 shift_reg = NULL;
9974 shift_op = shifter_op_p (op0, &shift_reg);
9975 if (shift_op != NULL)
9977 if (shift_reg)
9979 if (speed_p)
9980 *cost += extra_cost->alu.log_shift_reg;
9981 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9983 else if (speed_p)
9984 *cost += extra_cost->alu.log_shift;
9986 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9987 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9988 return true;
9991 if (CONST_INT_P (XEXP (x, 1)))
9993 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9994 INTVAL (XEXP (x, 1)), NULL_RTX,
9995 NULL_RTX, 1, 0);
9997 *cost = COSTS_N_INSNS (insns);
9998 if (speed_p)
9999 *cost += insns * extra_cost->alu.logical;
10000 *cost += rtx_cost (op0, code, 0, speed_p);
10001 return true;
10004 if (speed_p)
10005 *cost += extra_cost->alu.logical;
10006 *cost += (rtx_cost (op0, code, 0, speed_p)
10007 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10008 return true;
10011 if (mode == DImode)
10013 rtx op0 = XEXP (x, 0);
10014 enum rtx_code subcode = GET_CODE (op0);
10016 *cost = COSTS_N_INSNS (2);
10018 if (subcode == NOT
10019 && (code == AND
10020 || (code == IOR && TARGET_THUMB2)))
10021 op0 = XEXP (op0, 0);
10023 if (GET_CODE (op0) == ZERO_EXTEND)
10025 if (speed_p)
10026 *cost += 2 * extra_cost->alu.logical;
10028 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10029 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10030 return true;
10032 else if (GET_CODE (op0) == SIGN_EXTEND)
10034 if (speed_p)
10035 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10037 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10038 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10039 return true;
10042 if (speed_p)
10043 *cost += 2 * extra_cost->alu.logical;
10045 return true;
10047 /* Vector mode? */
10049 *cost = LIBCALL_COST (2);
10050 return false;
10052 case MULT:
10053 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10054 && (mode == SFmode || !TARGET_VFP_SINGLE))
10056 rtx op0 = XEXP (x, 0);
10058 *cost = COSTS_N_INSNS (1);
10060 if (GET_CODE (op0) == NEG)
10061 op0 = XEXP (op0, 0);
10063 if (speed_p)
10064 *cost += extra_cost->fp[mode != SFmode].mult;
10066 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10067 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10068 return true;
10070 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10072 *cost = LIBCALL_COST (2);
10073 return false;
10076 if (mode == SImode)
10078 *cost = COSTS_N_INSNS (1);
10079 if (TARGET_DSP_MULTIPLY
10080 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10081 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10082 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10083 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10084 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10085 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10086 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10087 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10088 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10089 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10090 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10091 && (INTVAL (XEXP (XEXP (x, 1), 1))
10092 == 16))))))
10094 /* SMUL[TB][TB]. */
10095 if (speed_p)
10096 *cost += extra_cost->mult[0].extend;
10097 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10098 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10099 return true;
10101 if (speed_p)
10102 *cost += extra_cost->mult[0].simple;
10103 return false;
10106 if (mode == DImode)
10108 if (arm_arch3m
10109 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10110 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10111 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10112 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10114 *cost = COSTS_N_INSNS (1);
10115 if (speed_p)
10116 *cost += extra_cost->mult[1].extend;
10117 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10118 ZERO_EXTEND, 0, speed_p)
10119 + rtx_cost (XEXP (XEXP (x, 1), 0),
10120 ZERO_EXTEND, 0, speed_p));
10121 return true;
10124 *cost = LIBCALL_COST (2);
10125 return false;
10128 /* Vector mode? */
10129 *cost = LIBCALL_COST (2);
10130 return false;
10132 case NEG:
10133 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10134 && (mode == SFmode || !TARGET_VFP_SINGLE))
10136 *cost = COSTS_N_INSNS (1);
10137 if (speed_p)
10138 *cost += extra_cost->fp[mode != SFmode].neg;
10140 return false;
10142 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10144 *cost = LIBCALL_COST (1);
10145 return false;
10148 if (mode == SImode)
10150 if (GET_CODE (XEXP (x, 0)) == ABS)
10152 *cost = COSTS_N_INSNS (2);
10153 /* Assume the non-flag-changing variant. */
10154 if (speed_p)
10155 *cost += (extra_cost->alu.log_shift
10156 + extra_cost->alu.arith_shift);
10157 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10158 return true;
10161 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10162 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10164 *cost = COSTS_N_INSNS (2);
10165 /* No extra cost for MOV imm and MVN imm. */
10166 /* If the comparison op is using the flags, there's no further
10167 cost, otherwise we need to add the cost of the comparison. */
10168 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10169 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10170 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10172 *cost += (COSTS_N_INSNS (1)
10173 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10174 speed_p)
10175 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10176 speed_p));
10177 if (speed_p)
10178 *cost += extra_cost->alu.arith;
10180 return true;
10182 *cost = COSTS_N_INSNS (1);
10183 if (speed_p)
10184 *cost += extra_cost->alu.arith;
10185 return false;
10188 if (GET_MODE_CLASS (mode) == MODE_INT
10189 && GET_MODE_SIZE (mode) < 4)
10191 /* Slightly disparage, as we might need an extend operation. */
10192 *cost = 1 + COSTS_N_INSNS (1);
10193 if (speed_p)
10194 *cost += extra_cost->alu.arith;
10195 return false;
10198 if (mode == DImode)
10200 *cost = COSTS_N_INSNS (2);
10201 if (speed_p)
10202 *cost += 2 * extra_cost->alu.arith;
10203 return false;
10206 /* Vector mode? */
10207 *cost = LIBCALL_COST (1);
10208 return false;
10210 case NOT:
10211 if (mode == SImode)
10213 rtx shift_op;
10214 rtx shift_reg = NULL;
10216 *cost = COSTS_N_INSNS (1);
10217 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10219 if (shift_op)
10221 if (shift_reg != NULL)
10223 if (speed_p)
10224 *cost += extra_cost->alu.log_shift_reg;
10225 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10227 else if (speed_p)
10228 *cost += extra_cost->alu.log_shift;
10229 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10230 return true;
10233 if (speed_p)
10234 *cost += extra_cost->alu.logical;
10235 return false;
10237 if (mode == DImode)
10239 *cost = COSTS_N_INSNS (2);
10240 return false;
10243 /* Vector mode? */
10245 *cost += LIBCALL_COST (1);
10246 return false;
10248 case IF_THEN_ELSE:
10250 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10252 *cost = COSTS_N_INSNS (4);
10253 return true;
10255 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10256 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10258 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10259 /* Assume that if one arm of the if_then_else is a register,
10260 that it will be tied with the result and eliminate the
10261 conditional insn. */
10262 if (REG_P (XEXP (x, 1)))
10263 *cost += op2cost;
10264 else if (REG_P (XEXP (x, 2)))
10265 *cost += op1cost;
10266 else
10268 if (speed_p)
10270 if (extra_cost->alu.non_exec_costs_exec)
10271 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10272 else
10273 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10275 else
10276 *cost += op1cost + op2cost;
10279 return true;
10281 case COMPARE:
10282 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10283 *cost = 0;
10284 else
10286 machine_mode op0mode;
10287 /* We'll mostly assume that the cost of a compare is the cost of the
10288 LHS. However, there are some notable exceptions. */
10290 /* Floating point compares are never done as side-effects. */
10291 op0mode = GET_MODE (XEXP (x, 0));
10292 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10293 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10295 *cost = COSTS_N_INSNS (1);
10296 if (speed_p)
10297 *cost += extra_cost->fp[op0mode != SFmode].compare;
10299 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10301 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10302 return true;
10305 return false;
10307 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10309 *cost = LIBCALL_COST (2);
10310 return false;
10313 /* DImode compares normally take two insns. */
10314 if (op0mode == DImode)
10316 *cost = COSTS_N_INSNS (2);
10317 if (speed_p)
10318 *cost += 2 * extra_cost->alu.arith;
10319 return false;
10322 if (op0mode == SImode)
10324 rtx shift_op;
10325 rtx shift_reg;
10327 if (XEXP (x, 1) == const0_rtx
10328 && !(REG_P (XEXP (x, 0))
10329 || (GET_CODE (XEXP (x, 0)) == SUBREG
10330 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10332 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10334 /* Multiply operations that set the flags are often
10335 significantly more expensive. */
10336 if (speed_p
10337 && GET_CODE (XEXP (x, 0)) == MULT
10338 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10339 *cost += extra_cost->mult[0].flag_setting;
10341 if (speed_p
10342 && GET_CODE (XEXP (x, 0)) == PLUS
10343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10344 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10345 0), 1), mode))
10346 *cost += extra_cost->mult[0].flag_setting;
10347 return true;
10350 shift_reg = NULL;
10351 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10352 if (shift_op != NULL)
10354 *cost = COSTS_N_INSNS (1);
10355 if (shift_reg != NULL)
10357 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10358 if (speed_p)
10359 *cost += extra_cost->alu.arith_shift_reg;
10361 else if (speed_p)
10362 *cost += extra_cost->alu.arith_shift;
10363 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10364 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10365 return true;
10368 *cost = COSTS_N_INSNS (1);
10369 if (speed_p)
10370 *cost += extra_cost->alu.arith;
10371 if (CONST_INT_P (XEXP (x, 1))
10372 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10374 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10375 return true;
10377 return false;
10380 /* Vector mode? */
10382 *cost = LIBCALL_COST (2);
10383 return false;
10385 return true;
10387 case EQ:
10388 case NE:
10389 case LT:
10390 case LE:
10391 case GT:
10392 case GE:
10393 case LTU:
10394 case LEU:
10395 case GEU:
10396 case GTU:
10397 case ORDERED:
10398 case UNORDERED:
10399 case UNEQ:
10400 case UNLE:
10401 case UNLT:
10402 case UNGE:
10403 case UNGT:
10404 case LTGT:
10405 if (outer_code == SET)
10407 /* Is it a store-flag operation? */
10408 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10409 && XEXP (x, 1) == const0_rtx)
10411 /* Thumb also needs an IT insn. */
10412 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10413 return true;
10415 if (XEXP (x, 1) == const0_rtx)
10417 switch (code)
10419 case LT:
10420 /* LSR Rd, Rn, #31. */
10421 *cost = COSTS_N_INSNS (1);
10422 if (speed_p)
10423 *cost += extra_cost->alu.shift;
10424 break;
10426 case EQ:
10427 /* RSBS T1, Rn, #0
10428 ADC Rd, Rn, T1. */
10430 case NE:
10431 /* SUBS T1, Rn, #1
10432 SBC Rd, Rn, T1. */
10433 *cost = COSTS_N_INSNS (2);
10434 break;
10436 case LE:
10437 /* RSBS T1, Rn, Rn, LSR #31
10438 ADC Rd, Rn, T1. */
10439 *cost = COSTS_N_INSNS (2);
10440 if (speed_p)
10441 *cost += extra_cost->alu.arith_shift;
10442 break;
10444 case GT:
10445 /* RSB Rd, Rn, Rn, ASR #1
10446 LSR Rd, Rd, #31. */
10447 *cost = COSTS_N_INSNS (2);
10448 if (speed_p)
10449 *cost += (extra_cost->alu.arith_shift
10450 + extra_cost->alu.shift);
10451 break;
10453 case GE:
10454 /* ASR Rd, Rn, #31
10455 ADD Rd, Rn, #1. */
10456 *cost = COSTS_N_INSNS (2);
10457 if (speed_p)
10458 *cost += extra_cost->alu.shift;
10459 break;
10461 default:
10462 /* Remaining cases are either meaningless or would take
10463 three insns anyway. */
10464 *cost = COSTS_N_INSNS (3);
10465 break;
10467 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10468 return true;
10470 else
10472 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10473 if (CONST_INT_P (XEXP (x, 1))
10474 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10476 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10477 return true;
10480 return false;
10483 /* Not directly inside a set. If it involves the condition code
10484 register it must be the condition for a branch, cond_exec or
10485 I_T_E operation. Since the comparison is performed elsewhere
10486 this is just the control part which has no additional
10487 cost. */
10488 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10489 && XEXP (x, 1) == const0_rtx)
10491 *cost = 0;
10492 return true;
10494 return false;
10496 case ABS:
10497 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10498 && (mode == SFmode || !TARGET_VFP_SINGLE))
10500 *cost = COSTS_N_INSNS (1);
10501 if (speed_p)
10502 *cost += extra_cost->fp[mode != SFmode].neg;
10504 return false;
10506 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10508 *cost = LIBCALL_COST (1);
10509 return false;
10512 if (mode == SImode)
10514 *cost = COSTS_N_INSNS (1);
10515 if (speed_p)
10516 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10517 return false;
10519 /* Vector mode? */
10520 *cost = LIBCALL_COST (1);
10521 return false;
10523 case SIGN_EXTEND:
10524 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10525 && MEM_P (XEXP (x, 0)))
10527 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10529 if (mode == DImode)
10530 *cost += COSTS_N_INSNS (1);
10532 if (!speed_p)
10533 return true;
10535 if (GET_MODE (XEXP (x, 0)) == SImode)
10536 *cost += extra_cost->ldst.load;
10537 else
10538 *cost += extra_cost->ldst.load_sign_extend;
10540 if (mode == DImode)
10541 *cost += extra_cost->alu.shift;
10543 return true;
10546 /* Widening from less than 32-bits requires an extend operation. */
10547 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10549 /* We have SXTB/SXTH. */
10550 *cost = COSTS_N_INSNS (1);
10551 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10552 if (speed_p)
10553 *cost += extra_cost->alu.extend;
10555 else if (GET_MODE (XEXP (x, 0)) != SImode)
10557 /* Needs two shifts. */
10558 *cost = COSTS_N_INSNS (2);
10559 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10560 if (speed_p)
10561 *cost += 2 * extra_cost->alu.shift;
10564 /* Widening beyond 32-bits requires one more insn. */
10565 if (mode == DImode)
10567 *cost += COSTS_N_INSNS (1);
10568 if (speed_p)
10569 *cost += extra_cost->alu.shift;
10572 return true;
10574 case ZERO_EXTEND:
10575 if ((arm_arch4
10576 || GET_MODE (XEXP (x, 0)) == SImode
10577 || GET_MODE (XEXP (x, 0)) == QImode)
10578 && MEM_P (XEXP (x, 0)))
10580 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10582 if (mode == DImode)
10583 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10585 return true;
10588 /* Widening from less than 32-bits requires an extend operation. */
10589 if (GET_MODE (XEXP (x, 0)) == QImode)
10591 /* UXTB can be a shorter instruction in Thumb2, but it might
10592 be slower than the AND Rd, Rn, #255 alternative. When
10593 optimizing for speed it should never be slower to use
10594 AND, and we don't really model 16-bit vs 32-bit insns
10595 here. */
10596 *cost = COSTS_N_INSNS (1);
10597 if (speed_p)
10598 *cost += extra_cost->alu.logical;
10600 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10602 /* We have UXTB/UXTH. */
10603 *cost = COSTS_N_INSNS (1);
10604 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10605 if (speed_p)
10606 *cost += extra_cost->alu.extend;
10608 else if (GET_MODE (XEXP (x, 0)) != SImode)
10610 /* Needs two shifts. It's marginally preferable to use
10611 shifts rather than two BIC instructions as the second
10612 shift may merge with a subsequent insn as a shifter
10613 op. */
10614 *cost = COSTS_N_INSNS (2);
10615 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10616 if (speed_p)
10617 *cost += 2 * extra_cost->alu.shift;
10619 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10620 *cost = COSTS_N_INSNS (1);
10622 /* Widening beyond 32-bits requires one more insn. */
10623 if (mode == DImode)
10625 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10628 return true;
10630 case CONST_INT:
10631 *cost = 0;
10632 /* CONST_INT has no mode, so we cannot tell for sure how many
10633 insns are really going to be needed. The best we can do is
10634 look at the value passed. If it fits in SImode, then assume
10635 that's the mode it will be used for. Otherwise assume it
10636 will be used in DImode. */
10637 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10638 mode = SImode;
10639 else
10640 mode = DImode;
10642 /* Avoid blowing up in arm_gen_constant (). */
10643 if (!(outer_code == PLUS
10644 || outer_code == AND
10645 || outer_code == IOR
10646 || outer_code == XOR
10647 || outer_code == MINUS))
10648 outer_code = SET;
10650 const_int_cost:
10651 if (mode == SImode)
10653 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10654 INTVAL (x), NULL, NULL,
10655 0, 0));
10656 /* Extra costs? */
10658 else
10660 *cost += COSTS_N_INSNS (arm_gen_constant
10661 (outer_code, SImode, NULL,
10662 trunc_int_for_mode (INTVAL (x), SImode),
10663 NULL, NULL, 0, 0)
10664 + arm_gen_constant (outer_code, SImode, NULL,
10665 INTVAL (x) >> 32, NULL,
10666 NULL, 0, 0));
10667 /* Extra costs? */
10670 return true;
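    /* Illustrative example of the DImode split above: a value such as
       0x100000001, which does not fit in SImode, is costed as two
       independent SImode constants.  arm_gen_constant is asked once for the
       low word (trunc_int_for_mode (0x100000001, SImode) == 1) and once for
       the high word (0x100000001 >> 32 == 1), and the two insn counts are
       summed.  */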
10672 case CONST:
10673 case LABEL_REF:
10674 case SYMBOL_REF:
10675 if (speed_p)
10677 if (arm_arch_thumb2 && !flag_pic)
10678 *cost = COSTS_N_INSNS (2);
10679 else
10680 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10682 else
10683 *cost = COSTS_N_INSNS (2);
10685 if (flag_pic)
10687 *cost += COSTS_N_INSNS (1);
10688 if (speed_p)
10689 *cost += extra_cost->alu.arith;
10692 return true;
10694 case CONST_FIXED:
10695 *cost = COSTS_N_INSNS (4);
10696 /* Fixme. */
10697 return true;
10699 case CONST_DOUBLE:
10700 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10701 && (mode == SFmode || !TARGET_VFP_SINGLE))
10703 if (vfp3_const_double_rtx (x))
10705 *cost = COSTS_N_INSNS (1);
10706 if (speed_p)
10707 *cost += extra_cost->fp[mode == DFmode].fpconst;
10708 return true;
10711 if (speed_p)
10713 *cost = COSTS_N_INSNS (1);
10714 if (mode == DFmode)
10715 *cost += extra_cost->ldst.loadd;
10716 else
10717 *cost += extra_cost->ldst.loadf;
10719 else
10720 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10722 return true;
10724 *cost = COSTS_N_INSNS (4);
10725 return true;
10727 case CONST_VECTOR:
10728 /* Fixme. */
10729 if (TARGET_NEON
10730 && TARGET_HARD_FLOAT
10731 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10732 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10733 *cost = COSTS_N_INSNS (1);
10734 else
10735 *cost = COSTS_N_INSNS (4);
10736 return true;
10738 case HIGH:
10739 case LO_SUM:
10740 *cost = COSTS_N_INSNS (1);
10741 /* When optimizing for size, we prefer constant pool entries to
10742 MOVW/MOVT pairs, so bump the cost of these slightly. */
10743 if (!speed_p)
10744 *cost += 1;
10745 return true;
10747 case CLZ:
10748 *cost = COSTS_N_INSNS (1);
10749 if (speed_p)
10750 *cost += extra_cost->alu.clz;
10751 return false;
10753 case SMIN:
10754 if (XEXP (x, 1) == const0_rtx)
10756 *cost = COSTS_N_INSNS (1);
10757 if (speed_p)
10758 *cost += extra_cost->alu.log_shift;
10759 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10760 return true;
10762 /* Fall through. */
10763 case SMAX:
10764 case UMIN:
10765 case UMAX:
10766 *cost = COSTS_N_INSNS (2);
10767 return false;
10769 case TRUNCATE:
10770 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10771 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10772 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10773 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10774 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10775 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10776 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10777 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10778 == ZERO_EXTEND))))
10780 *cost = COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->mult[1].extend;
10783 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10784 speed_p)
10785 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10786 0, speed_p));
10787 return true;
10789 *cost = LIBCALL_COST (1);
10790 return false;
10792 case UNSPEC:
10793 return arm_unspec_cost (x, outer_code, speed_p, cost);
10795 case PC:
10796 /* Reading the PC is like reading any other register. Writing it
10797 is more expensive, but we take that into account elsewhere. */
10798 *cost = 0;
10799 return true;
10801 case ZERO_EXTRACT:
10802 /* TODO: Simple zero_extract of bottom bits using AND. */
10803 /* Fall through. */
10804 case SIGN_EXTRACT:
10805 if (arm_arch6
10806 && mode == SImode
10807 && CONST_INT_P (XEXP (x, 1))
10808 && CONST_INT_P (XEXP (x, 2)))
10810 *cost = COSTS_N_INSNS (1);
10811 if (speed_p)
10812 *cost += extra_cost->alu.bfx;
10813 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10814 return true;
10816 /* Without UBFX/SBFX, need to resort to shift operations. */
10817 *cost = COSTS_N_INSNS (2);
10818 if (speed_p)
10819 *cost += 2 * extra_cost->alu.shift;
10820 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10821 return true;
10823 case FLOAT_EXTEND:
10824 if (TARGET_HARD_FLOAT)
10826 *cost = COSTS_N_INSNS (1);
10827 if (speed_p)
10828 *cost += extra_cost->fp[mode == DFmode].widen;
10829 if (!TARGET_FPU_ARMV8
10830 && GET_MODE (XEXP (x, 0)) == HFmode)
10832 /* Pre v8, widening HF->DF is a two-step process, first
10833 widening to SFmode. */
10834 *cost += COSTS_N_INSNS (1);
10835 if (speed_p)
10836 *cost += extra_cost->fp[0].widen;
10838 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10839 return true;
10842 *cost = LIBCALL_COST (1);
10843 return false;
10845 case FLOAT_TRUNCATE:
10846 if (TARGET_HARD_FLOAT)
10848 *cost = COSTS_N_INSNS (1);
10849 if (speed_p)
10850 *cost += extra_cost->fp[mode == DFmode].narrow;
10851 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10852 return true;
10853 /* Vector modes? */
10855 *cost = LIBCALL_COST (1);
10856 return false;
10858 case FMA:
10859 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10861 rtx op0 = XEXP (x, 0);
10862 rtx op1 = XEXP (x, 1);
10863 rtx op2 = XEXP (x, 2);
10865 *cost = COSTS_N_INSNS (1);
10867 /* vfms or vfnma. */
10868 if (GET_CODE (op0) == NEG)
10869 op0 = XEXP (op0, 0);
10871 /* vfnms or vfnma. */
10872 if (GET_CODE (op2) == NEG)
10873 op2 = XEXP (op2, 0);
10875 *cost += rtx_cost (op0, FMA, 0, speed_p);
10876 *cost += rtx_cost (op1, FMA, 1, speed_p);
10877 *cost += rtx_cost (op2, FMA, 2, speed_p);
10879 if (speed_p)
10880 *cost += extra_cost->fp[mode == DFmode].fma;
10882 return true;
10885 *cost = LIBCALL_COST (3);
10886 return false;
10888 case FIX:
10889 case UNSIGNED_FIX:
10890 if (TARGET_HARD_FLOAT)
10892 if (GET_MODE_CLASS (mode) == MODE_INT)
10894 *cost = COSTS_N_INSNS (1);
10895 if (speed_p)
10896 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10897 /* Strip off the 'cost' of rounding towards zero. */
10898 if (GET_CODE (XEXP (x, 0)) == FIX)
10899 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10900 else
10901 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10902 /* ??? Increase the cost to deal with transferring from
10903 FP -> CORE registers? */
10904 return true;
10906 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10907 && TARGET_FPU_ARMV8)
10909 *cost = COSTS_N_INSNS (1);
10910 if (speed_p)
10911 *cost += extra_cost->fp[mode == DFmode].roundint;
10912 return false;
10914 /* Vector costs? */
10916 *cost = LIBCALL_COST (1);
10917 return false;
10919 case FLOAT:
10920 case UNSIGNED_FLOAT:
10921 if (TARGET_HARD_FLOAT)
10923 /* ??? Increase the cost to deal with transferring from CORE
10924 -> FP registers? */
10925 *cost = COSTS_N_INSNS (1);
10926 if (speed_p)
10927 *cost += extra_cost->fp[mode == DFmode].fromint;
10928 return false;
10930 *cost = LIBCALL_COST (1);
10931 return false;
10933 case CALL:
10934 *cost = COSTS_N_INSNS (1);
10935 return true;
10937 case ASM_OPERANDS:
10939 /* Just a guess. Guess number of instructions in the asm
10940 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10941 though (see PR60663). */
10942 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10943 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10945 *cost = COSTS_N_INSNS (asm_length + num_operands);
10946 return true;
10948 default:
10949 if (mode != VOIDmode)
10950 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10951 else
10952 *cost = COSTS_N_INSNS (4); /* Who knows? */
10953 return false;
10957 #undef HANDLE_NARROW_SHIFT_ARITH
10959 /* RTX costs.  Dispatch to the size or speed cost model as appropriate. */
10960 static bool
10961 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10962 int *total, bool speed)
10964 bool result;
10966 if (TARGET_OLD_RTX_COSTS
10967 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10969 /* Old way. (Deprecated.) */
10970 if (!speed)
10971 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10972 (enum rtx_code) outer_code, total);
10973 else
10974 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10975 (enum rtx_code) outer_code, total,
10976 speed);
10978 else
10980 /* New way. */
10981 if (current_tune->insn_extra_cost)
10982 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10983 (enum rtx_code) outer_code,
10984 current_tune->insn_extra_cost,
10985 total, speed);
10986 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10987 && current_tune->insn_extra_cost != NULL */
10988 else
10989 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10990 (enum rtx_code) outer_code,
10991 &generic_extra_costs, total, speed);
10994 if (dump_file && (dump_flags & TDF_DETAILS))
10996 print_rtl_single (dump_file, x);
10997 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10998 *total, result ? "final" : "partial");
11000 return result;
11003 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11004 supported on any "slowmul" cores, so it can be ignored. */
11006 static bool
11007 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11008 int *total, bool speed)
11010 machine_mode mode = GET_MODE (x);
11012 if (TARGET_THUMB)
11014 *total = thumb1_rtx_costs (x, code, outer_code);
11015 return true;
11018 switch (code)
11020 case MULT:
11021 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11022 || mode == DImode)
11024 *total = COSTS_N_INSNS (20);
11025 return false;
11028 if (CONST_INT_P (XEXP (x, 1)))
11030 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11031 & (unsigned HOST_WIDE_INT) 0xffffffff);
11032 int cost, const_ok = const_ok_for_arm (i);
11033 int j, booth_unit_size;
11035 /* Tune as appropriate. */
11036 cost = const_ok ? 4 : 8;
11037 booth_unit_size = 2;
11038 for (j = 0; i && j < 32; j += booth_unit_size)
11040 i >>= booth_unit_size;
11041 cost++;
11044 *total = COSTS_N_INSNS (cost);
11045 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11046 return true;
11049 *total = COSTS_N_INSNS (20);
11050 return false;
11052 default:
11053 return arm_rtx_costs_1 (x, outer_code, total, speed);
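/* Illustrative sketch (not part of the original source): a standalone
   restatement of the Booth-step cost estimate used in
   arm_slowmul_rtx_costs above.  The function name and plain C types are
   hypothetical; the real code works on HOST_WIDE_INT and folds the result
   into COSTS_N_INSNS.  */
static int
example_slowmul_mult_cost (unsigned long long multiplier, int const_ok)
{
  unsigned long long i = multiplier & 0xffffffffULL;
  int cost = const_ok ? 4 : 8;	/* Loading the constant; tune as appropriate.  */
  int j;

  /* The slow multiplier retires two bits (one Booth step) per cycle.  */
  for (j = 0; i && j < 32; j += 2)
    {
      i >>= 2;
      cost++;
    }
  return cost;	/* E.g. multiplier == 10 (0b1010), const_ok -> 4 + 2 == 6.  */
}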
11058 /* RTX cost for cores with a fast multiply unit (M variants). */
11060 static bool
11061 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11062 int *total, bool speed)
11064 machine_mode mode = GET_MODE (x);
11066 if (TARGET_THUMB1)
11068 *total = thumb1_rtx_costs (x, code, outer_code);
11069 return true;
11072 /* ??? should thumb2 use different costs? */
11073 switch (code)
11075 case MULT:
11076 /* There is no point basing this on the tuning, since it is always the
11077 fast variant if it exists at all. */
11078 if (mode == DImode
11079 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11080 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11081 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11083 *total = COSTS_N_INSNS(2);
11084 return false;
11088 if (mode == DImode)
11090 *total = COSTS_N_INSNS (5);
11091 return false;
11094 if (CONST_INT_P (XEXP (x, 1)))
11096 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11097 & (unsigned HOST_WIDE_INT) 0xffffffff);
11098 int cost, const_ok = const_ok_for_arm (i);
11099 int j, booth_unit_size;
11101 /* Tune as appropriate. */
11102 cost = const_ok ? 4 : 8;
11103 booth_unit_size = 8;
11104 for (j = 0; i && j < 32; j += booth_unit_size)
11106 i >>= booth_unit_size;
11107 cost++;
11110 *total = COSTS_N_INSNS(cost);
11111 return false;
11114 if (mode == SImode)
11116 *total = COSTS_N_INSNS (4);
11117 return false;
11120 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11122 if (TARGET_HARD_FLOAT
11123 && (mode == SFmode
11124 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11126 *total = COSTS_N_INSNS (1);
11127 return false;
11131 /* Requires a lib call */
11132 *total = COSTS_N_INSNS (20);
11133 return false;
11135 default:
11136 return arm_rtx_costs_1 (x, outer_code, total, speed);
11141 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11142 so it can be ignored. */
11144 static bool
11145 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11146 int *total, bool speed)
11148 machine_mode mode = GET_MODE (x);
11150 if (TARGET_THUMB)
11152 *total = thumb1_rtx_costs (x, code, outer_code);
11153 return true;
11156 switch (code)
11158 case COMPARE:
11159 if (GET_CODE (XEXP (x, 0)) != MULT)
11160 return arm_rtx_costs_1 (x, outer_code, total, speed);
11162 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11163 will stall until the multiplication is complete. */
11164 *total = COSTS_N_INSNS (3);
11165 return false;
11167 case MULT:
11168 /* There is no point basing this on the tuning, since it is always the
11169 fast variant if it exists at all. */
11170 if (mode == DImode
11171 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11172 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11173 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11175 *total = COSTS_N_INSNS (2);
11176 return false;
11180 if (mode == DImode)
11182 *total = COSTS_N_INSNS (5);
11183 return false;
11186 if (CONST_INT_P (XEXP (x, 1)))
11188 /* If operand 1 is a constant we can more accurately
11189 calculate the cost of the multiply. The multiplier can
11190 retire 15 bits on the first cycle and a further 12 on the
11191 second. We do, of course, have to load the constant into
11192 a register first. */
11193 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11194 /* There's a general overhead of one cycle. */
11195 int cost = 1;
11196 unsigned HOST_WIDE_INT masked_const;
11198 if (i & 0x80000000)
11199 i = ~i;
11201 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11203 masked_const = i & 0xffff8000;
11204 if (masked_const != 0)
11206 cost++;
11207 masked_const = i & 0xf8000000;
11208 if (masked_const != 0)
11209 cost++;
11211 *total = COSTS_N_INSNS (cost);
11212 return false;
11215 if (mode == SImode)
11217 *total = COSTS_N_INSNS (3);
11218 return false;
11221 /* Requires a lib call */
11222 *total = COSTS_N_INSNS (20);
11223 return false;
11225 default:
11226 return arm_rtx_costs_1 (x, outer_code, total, speed);
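/* Illustrative sketch (not part of the original source): the XScale
   multiply costing above restated as a standalone function.  The name and
   types are hypothetical; the real code operates on INTVAL (XEXP (x, 1)).  */
static int
example_xscale_mult_cost (unsigned long long multiplier)
{
  unsigned long long i = multiplier;
  int cost = 1;			/* General one-cycle overhead.  */

  if (i & 0x80000000)		/* Negative values cost as their complement.  */
    i = ~i;
  i &= 0xffffffffULL;

  if ((i & 0xffff8000) != 0)	/* Bits not retired in the first cycle.  */
    cost++;
  if ((i & 0xf8000000) != 0)	/* Bits not retired in the second cycle.  */
    cost++;

  return cost;		/* E.g. 0x4000 -> 1, 0x12345 -> 2, 0x12345678 -> 3.  */
}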
11231 /* RTX costs for 9e (and later) cores. */
11233 static bool
11234 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11235 int *total, bool speed)
11237 machine_mode mode = GET_MODE (x);
11239 if (TARGET_THUMB1)
11241 switch (code)
11243 case MULT:
11244 /* Small multiply: 32 cycles for an integer multiply inst. */
11245 if (arm_arch6m && arm_m_profile_small_mul)
11246 *total = COSTS_N_INSNS (32);
11247 else
11248 *total = COSTS_N_INSNS (3);
11249 return true;
11251 default:
11252 *total = thumb1_rtx_costs (x, code, outer_code);
11253 return true;
11257 switch (code)
11259 case MULT:
11260 /* There is no point basing this on the tuning, since it is always the
11261 fast variant if it exists at all. */
11262 if (mode == DImode
11263 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11264 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11265 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11267 *total = COSTS_N_INSNS (2);
11268 return false;
11272 if (mode == DImode)
11274 *total = COSTS_N_INSNS (5);
11275 return false;
11278 if (mode == SImode)
11280 *total = COSTS_N_INSNS (2);
11281 return false;
11284 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11286 if (TARGET_HARD_FLOAT
11287 && (mode == SFmode
11288 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11290 *total = COSTS_N_INSNS (1);
11291 return false;
11295 *total = COSTS_N_INSNS (20);
11296 return false;
11298 default:
11299 return arm_rtx_costs_1 (x, outer_code, total, speed);
11302 /* All address computations that can be done are free, but rtx cost returns
11303 the same for practically all of them. So we weight the different types
11304 of address here in the order (most pref first):
11305 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11306 static inline int
11307 arm_arm_address_cost (rtx x)
11309 enum rtx_code c = GET_CODE (x);
11311 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11312 return 0;
11313 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11314 return 10;
11316 if (c == PLUS)
11318 if (CONST_INT_P (XEXP (x, 1)))
11319 return 2;
11321 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11322 return 3;
11324 return 4;
11327 return 6;
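/* Illustrative reading of the weights above: a post-increment address such
   as (post_inc (reg)) costs 0, (plus (reg) (const_int 4)) costs 2,
   (plus (reg) (mult (reg) (const_int 4))) costs 3, (plus (reg) (reg))
   costs 4, a bare (reg) costs 6, and a SYMBOL_REF or LABEL_REF (e.g. a
   constant-pool reference) costs 10.  */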
11330 static inline int
11331 arm_thumb_address_cost (rtx x)
11333 enum rtx_code c = GET_CODE (x);
11335 if (c == REG)
11336 return 1;
11337 if (c == PLUS
11338 && REG_P (XEXP (x, 0))
11339 && CONST_INT_P (XEXP (x, 1)))
11340 return 1;
11342 return 2;
11345 static int
11346 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11347 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11349 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11352 /* Adjust cost hook for XScale. */
11353 static bool
11354 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11356 /* Some true dependencies can have a higher cost depending
11357 on precisely how certain input operands are used. */
11358 if (REG_NOTE_KIND(link) == 0
11359 && recog_memoized (insn) >= 0
11360 && recog_memoized (dep) >= 0)
11362 int shift_opnum = get_attr_shift (insn);
11363 enum attr_type attr_type = get_attr_type (dep);
11365 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11366 operand for INSN. If we have a shifted input operand and the
11367 instruction we depend on is another ALU instruction, then we may
11368 have to account for an additional stall. */
11369 if (shift_opnum != 0
11370 && (attr_type == TYPE_ALU_SHIFT_IMM
11371 || attr_type == TYPE_ALUS_SHIFT_IMM
11372 || attr_type == TYPE_LOGIC_SHIFT_IMM
11373 || attr_type == TYPE_LOGICS_SHIFT_IMM
11374 || attr_type == TYPE_ALU_SHIFT_REG
11375 || attr_type == TYPE_ALUS_SHIFT_REG
11376 || attr_type == TYPE_LOGIC_SHIFT_REG
11377 || attr_type == TYPE_LOGICS_SHIFT_REG
11378 || attr_type == TYPE_MOV_SHIFT
11379 || attr_type == TYPE_MVN_SHIFT
11380 || attr_type == TYPE_MOV_SHIFT_REG
11381 || attr_type == TYPE_MVN_SHIFT_REG))
11383 rtx shifted_operand;
11384 int opno;
11386 /* Get the shifted operand. */
11387 extract_insn (insn);
11388 shifted_operand = recog_data.operand[shift_opnum];
11390 /* Iterate over all the operands in DEP. If we write an operand
11391 that overlaps with SHIFTED_OPERAND, then we have to increase the
11392 cost of this dependency. */
11393 extract_insn (dep);
11394 preprocess_constraints (dep);
11395 for (opno = 0; opno < recog_data.n_operands; opno++)
11397 /* We can ignore strict inputs. */
11398 if (recog_data.operand_type[opno] == OP_IN)
11399 continue;
11401 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11402 shifted_operand))
11404 *cost = 2;
11405 return false;
11410 return true;
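/* Illustrative example (hypothetical register numbers): on XScale a
   sequence such as

	add	r1, r2, r3
	add	r0, r4, r1, lsl #2

   matches the case handled above.  The second ALU insn consumes R1 through
   its shifter operand while R1 is produced by another ALU insn, so the
   dependency cost is set to 2 to model the extra stall.  */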
11413 /* Adjust cost hook for Cortex A9. */
11414 static bool
11415 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11417 switch (REG_NOTE_KIND (link))
11419 case REG_DEP_ANTI:
11420 *cost = 0;
11421 return false;
11423 case REG_DEP_TRUE:
11424 case REG_DEP_OUTPUT:
11425 if (recog_memoized (insn) >= 0
11426 && recog_memoized (dep) >= 0)
11428 if (GET_CODE (PATTERN (insn)) == SET)
11430 if (GET_MODE_CLASS
11431 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11432 || GET_MODE_CLASS
11433 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11435 enum attr_type attr_type_insn = get_attr_type (insn);
11436 enum attr_type attr_type_dep = get_attr_type (dep);
11438 /* By default all dependencies of the form
11439 s0 = s0 <op> s1
11440 s0 = s0 <op> s2
11441 have an extra latency of 1 cycle because
11442 of the input and output dependency in this
11443 case. However this gets modeled as a true
11444 dependency and hence all these checks. */
11445 if (REG_P (SET_DEST (PATTERN (insn)))
11446 && REG_P (SET_DEST (PATTERN (dep)))
11447 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11448 SET_DEST (PATTERN (dep))))
11450 /* FMACS is a special case where the dependent
11451 instruction can be issued 3 cycles before
11452 the normal latency in case of an output
11453 dependency. */
11454 if ((attr_type_insn == TYPE_FMACS
11455 || attr_type_insn == TYPE_FMACD)
11456 && (attr_type_dep == TYPE_FMACS
11457 || attr_type_dep == TYPE_FMACD))
11459 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11460 *cost = insn_default_latency (dep) - 3;
11461 else
11462 *cost = insn_default_latency (dep);
11463 return false;
11465 else
11467 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11468 *cost = insn_default_latency (dep) + 1;
11469 else
11470 *cost = insn_default_latency (dep);
11472 return false;
11477 break;
11479 default:
11480 gcc_unreachable ();
11483 return true;
11486 /* Adjust cost hook for FA726TE. */
11487 static bool
11488 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11490 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11491 has a penalty of 3. */
11492 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11493 && recog_memoized (insn) >= 0
11494 && recog_memoized (dep) >= 0
11495 && get_attr_conds (dep) == CONDS_SET)
11497 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11498 if (get_attr_conds (insn) == CONDS_USE
11499 && get_attr_type (insn) != TYPE_BRANCH)
11501 *cost = 3;
11502 return false;
11505 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11506 || get_attr_conds (insn) == CONDS_USE)
11508 *cost = 0;
11509 return false;
11513 return true;
11516 /* Implement TARGET_REGISTER_MOVE_COST.
11518 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11519 it is typically more expensive than a single memory access. We set
11520 the cost to less than two memory accesses so that floating
11521 point to integer conversion does not go through memory. */
11524 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11525 reg_class_t from, reg_class_t to)
11527 if (TARGET_32BIT)
11529 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11530 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11531 return 15;
11532 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11533 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11534 return 4;
11535 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11536 return 20;
11537 else
11538 return 2;
11540 else
11542 if (from == HI_REGS || to == HI_REGS)
11543 return 4;
11544 else
11545 return 2;
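/* Illustrative reading of the table above for TARGET_32BIT: a
   GENERAL_REGS <-> VFP_REGS move is costed at 15 (more than one memory
   access, less than two), IWMMXT_REGS <-> core moves at 4, anything
   involving IWMMXT_GR_REGS at 20, and ordinary core-to-core moves at 2.  */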
11549 /* Implement TARGET_MEMORY_MOVE_COST. */
11552 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11553 bool in ATTRIBUTE_UNUSED)
11555 if (TARGET_32BIT)
11556 return 10;
11557 else
11559 if (GET_MODE_SIZE (mode) < 4)
11560 return 8;
11561 else
11562 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
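/* Illustrative worked example: for TARGET_32BIT the memory move cost is a
   flat 10.  On Thumb-1 a sub-word value costs 8, while wider modes cost
   2 * GET_MODE_SIZE (mode), doubled again outside LO_REGS; e.g. DImode is
   2 * 8 * 1 == 16 into a low register but 2 * 8 * 2 == 32 otherwise.  */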
11566 /* Vectorizer cost model implementation. */
11568 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11569 static int
11570 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11571 tree vectype,
11572 int misalign ATTRIBUTE_UNUSED)
11574 unsigned elements;
11576 switch (type_of_cost)
11578 case scalar_stmt:
11579 return current_tune->vec_costs->scalar_stmt_cost;
11581 case scalar_load:
11582 return current_tune->vec_costs->scalar_load_cost;
11584 case scalar_store:
11585 return current_tune->vec_costs->scalar_store_cost;
11587 case vector_stmt:
11588 return current_tune->vec_costs->vec_stmt_cost;
11590 case vector_load:
11591 return current_tune->vec_costs->vec_align_load_cost;
11593 case vector_store:
11594 return current_tune->vec_costs->vec_store_cost;
11596 case vec_to_scalar:
11597 return current_tune->vec_costs->vec_to_scalar_cost;
11599 case scalar_to_vec:
11600 return current_tune->vec_costs->scalar_to_vec_cost;
11602 case unaligned_load:
11603 return current_tune->vec_costs->vec_unalign_load_cost;
11605 case unaligned_store:
11606 return current_tune->vec_costs->vec_unalign_store_cost;
11608 case cond_branch_taken:
11609 return current_tune->vec_costs->cond_taken_branch_cost;
11611 case cond_branch_not_taken:
11612 return current_tune->vec_costs->cond_not_taken_branch_cost;
11614 case vec_perm:
11615 case vec_promote_demote:
11616 return current_tune->vec_costs->vec_stmt_cost;
11618 case vec_construct:
11619 elements = TYPE_VECTOR_SUBPARTS (vectype);
11620 return elements / 2 + 1;
11622 default:
11623 gcc_unreachable ();
11627 /* Implement targetm.vectorize.add_stmt_cost. */
11629 static unsigned
11630 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11631 struct _stmt_vec_info *stmt_info, int misalign,
11632 enum vect_cost_model_location where)
11634 unsigned *cost = (unsigned *) data;
11635 unsigned retval = 0;
11637 if (flag_vect_cost_model)
11639 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11640 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11642 /* Statements in an inner loop relative to the loop being
11643 vectorized are weighted more heavily. The value here is
11644 arbitrary and could potentially be improved with analysis. */
11645 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11646 count *= 50; /* FIXME. */
11648 retval = (unsigned) (count * stmt_cost);
11649 cost[where] += retval;
11652 return retval;
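/* Illustrative example (hypothetical per-statement cost): if a statement in
   the inner loop of the loop being vectorized has a per-statement cost of 1
   and count == 3, the code above records 3 * 50 * 1 == 150 in
   cost[vect_body], whereas the same statement outside an inner loop would
   add just 3.  */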
11655 /* Return true if and only if this insn can dual-issue only as older. */
11656 static bool
11657 cortexa7_older_only (rtx_insn *insn)
11659 if (recog_memoized (insn) < 0)
11660 return false;
11662 switch (get_attr_type (insn))
11664 case TYPE_ALU_DSP_REG:
11665 case TYPE_ALU_SREG:
11666 case TYPE_ALUS_SREG:
11667 case TYPE_LOGIC_REG:
11668 case TYPE_LOGICS_REG:
11669 case TYPE_ADC_REG:
11670 case TYPE_ADCS_REG:
11671 case TYPE_ADR:
11672 case TYPE_BFM:
11673 case TYPE_REV:
11674 case TYPE_MVN_REG:
11675 case TYPE_SHIFT_IMM:
11676 case TYPE_SHIFT_REG:
11677 case TYPE_LOAD_BYTE:
11678 case TYPE_LOAD1:
11679 case TYPE_STORE1:
11680 case TYPE_FFARITHS:
11681 case TYPE_FADDS:
11682 case TYPE_FFARITHD:
11683 case TYPE_FADDD:
11684 case TYPE_FMOV:
11685 case TYPE_F_CVT:
11686 case TYPE_FCMPS:
11687 case TYPE_FCMPD:
11688 case TYPE_FCONSTS:
11689 case TYPE_FCONSTD:
11690 case TYPE_FMULS:
11691 case TYPE_FMACS:
11692 case TYPE_FMULD:
11693 case TYPE_FMACD:
11694 case TYPE_FDIVS:
11695 case TYPE_FDIVD:
11696 case TYPE_F_MRC:
11697 case TYPE_F_MRRC:
11698 case TYPE_F_FLAG:
11699 case TYPE_F_LOADS:
11700 case TYPE_F_STORES:
11701 return true;
11702 default:
11703 return false;
11707 /* Return true if and only if this insn can dual-issue as younger. */
11708 static bool
11709 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11711 if (recog_memoized (insn) < 0)
11713 if (verbose > 5)
11714 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11715 return false;
11718 switch (get_attr_type (insn))
11720 case TYPE_ALU_IMM:
11721 case TYPE_ALUS_IMM:
11722 case TYPE_LOGIC_IMM:
11723 case TYPE_LOGICS_IMM:
11724 case TYPE_EXTEND:
11725 case TYPE_MVN_IMM:
11726 case TYPE_MOV_IMM:
11727 case TYPE_MOV_REG:
11728 case TYPE_MOV_SHIFT:
11729 case TYPE_MOV_SHIFT_REG:
11730 case TYPE_BRANCH:
11731 case TYPE_CALL:
11732 return true;
11733 default:
11734 return false;
11739 /* Look for an instruction that can dual issue only as an older
11740 instruction, and move it in front of any instructions that can
11741 dual-issue as younger, while preserving the relative order of all
11742 other instructions in the ready list. This is a heuristic to help
11743 dual-issue in later cycles, by postponing issue of more flexible
11744 instructions. This heuristic may affect dual issue opportunities
11745 in the current cycle. */
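/* Illustrative example (not from the original sources): if the head of
   the ready list is a move of an immediate (which can dual-issue as
   younger) and the next entry is an ADD of two registers (older-only),
   the loop below records the move as FIRST_YOUNGER and the ADD as
   FIRST_OLDER_ONLY, and the ADD is then moved in front of the move so
   that the more flexible instruction stays available for pairing in a
   later cycle, in line with the rationale above.  */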
11746 static void
11747 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11748 int *n_readyp, int clock)
11750 int i;
11751 int first_older_only = -1, first_younger = -1;
11753 if (verbose > 5)
11754 fprintf (file,
11755 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11756 clock,
11757 *n_readyp);
11759 /* Traverse the ready list from the head (the instruction to issue
11760 first), looking for the first instruction that can issue as
11761 younger and the first instruction that can dual-issue only as
11762 older. */
11763 for (i = *n_readyp - 1; i >= 0; i--)
11765 rtx_insn *insn = ready[i];
11766 if (cortexa7_older_only (insn))
11768 first_older_only = i;
11769 if (verbose > 5)
11770 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11771 break;
11773 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11774 first_younger = i;
11777 /* Nothing to reorder because either no younger insn found or insn
11778 that can dual-issue only as older appears before any insn that
11779 can dual-issue as younger. */
11780 if (first_younger == -1)
11782 if (verbose > 5)
11783 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11784 return;
11787 /* Nothing to reorder because no older-only insn in the ready list. */
11788 if (first_older_only == -1)
11790 if (verbose > 5)
11791 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11792 return;
11795 /* Move first_older_only insn before first_younger. */
11796 if (verbose > 5)
11797 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11798 INSN_UID(ready [first_older_only]),
11799 INSN_UID(ready [first_younger]));
11800 rtx_insn *first_older_only_insn = ready [first_older_only];
11801 for (i = first_older_only; i < first_younger; i++)
11803 ready[i] = ready[i+1];
11806 ready[i] = first_older_only_insn;
11807 return;
11810 /* Implement TARGET_SCHED_REORDER. */
11811 static int
11812 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11813 int clock)
11815 switch (arm_tune)
11817 case cortexa7:
11818 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11819 break;
11820 default:
11821 /* Do nothing for other cores. */
11822 break;
11825 return arm_issue_rate ();
11828 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11829 It corrects the value of COST based on the relationship between
11830 INSN and DEP through the dependence LINK. It returns the new
11831 value. There is a per-core adjust_cost hook to adjust scheduler costs
11832 and the per-core hook can choose to completely override the generic
11833 adjust_cost function. Only put bits of code into arm_adjust_cost that
11834 are common across all cores. */
11835 static int
11836 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11838 rtx i_pat, d_pat;
11840 /* When generating Thumb-1 code, we want to place flag-setting operations
11841 close to a conditional branch which depends on them, so that we can
11842 omit the comparison. */
11843 if (TARGET_THUMB1
11844 && REG_NOTE_KIND (link) == 0
11845 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11846 && recog_memoized (dep) >= 0
11847 && get_attr_conds (dep) == CONDS_SET)
11848 return 0;
11850 if (current_tune->sched_adjust_cost != NULL)
11852 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11853 return cost;
11856 /* XXX Is this strictly true? */
11857 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11858 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11859 return 0;
11861 /* Call insns don't incur a stall, even if they follow a load. */
11862 if (REG_NOTE_KIND (link) == 0
11863 && CALL_P (insn))
11864 return 1;
11866 if ((i_pat = single_set (insn)) != NULL
11867 && MEM_P (SET_SRC (i_pat))
11868 && (d_pat = single_set (dep)) != NULL
11869 && MEM_P (SET_DEST (d_pat)))
11871 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12872 /* This is a load after a store; there is no conflict if the load reads
11873 from a cached area. Assume that loads from the stack, and from the
11874 constant pool are cached, and that others will miss. This is a
11875 hack. */
11877 if ((GET_CODE (src_mem) == SYMBOL_REF
11878 && CONSTANT_POOL_ADDRESS_P (src_mem))
11879 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11880 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11881 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11882 return 1;
11885 return cost;
11889 arm_max_conditional_execute (void)
11891 return max_insns_skipped;
11894 static int
11895 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11897 if (TARGET_32BIT)
11898 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11899 else
11900 return (optimize > 0) ? 2 : 0;
11903 static int
11904 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11906 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11909 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11910 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11911 sequences of non-executed instructions in IT blocks probably take the same
11912 amount of time as executed instructions (and the IT instruction itself takes
11913 space in icache). This function was experimentally determined to give good
11914 results on a popular embedded benchmark. */
11916 static int
11917 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11919 return (TARGET_32BIT && speed_p) ? 1
11920 : arm_default_branch_cost (speed_p, predictable_p);
11923 static int
11924 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11926 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11929 static bool fp_consts_inited = false;
11931 static REAL_VALUE_TYPE value_fp0;
11933 static void
11934 init_fp_table (void)
11936 REAL_VALUE_TYPE r;
11938 r = REAL_VALUE_ATOF ("0", DFmode);
11939 value_fp0 = r;
11940 fp_consts_inited = true;
11943 /* Return TRUE if rtx X is a valid immediate FP constant. */
11945 arm_const_double_rtx (rtx x)
11947 REAL_VALUE_TYPE r;
11949 if (!fp_consts_inited)
11950 init_fp_table ();
11952 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11953 if (REAL_VALUE_MINUS_ZERO (r))
11954 return 0;
11956 if (REAL_VALUES_EQUAL (r, value_fp0))
11957 return 1;
11959 return 0;
11962 /* VFPv3 has a fairly wide range of representable immediates, formed from
11963 "quarter-precision" floating-point values. These can be evaluated using this
11964 formula (with ^ for exponentiation):
11966 -1^s * n * 2^-r
11968 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11969 16 <= n <= 31 and 0 <= r <= 7.
11971 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11973 - A (most-significant) is the sign bit.
11974 - BCD are the exponent (encoded as r XOR 3).
11975 - EFGH are the mantissa (encoded as n - 16).
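/* Worked example, derived from the encoding description above (added for
   illustration): 1.0 = 16 * 2^-4, so s = 0, r = 4 and n = 16, giving
   A = 0, BCD = 4 ^ 3 = 0b111, EFGH = 16 - 16 = 0b0000, i.e. index 0x70.
   Likewise 0.5 = 16 * 2^-5 encodes as 0x60, and -31.0 = -(31 * 2^0)
   encodes as A = 1, BCD = 0 ^ 3 = 0b011, EFGH = 31 - 16 = 0b1111,
   i.e. 0xbf.  */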
11978 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11979 fconst[sd] instruction, or -1 if X isn't suitable. */
11980 static int
11981 vfp3_const_double_index (rtx x)
11983 REAL_VALUE_TYPE r, m;
11984 int sign, exponent;
11985 unsigned HOST_WIDE_INT mantissa, mant_hi;
11986 unsigned HOST_WIDE_INT mask;
11987 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11988 bool fail;
11990 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11991 return -1;
11993 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11995 /* We can't represent these things, so detect them first. */
11996 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11997 return -1;
11999 /* Extract sign, exponent and mantissa. */
12000 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12001 r = real_value_abs (&r);
12002 exponent = REAL_EXP (&r);
12003 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12004 highest (sign) bit, with a fixed binary point at bit point_pos.
12005 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12006 bits for the mantissa, this may fail (low bits would be lost). */
12007 real_ldexp (&m, &r, point_pos - exponent);
12008 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12009 mantissa = w.elt (0);
12010 mant_hi = w.elt (1);
12012 /* If there are bits set in the low part of the mantissa, we can't
12013 represent this value. */
12014 if (mantissa != 0)
12015 return -1;
12017 /* Now make it so that mantissa contains the most-significant bits, and move
12018 the point_pos to indicate that the least-significant bits have been
12019 discarded. */
12020 point_pos -= HOST_BITS_PER_WIDE_INT;
12021 mantissa = mant_hi;
12023 /* We can permit four significant bits of mantissa only, plus a high bit
12024 which is always 1. */
12025 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12026 if ((mantissa & mask) != 0)
12027 return -1;
12029 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12030 mantissa >>= point_pos - 5;
12032 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12033 floating-point immediate zero with Neon using an integer-zero load, but
12034 that case is handled elsewhere.) */
12035 if (mantissa == 0)
12036 return -1;
12038 gcc_assert (mantissa >= 16 && mantissa <= 31);
12040 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12041 normalized significands are in the range [1, 2). (Our mantissa is shifted
12042 left 4 places at this point relative to normalized IEEE754 values). GCC
12043 internally uses [0.5, 1) (see real.c), so the exponent returned from
12044 REAL_EXP must be altered. */
12045 exponent = 5 - exponent;
12047 if (exponent < 0 || exponent > 7)
12048 return -1;
12050 /* Sign, mantissa and exponent are now in the correct form to plug into the
12051 formula described in the comment above. */
12052 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12055 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12057 vfp3_const_double_rtx (rtx x)
12059 if (!TARGET_VFP3)
12060 return 0;
12062 return vfp3_const_double_index (x) != -1;
12065 /* Recognize immediates which can be used in various Neon instructions. Legal
12066 immediates are described by the following table (for VMVN variants, the
12067 bitwise inverse of the constant shown is recognized. In either case, VMOV
12068 is output and the correct instruction to use for a given constant is chosen
12069 by the assembler). The constant shown is replicated across all elements of
12070 the destination vector.
12072 insn elems variant constant (binary)
12073 ---- ----- ------- -----------------
12074 vmov i32 0 00000000 00000000 00000000 abcdefgh
12075 vmov i32 1 00000000 00000000 abcdefgh 00000000
12076 vmov i32 2 00000000 abcdefgh 00000000 00000000
12077 vmov i32 3 abcdefgh 00000000 00000000 00000000
12078 vmov i16 4 00000000 abcdefgh
12079 vmov i16 5 abcdefgh 00000000
12080 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12081 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12082 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12083 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12084 vmvn i16 10 00000000 abcdefgh
12085 vmvn i16 11 abcdefgh 00000000
12086 vmov i32 12 00000000 00000000 abcdefgh 11111111
12087 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12088 vmov i32 14 00000000 abcdefgh 11111111 11111111
12089 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12090 vmov i8 16 abcdefgh
12091 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12092 eeeeeeee ffffffff gggggggg hhhhhhhh
12093 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12094 vmov f32 19 00000000 00000000 00000000 00000000
12096 For case 18, B = !b. Representable values are exactly those accepted by
12097 vfp3_const_double_index, but are output as floating-point numbers rather
12098 than indices.
12100 For case 19, we will change it to vmov.i32 when assembling.
12102 Variants 0-5 (inclusive) may also be used as immediates for the second
12103 operand of VORR/VBIC instructions.
12105 The INVERSE argument causes the bitwise inverse of the given operand to be
12106 recognized instead (used for recognizing legal immediates for the VAND/VORN
12107 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12108 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12109 output, rather than the real insns vbic/vorr).
12111 INVERSE makes no difference to the recognition of float vectors.
12113 The return value is the variant of immediate as shown in the above table, or
12114 -1 if the given value doesn't match any of the listed patterns.
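/* Illustrative example (added, not part of the original comment): a
   V4SImode CONST_VECTOR whose elements are all 0x000000ff splats to the
   byte pattern ff 00 00 00 repeated, which matches variant 0 above, so
   the function returns 0 with *ELEMENTWIDTH = 32 and *MODCONST = 0xff,
   suitable for something like "vmov.i32 qN, #0xff".  An element value
   such as 0x12345678 matches none of the rows and yields -1.  */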
12116 static int
12117 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12118 rtx *modconst, int *elementwidth)
12120 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12121 matches = 1; \
12122 for (i = 0; i < idx; i += (STRIDE)) \
12123 if (!(TEST)) \
12124 matches = 0; \
12125 if (matches) \
12127 immtype = (CLASS); \
12128 elsize = (ELSIZE); \
12129 break; \
12132 unsigned int i, elsize = 0, idx = 0, n_elts;
12133 unsigned int innersize;
12134 unsigned char bytes[16];
12135 int immtype = -1, matches;
12136 unsigned int invmask = inverse ? 0xff : 0;
12137 bool vector = GET_CODE (op) == CONST_VECTOR;
12139 if (vector)
12141 n_elts = CONST_VECTOR_NUNITS (op);
12142 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12144 else
12146 n_elts = 1;
12147 if (mode == VOIDmode)
12148 mode = DImode;
12149 innersize = GET_MODE_SIZE (mode);
12152 /* Vectors of float constants. */
12153 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12155 rtx el0 = CONST_VECTOR_ELT (op, 0);
12156 REAL_VALUE_TYPE r0;
12158 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12159 return -1;
12161 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12163 for (i = 1; i < n_elts; i++)
12165 rtx elt = CONST_VECTOR_ELT (op, i);
12166 REAL_VALUE_TYPE re;
12168 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12170 if (!REAL_VALUES_EQUAL (r0, re))
12171 return -1;
12174 if (modconst)
12175 *modconst = CONST_VECTOR_ELT (op, 0);
12177 if (elementwidth)
12178 *elementwidth = 0;
12180 if (el0 == CONST0_RTX (GET_MODE (el0)))
12181 return 19;
12182 else
12183 return 18;
12186 /* Splat vector constant out into a byte vector. */
12187 for (i = 0; i < n_elts; i++)
12189 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12190 unsigned HOST_WIDE_INT elpart;
12191 unsigned int part, parts;
12193 if (CONST_INT_P (el))
12195 elpart = INTVAL (el);
12196 parts = 1;
12198 else if (CONST_DOUBLE_P (el))
12200 elpart = CONST_DOUBLE_LOW (el);
12201 parts = 2;
12203 else
12204 gcc_unreachable ();
12206 for (part = 0; part < parts; part++)
12208 unsigned int byte;
12209 for (byte = 0; byte < innersize; byte++)
12211 bytes[idx++] = (elpart & 0xff) ^ invmask;
12212 elpart >>= BITS_PER_UNIT;
12214 if (CONST_DOUBLE_P (el))
12215 elpart = CONST_DOUBLE_HIGH (el);
12219 /* Sanity check. */
12220 gcc_assert (idx == GET_MODE_SIZE (mode));
12224 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12225 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12227 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12228 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12230 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12231 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12233 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12234 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12236 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12238 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12240 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12241 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12243 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12244 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12246 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12247 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12249 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12250 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12252 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12254 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12256 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12257 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12259 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12260 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12262 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12263 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12265 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12266 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12268 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12270 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12271 && bytes[i] == bytes[(i + 8) % idx]);
12273 while (0);
12275 if (immtype == -1)
12276 return -1;
12278 if (elementwidth)
12279 *elementwidth = elsize;
12281 if (modconst)
12283 unsigned HOST_WIDE_INT imm = 0;
12285 /* Un-invert bytes of recognized vector, if necessary. */
12286 if (invmask != 0)
12287 for (i = 0; i < idx; i++)
12288 bytes[i] ^= invmask;
12290 if (immtype == 17)
12292 /* FIXME: Broken on 32-bit H_W_I hosts. */
12293 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12295 for (i = 0; i < 8; i++)
12296 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12297 << (i * BITS_PER_UNIT);
12299 *modconst = GEN_INT (imm);
12301 else
12303 unsigned HOST_WIDE_INT imm = 0;
12305 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12306 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12308 *modconst = GEN_INT (imm);
12312 return immtype;
12313 #undef CHECK
12316 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12317 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12318 float elements), and a modified constant (whatever should be output for a
12319 VMOV) in *MODCONST. */
12322 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12323 rtx *modconst, int *elementwidth)
12325 rtx tmpconst;
12326 int tmpwidth;
12327 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12329 if (retval == -1)
12330 return 0;
12332 if (modconst)
12333 *modconst = tmpconst;
12335 if (elementwidth)
12336 *elementwidth = tmpwidth;
12338 return 1;
12341 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12342 the immediate is valid, write a constant suitable for using as an operand
12343 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12344 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12347 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12348 rtx *modconst, int *elementwidth)
12350 rtx tmpconst;
12351 int tmpwidth;
12352 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12354 if (retval < 0 || retval > 5)
12355 return 0;
12357 if (modconst)
12358 *modconst = tmpconst;
12360 if (elementwidth)
12361 *elementwidth = tmpwidth;
12363 return 1;
12366 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12367 the immediate is valid, write a constant suitable for using as an operand
12368 to VSHR/VSHL to *MODCONST and the corresponding element width to
12369 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12370 because they have different limitations. */
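/* Example of the asymmetry (added for illustration): for a V8QImode
   shift-count vector the element size is 8 bits, so a valid VSHL
   immediate lies in 0..7 while a valid VSHR immediate lies in 1..8;
   the checks against MAXSHIFT below implement exactly those ranges.  */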
12373 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12374 rtx *modconst, int *elementwidth,
12375 bool isleftshift)
12377 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12378 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12379 unsigned HOST_WIDE_INT last_elt = 0;
12380 unsigned HOST_WIDE_INT maxshift;
12382 /* Split vector constant out into a byte vector. */
12383 for (i = 0; i < n_elts; i++)
12385 rtx el = CONST_VECTOR_ELT (op, i);
12386 unsigned HOST_WIDE_INT elpart;
12388 if (CONST_INT_P (el))
12389 elpart = INTVAL (el);
12390 else if (CONST_DOUBLE_P (el))
12391 return 0;
12392 else
12393 gcc_unreachable ();
12395 if (i != 0 && elpart != last_elt)
12396 return 0;
12398 last_elt = elpart;
12401 /* Shift less than element size. */
12402 maxshift = innersize * 8;
12404 if (isleftshift)
12406 /* Left shift immediate value can be from 0 to <size>-1. */
12407 if (last_elt >= maxshift)
12408 return 0;
12410 else
12412 /* Right shift immediate value can be from 1 to <size>. */
12413 if (last_elt == 0 || last_elt > maxshift)
12414 return 0;
12417 if (elementwidth)
12418 *elementwidth = innersize * 8;
12420 if (modconst)
12421 *modconst = CONST_VECTOR_ELT (op, 0);
12423 return 1;
12426 /* Return a string suitable for output of Neon immediate logic operation
12427 MNEM. */
12429 char *
12430 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12431 int inverse, int quad)
12433 int width, is_valid;
12434 static char templ[40];
12436 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12438 gcc_assert (is_valid != 0);
12440 if (quad)
12441 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12442 else
12443 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12445 return templ;
12448 /* Return a string suitable for output of Neon immediate shift operation
12449 (VSHR or VSHL) MNEM. */
12451 char *
12452 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12453 machine_mode mode, int quad,
12454 bool isleftshift)
12456 int width, is_valid;
12457 static char templ[40];
12459 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12460 gcc_assert (is_valid != 0);
12462 if (quad)
12463 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12464 else
12465 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12467 return templ;
12470 /* Output a sequence of pairwise operations to implement a reduction.
12471 NOTE: We do "too much work" here, because pairwise operations work on two
12472 registers-worth of operands in one go. Unfortunately we can't exploit those
12473 extra calculations to do the full operation in fewer steps, I don't think.
12474 Although all vector elements of the result but the first are ignored, we
12475 actually calculate the same result in each of the elements. An alternative
12476 such as initially loading a vector with zero to use as each of the second
12477 operands would use up an additional register and take an extra instruction,
12478 for no particular gain. */
12480 void
12481 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12482 rtx (*reduc) (rtx, rtx, rtx))
12484 machine_mode inner = GET_MODE_INNER (mode);
12485 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12486 rtx tmpsum = op1;
12488 for (i = parts / 2; i >= 1; i /= 2)
12490 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12491 emit_insn (reduc (dest, tmpsum, tmpsum));
12492 tmpsum = dest;
12496 /* If VALS is a vector constant that can be loaded into a register
12497 using VDUP, generate instructions to do so and return an RTX to
12498 assign to the register. Otherwise return NULL_RTX. */
12500 static rtx
12501 neon_vdup_constant (rtx vals)
12503 machine_mode mode = GET_MODE (vals);
12504 machine_mode inner_mode = GET_MODE_INNER (mode);
12505 int n_elts = GET_MODE_NUNITS (mode);
12506 bool all_same = true;
12507 rtx x;
12508 int i;
12510 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12511 return NULL_RTX;
12513 for (i = 0; i < n_elts; ++i)
12515 x = XVECEXP (vals, 0, i);
12516 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12517 all_same = false;
12520 if (!all_same)
12521 /* The elements are not all the same. We could handle repeating
12522 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12523 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12524 vdup.i16). */
12525 return NULL_RTX;
12527 /* We can load this constant by using VDUP and a constant in a
12528 single ARM register. This will be cheaper than a vector
12529 load. */
12531 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12532 return gen_rtx_VEC_DUPLICATE (mode, x);
12535 /* Generate code to load VALS, which is a PARALLEL containing only
12536 constants (for vec_init) or CONST_VECTOR, efficiently into a
12537 register. Returns an RTX to copy into the register, or NULL_RTX
12538 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12541 neon_make_constant (rtx vals)
12543 machine_mode mode = GET_MODE (vals);
12544 rtx target;
12545 rtx const_vec = NULL_RTX;
12546 int n_elts = GET_MODE_NUNITS (mode);
12547 int n_const = 0;
12548 int i;
12550 if (GET_CODE (vals) == CONST_VECTOR)
12551 const_vec = vals;
12552 else if (GET_CODE (vals) == PARALLEL)
12554 /* A CONST_VECTOR must contain only CONST_INTs and
12555 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12556 Only store valid constants in a CONST_VECTOR. */
12557 for (i = 0; i < n_elts; ++i)
12559 rtx x = XVECEXP (vals, 0, i);
12560 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12561 n_const++;
12563 if (n_const == n_elts)
12564 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12566 else
12567 gcc_unreachable ();
12569 if (const_vec != NULL
12570 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12571 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12572 return const_vec;
12573 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12574 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12575 pipeline cycle; creating the constant takes one or two ARM
12576 pipeline cycles. */
12577 return target;
12578 else if (const_vec != NULL_RTX)
12579 /* Load from constant pool. On Cortex-A8 this takes two cycles
12580 (for either double or quad vectors). We cannot take advantage
12581 of single-cycle VLD1 because we need a PC-relative addressing
12582 mode. */
12583 return const_vec;
12584 else
12585 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12586 We cannot construct an initializer. */
12587 return NULL_RTX;
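/* Example of the possible outcomes (added for illustration): in V4SImode
   the constant {1, 1, 1, 1} is a legal VMOV immediate and is returned
   as-is; a uniform vector of 0x12345678 is not, but can be loaded with
   VDUP from a core register; {1, 2, 3, 4} falls back to the constant
   pool; and a PARALLEL containing, say, a SYMBOL_REF element gives
   NULL_RTX.  */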
12590 /* Initialize vector TARGET to VALS. */
12592 void
12593 neon_expand_vector_init (rtx target, rtx vals)
12595 machine_mode mode = GET_MODE (target);
12596 machine_mode inner_mode = GET_MODE_INNER (mode);
12597 int n_elts = GET_MODE_NUNITS (mode);
12598 int n_var = 0, one_var = -1;
12599 bool all_same = true;
12600 rtx x, mem;
12601 int i;
12603 for (i = 0; i < n_elts; ++i)
12605 x = XVECEXP (vals, 0, i);
12606 if (!CONSTANT_P (x))
12607 ++n_var, one_var = i;
12609 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12610 all_same = false;
12613 if (n_var == 0)
12615 rtx constant = neon_make_constant (vals);
12616 if (constant != NULL_RTX)
12618 emit_move_insn (target, constant);
12619 return;
12623 /* Splat a single non-constant element if we can. */
12624 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12626 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12627 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12628 return;
12631 /* One field is non-constant. Load constant then overwrite varying
12632 field. This is more efficient than using the stack. */
12633 if (n_var == 1)
12635 rtx copy = copy_rtx (vals);
12636 rtx index = GEN_INT (one_var);
12638 /* Load constant part of vector, substitute neighboring value for
12639 varying element. */
12640 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12641 neon_expand_vector_init (target, copy);
12643 /* Insert variable. */
12644 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12645 switch (mode)
12647 case V8QImode:
12648 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12649 break;
12650 case V16QImode:
12651 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12652 break;
12653 case V4HImode:
12654 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12655 break;
12656 case V8HImode:
12657 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12658 break;
12659 case V2SImode:
12660 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12661 break;
12662 case V4SImode:
12663 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12664 break;
12665 case V2SFmode:
12666 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12667 break;
12668 case V4SFmode:
12669 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12670 break;
12671 case V2DImode:
12672 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12673 break;
12674 default:
12675 gcc_unreachable ();
12677 return;
12680 /* Construct the vector in memory one field at a time
12681 and load the whole vector. */
12682 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12683 for (i = 0; i < n_elts; i++)
12684 emit_move_insn (adjust_address_nv (mem, inner_mode,
12685 i * GET_MODE_SIZE (inner_mode)),
12686 XVECEXP (vals, 0, i));
12687 emit_move_insn (target, mem);
12690 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12691 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12692 reported source locations are bogus. */
12694 static void
12695 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12696 const char *err)
12698 HOST_WIDE_INT lane;
12700 gcc_assert (CONST_INT_P (operand));
12702 lane = INTVAL (operand);
12704 if (lane < low || lane >= high)
12705 error (err);
12708 /* Bounds-check lanes. */
12710 void
12711 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12713 bounds_check (operand, low, high, "lane out of range");
12716 /* Bounds-check constants. */
12718 void
12719 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12721 bounds_check (operand, low, high, "constant out of range");
12724 HOST_WIDE_INT
12725 neon_element_bits (machine_mode mode)
12727 if (mode == DImode)
12728 return GET_MODE_BITSIZE (mode);
12729 else
12730 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12734 /* Predicates for `match_operand' and `match_operator'. */
12736 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12737 WB is true if full writeback address modes are allowed and is false
12738 if limited writeback address modes (POST_INC and PRE_DEC) are
12739 allowed. */
12742 arm_coproc_mem_operand (rtx op, bool wb)
12744 rtx ind;
12746 /* Reject eliminable registers. */
12747 if (! (reload_in_progress || reload_completed || lra_in_progress)
12748 && ( reg_mentioned_p (frame_pointer_rtx, op)
12749 || reg_mentioned_p (arg_pointer_rtx, op)
12750 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12751 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12752 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12753 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12754 return FALSE;
12756 /* Constants are converted into offsets from labels. */
12757 if (!MEM_P (op))
12758 return FALSE;
12760 ind = XEXP (op, 0);
12762 if (reload_completed
12763 && (GET_CODE (ind) == LABEL_REF
12764 || (GET_CODE (ind) == CONST
12765 && GET_CODE (XEXP (ind, 0)) == PLUS
12766 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12767 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12768 return TRUE;
12770 /* Match: (mem (reg)). */
12771 if (REG_P (ind))
12772 return arm_address_register_rtx_p (ind, 0);
12774 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
12775 acceptable in any case (subject to verification by
12776 arm_address_register_rtx_p). We need WB to be true to accept
12777 PRE_INC and POST_DEC. */
12778 if (GET_CODE (ind) == POST_INC
12779 || GET_CODE (ind) == PRE_DEC
12780 || (wb
12781 && (GET_CODE (ind) == PRE_INC
12782 || GET_CODE (ind) == POST_DEC)))
12783 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12785 if (wb
12786 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12787 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12788 && GET_CODE (XEXP (ind, 1)) == PLUS
12789 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12790 ind = XEXP (ind, 1);
12792 /* Match:
12793 (plus (reg)
12794 (const)). */
12795 if (GET_CODE (ind) == PLUS
12796 && REG_P (XEXP (ind, 0))
12797 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12798 && CONST_INT_P (XEXP (ind, 1))
12799 && INTVAL (XEXP (ind, 1)) > -1024
12800 && INTVAL (XEXP (ind, 1)) < 1024
12801 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12802 return TRUE;
12804 return FALSE;
12807 /* Return TRUE if OP is a memory operand which we can load or store a vector
12808 to/from. TYPE is one of the following values:
12809 0 - Vector load/store (vldr)
12810 1 - Core registers (ldm)
12811 2 - Element/structure loads (vld1)
12814 neon_vector_mem_operand (rtx op, int type, bool strict)
12816 rtx ind;
12818 /* Reject eliminable registers. */
12819 if (! (reload_in_progress || reload_completed)
12820 && ( reg_mentioned_p (frame_pointer_rtx, op)
12821 || reg_mentioned_p (arg_pointer_rtx, op)
12822 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12823 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12824 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12825 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12826 return !strict;
12828 /* Constants are converted into offsets from labels. */
12829 if (!MEM_P (op))
12830 return FALSE;
12832 ind = XEXP (op, 0);
12834 if (reload_completed
12835 && (GET_CODE (ind) == LABEL_REF
12836 || (GET_CODE (ind) == CONST
12837 && GET_CODE (XEXP (ind, 0)) == PLUS
12838 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12839 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12840 return TRUE;
12842 /* Match: (mem (reg)). */
12843 if (REG_P (ind))
12844 return arm_address_register_rtx_p (ind, 0);
12846 /* Allow post-increment with Neon registers. */
12847 if ((type != 1 && GET_CODE (ind) == POST_INC)
12848 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12849 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12851 /* Allow post-increment by register for VLDn */
12852 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12853 && GET_CODE (XEXP (ind, 1)) == PLUS
12854 && REG_P (XEXP (XEXP (ind, 1), 1)))
12855 return true;
12857 /* Match:
12858 (plus (reg)
12859 (const)). */
12860 if (type == 0
12861 && GET_CODE (ind) == PLUS
12862 && REG_P (XEXP (ind, 0))
12863 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12864 && CONST_INT_P (XEXP (ind, 1))
12865 && INTVAL (XEXP (ind, 1)) > -1024
12866 /* For quad modes, we restrict the constant offset to be slightly less
12867 than what the instruction format permits. We have no such constraint
12868 on double mode offsets. (This must match arm_legitimate_index_p.) */
12869 && (INTVAL (XEXP (ind, 1))
12870 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12871 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12872 return TRUE;
12874 return FALSE;
12877 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12878 type. */
12880 neon_struct_mem_operand (rtx op)
12882 rtx ind;
12884 /* Reject eliminable registers. */
12885 if (! (reload_in_progress || reload_completed)
12886 && ( reg_mentioned_p (frame_pointer_rtx, op)
12887 || reg_mentioned_p (arg_pointer_rtx, op)
12888 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12889 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12890 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12891 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12892 return FALSE;
12894 /* Constants are converted into offsets from labels. */
12895 if (!MEM_P (op))
12896 return FALSE;
12898 ind = XEXP (op, 0);
12900 if (reload_completed
12901 && (GET_CODE (ind) == LABEL_REF
12902 || (GET_CODE (ind) == CONST
12903 && GET_CODE (XEXP (ind, 0)) == PLUS
12904 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12905 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12906 return TRUE;
12908 /* Match: (mem (reg)). */
12909 if (REG_P (ind))
12910 return arm_address_register_rtx_p (ind, 0);
12912 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12913 if (GET_CODE (ind) == POST_INC
12914 || GET_CODE (ind) == PRE_DEC)
12915 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12917 return FALSE;
12920 /* Return true if X is a register that will be eliminated later on. */
12922 arm_eliminable_register (rtx x)
12924 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12925 || REGNO (x) == ARG_POINTER_REGNUM
12926 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12927 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12930 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12931 coprocessor registers. Otherwise return NO_REGS. */
12933 enum reg_class
12934 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12936 if (mode == HFmode)
12938 if (!TARGET_NEON_FP16)
12939 return GENERAL_REGS;
12940 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12941 return NO_REGS;
12942 return GENERAL_REGS;
12945 /* The neon move patterns handle all legitimate vector and struct
12946 addresses. */
12947 if (TARGET_NEON
12948 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12949 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12950 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12951 || VALID_NEON_STRUCT_MODE (mode)))
12952 return NO_REGS;
12954 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12955 return NO_REGS;
12957 return GENERAL_REGS;
12960 /* Values which must be returned in the most-significant end of the return
12961 register. */
12963 static bool
12964 arm_return_in_msb (const_tree valtype)
12966 return (TARGET_AAPCS_BASED
12967 && BYTES_BIG_ENDIAN
12968 && (AGGREGATE_TYPE_P (valtype)
12969 || TREE_CODE (valtype) == COMPLEX_TYPE
12970 || FIXED_POINT_TYPE_P (valtype)));
12973 /* Return TRUE if X references a SYMBOL_REF. */
12975 symbol_mentioned_p (rtx x)
12977 const char * fmt;
12978 int i;
12980 if (GET_CODE (x) == SYMBOL_REF)
12981 return 1;
12983 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12984 are constant offsets, not symbols. */
12985 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12986 return 0;
12988 fmt = GET_RTX_FORMAT (GET_CODE (x));
12990 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12992 if (fmt[i] == 'E')
12994 int j;
12996 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12997 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12998 return 1;
13000 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13001 return 1;
13004 return 0;
13007 /* Return TRUE if X references a LABEL_REF. */
13009 label_mentioned_p (rtx x)
13011 const char * fmt;
13012 int i;
13014 if (GET_CODE (x) == LABEL_REF)
13015 return 1;
13017 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13018 instruction, but they are constant offsets, not symbols. */
13019 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13020 return 0;
13022 fmt = GET_RTX_FORMAT (GET_CODE (x));
13023 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13025 if (fmt[i] == 'E')
13027 int j;
13029 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13030 if (label_mentioned_p (XVECEXP (x, i, j)))
13031 return 1;
13033 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13034 return 1;
13037 return 0;
13041 tls_mentioned_p (rtx x)
13043 switch (GET_CODE (x))
13045 case CONST:
13046 return tls_mentioned_p (XEXP (x, 0));
13048 case UNSPEC:
13049 if (XINT (x, 1) == UNSPEC_TLS)
13050 return 1;
13052 default:
13053 return 0;
13057 /* Must not copy any rtx that uses a pc-relative address. */
13059 static bool
13060 arm_cannot_copy_insn_p (rtx_insn *insn)
13062 /* The tls call insn cannot be copied, as it is paired with a data
13063 word. */
13064 if (recog_memoized (insn) == CODE_FOR_tlscall)
13065 return true;
13067 subrtx_iterator::array_type array;
13068 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13070 const_rtx x = *iter;
13071 if (GET_CODE (x) == UNSPEC
13072 && (XINT (x, 1) == UNSPEC_PIC_BASE
13073 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13074 return true;
13076 return false;
13079 enum rtx_code
13080 minmax_code (rtx x)
13082 enum rtx_code code = GET_CODE (x);
13084 switch (code)
13086 case SMAX:
13087 return GE;
13088 case SMIN:
13089 return LE;
13090 case UMIN:
13091 return LEU;
13092 case UMAX:
13093 return GEU;
13094 default:
13095 gcc_unreachable ();
13099 /* Match pair of min/max operators that can be implemented via usat/ssat. */
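/* Worked example, derived from the rules implemented below (added for
   illustration): bounds [0, 255] give exact_log2 (256) = 8 with a zero
   low bound, so *MASK = 8 and *SIGNED_SAT = false (a usat #8 range).
   Bounds [-256, 255] satisfy lo == -hi - 1, so *MASK = 9 and
   *SIGNED_SAT = true (an ssat #9 range).  Bounds such as [0, 100] fail
   the power-of-two-minus-one test and the function returns false.  */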
13101 bool
13102 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13103 int *mask, bool *signed_sat)
13105 /* The high bound must be a power of two minus one. */
13106 int log = exact_log2 (INTVAL (hi_bound) + 1);
13107 if (log == -1)
13108 return false;
13110 /* The low bound is either zero (for usat) or one less than the
13111 negation of the high bound (for ssat). */
13112 if (INTVAL (lo_bound) == 0)
13114 if (mask)
13115 *mask = log;
13116 if (signed_sat)
13117 *signed_sat = false;
13119 return true;
13122 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13124 if (mask)
13125 *mask = log + 1;
13126 if (signed_sat)
13127 *signed_sat = true;
13129 return true;
13132 return false;
13135 /* Return 1 if memory locations are adjacent. */
13137 adjacent_mem_locations (rtx a, rtx b)
13139 /* We don't guarantee to preserve the order of these memory refs. */
13140 if (volatile_refs_p (a) || volatile_refs_p (b))
13141 return 0;
13143 if ((REG_P (XEXP (a, 0))
13144 || (GET_CODE (XEXP (a, 0)) == PLUS
13145 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13146 && (REG_P (XEXP (b, 0))
13147 || (GET_CODE (XEXP (b, 0)) == PLUS
13148 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13150 HOST_WIDE_INT val0 = 0, val1 = 0;
13151 rtx reg0, reg1;
13152 int val_diff;
13154 if (GET_CODE (XEXP (a, 0)) == PLUS)
13156 reg0 = XEXP (XEXP (a, 0), 0);
13157 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13159 else
13160 reg0 = XEXP (a, 0);
13162 if (GET_CODE (XEXP (b, 0)) == PLUS)
13164 reg1 = XEXP (XEXP (b, 0), 0);
13165 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13167 else
13168 reg1 = XEXP (b, 0);
13170 /* Don't accept any offset that will require multiple
13171 instructions to handle, since this would cause the
13172 arith_adjacentmem pattern to output an overlong sequence. */
13173 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13174 return 0;
13176 /* Don't allow an eliminable register: register elimination can make
13177 the offset too large. */
13178 if (arm_eliminable_register (reg0))
13179 return 0;
13181 val_diff = val1 - val0;
13183 if (arm_ld_sched)
13185 /* If the target has load delay slots, then there's no benefit
13186 to using an ldm instruction unless the offset is zero and
13187 we are optimizing for size. */
13188 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13189 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13190 && (val_diff == 4 || val_diff == -4));
13193 return ((REGNO (reg0) == REGNO (reg1))
13194 && (val_diff == 4 || val_diff == -4));
13197 return 0;
13200 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13201 for load operations, false for store operations. CONSECUTIVE is true
13202 if the register numbers in the operation must be consecutive in the register
13203 bank. RETURN_PC is true if the value is to be loaded into the PC.
13204 The pattern we are trying to match for load is:
13205 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13206 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13207 :
13208 :
13209 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13211 where
13212 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13213 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13214 3. If consecutive is TRUE, then for kth register being loaded,
13215 REGNO (R_dk) = REGNO (R_d0) + k.
13216 The pattern for store is similar. */
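/* For illustration (not from the original sources), a two-register ldmia
   of r4 and r5 from the address in r0 would be presented to this
   predicate roughly as

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))])

   i.e. ascending register numbers and offsets increasing by the register
   size, exactly as rules 1-3 above require.  */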
13217 bool
13218 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13219 bool consecutive, bool return_pc)
13221 HOST_WIDE_INT count = XVECLEN (op, 0);
13222 rtx reg, mem, addr;
13223 unsigned regno;
13224 unsigned first_regno;
13225 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13226 rtx elt;
13227 bool addr_reg_in_reglist = false;
13228 bool update = false;
13229 int reg_increment;
13230 int offset_adj;
13231 int regs_per_val;
13233 /* If not in SImode, then registers must be consecutive
13234 (e.g., VLDM instructions for DFmode). */
13235 gcc_assert ((mode == SImode) || consecutive);
13236 /* Setting return_pc for stores is illegal. */
13237 gcc_assert (!return_pc || load);
13239 /* Set up the increments and the regs per val based on the mode. */
13240 reg_increment = GET_MODE_SIZE (mode);
13241 regs_per_val = reg_increment / 4;
13242 offset_adj = return_pc ? 1 : 0;
13244 if (count <= 1
13245 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13246 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13247 return false;
13249 /* Check if this is a write-back. */
13250 elt = XVECEXP (op, 0, offset_adj);
13251 if (GET_CODE (SET_SRC (elt)) == PLUS)
13253 i++;
13254 base = 1;
13255 update = true;
13257 /* The offset adjustment must be the number of registers being
13258 popped times the size of a single register. */
13259 if (!REG_P (SET_DEST (elt))
13260 || !REG_P (XEXP (SET_SRC (elt), 0))
13261 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13262 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13263 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13264 ((count - 1 - offset_adj) * reg_increment))
13265 return false;
13268 i = i + offset_adj;
13269 base = base + offset_adj;
13270 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13271 success depends on the type: VLDM can do just one reg,
13272 LDM must do at least two. */
13273 if ((count <= i) && (mode == SImode))
13274 return false;
13276 elt = XVECEXP (op, 0, i - 1);
13277 if (GET_CODE (elt) != SET)
13278 return false;
13280 if (load)
13282 reg = SET_DEST (elt);
13283 mem = SET_SRC (elt);
13285 else
13287 reg = SET_SRC (elt);
13288 mem = SET_DEST (elt);
13291 if (!REG_P (reg) || !MEM_P (mem))
13292 return false;
13294 regno = REGNO (reg);
13295 first_regno = regno;
13296 addr = XEXP (mem, 0);
13297 if (GET_CODE (addr) == PLUS)
13299 if (!CONST_INT_P (XEXP (addr, 1)))
13300 return false;
13302 offset = INTVAL (XEXP (addr, 1));
13303 addr = XEXP (addr, 0);
13306 if (!REG_P (addr))
13307 return false;
13309 /* Don't allow SP to be loaded unless it is also the base register. It
13310 guarantees that SP is reset correctly when an LDM instruction
13311 is interrupted. Otherwise, we might end up with a corrupt stack. */
13312 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13313 return false;
13315 for (; i < count; i++)
13317 elt = XVECEXP (op, 0, i);
13318 if (GET_CODE (elt) != SET)
13319 return false;
13321 if (load)
13323 reg = SET_DEST (elt);
13324 mem = SET_SRC (elt);
13326 else
13328 reg = SET_SRC (elt);
13329 mem = SET_DEST (elt);
13332 if (!REG_P (reg)
13333 || GET_MODE (reg) != mode
13334 || REGNO (reg) <= regno
13335 || (consecutive
13336 && (REGNO (reg) !=
13337 (unsigned int) (first_regno + regs_per_val * (i - base))))
13338 /* Don't allow SP to be loaded unless it is also the base register. It
13339 guarantees that SP is reset correctly when an LDM instruction
13340 is interrupted. Otherwise, we might end up with a corrupt stack. */
13341 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13342 || !MEM_P (mem)
13343 || GET_MODE (mem) != mode
13344 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13345 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13346 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13347 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13348 offset + (i - base) * reg_increment))
13349 && (!REG_P (XEXP (mem, 0))
13350 || offset + (i - base) * reg_increment != 0)))
13351 return false;
13353 regno = REGNO (reg);
13354 if (regno == REGNO (addr))
13355 addr_reg_in_reglist = true;
13358 if (load)
13360 if (update && addr_reg_in_reglist)
13361 return false;
13363 /* For Thumb-1, the address register is always modified - either by write-back
13364 or by explicit load. If the pattern does not describe an update,
13365 then the address register must be in the list of loaded registers. */
13366 if (TARGET_THUMB1)
13367 return update || addr_reg_in_reglist;
13370 return true;
13373 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13374 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13375 instruction. ADD_OFFSET is nonzero if the base address register needs
13376 to be modified with an add instruction before we can use it. */
13378 static bool
13379 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13380 int nops, HOST_WIDE_INT add_offset)
13382 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13383 if the offset isn't small enough. The reason 2 ldrs are faster
13384 is because these ARMs are able to do more than one cache access
13385 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13386 whilst the ARM8 has a double bandwidth cache. This means that
13387 these cores can do both an instruction fetch and a data fetch in
13388 a single cycle, so the trick of calculating the address into a
13389 scratch register (one of the result regs) and then doing a load
13390 multiple actually becomes slower (and no smaller in code size).
13391 That is the transformation
13393 ldr rd1, [rbase + offset]
13394 ldr rd2, [rbase + offset + 4]
13396 to
13398 add rd1, rbase, offset
13399 ldmia rd1, {rd1, rd2}
13401 produces worse code -- '3 cycles + any stalls on rd2' instead of
13402 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13403 access per cycle, the first sequence could never complete in less
13404 than 6 cycles, whereas the ldm sequence would only take 5 and
13405 would make better use of sequential accesses if not hitting the
13406 cache.
13408 We cheat here and test 'arm_ld_sched' which we currently know to
13409 only be true for the ARM8, ARM9 and StrongARM. If this ever
13410 changes, then the test below needs to be reworked. */
13411 if (nops == 2 && arm_ld_sched && add_offset != 0)
13412 return false;
13414 /* XScale has load-store double instructions, but they have stricter
13415 alignment requirements than load-store multiple, so we cannot
13416 use them.
13418 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13419 the pipeline until completion.
13421 NREGS CYCLES
13422 1 3
13423 2 4
13424 3 5
13425 4 6
13427 An ldr instruction takes 1-3 cycles, but does not block the
13428 pipeline.
13430 NREGS CYCLES
13431 1 1-3
13432 2 2-6
13433 3 3-9
13434 4 4-12
13436 Best case ldr will always win. However, the more ldr instructions
13437 we issue, the less likely we are to be able to schedule them well.
13438 Using ldr instructions also increases code size.
13440 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13441 for counts of 3 or 4 regs. */
13442 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13443 return false;
13444 return true;
13447 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13448 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13449 an array ORDER which describes the sequence to use when accessing the
13450 offsets that produces an ascending order. In this sequence, each
13451 offset must be larger by exactly 4 than the previous one. ORDER[0]
13452 must have been filled in with the lowest offset by the caller.
13453 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13454 we use to verify that ORDER produces an ascending order of registers.
13455 Return true if it was possible to construct such an order, false if
13456 not. */
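/* Small example (added for clarity, not in the original): with NOPS == 3,
   UNSORTED_OFFSETS == {8, 0, 4} and ORDER[0] == 1 (the index of offset 0),
   the loop below finds offset 4 at index 2 and then offset 8 at index 0,
   giving ORDER == {1, 2, 0}.  A duplicated offset or a gap other than 4
   makes the function return false.  */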
13458 static bool
13459 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13460 int *unsorted_regs)
13462 int i;
13463 for (i = 1; i < nops; i++)
13465 int j;
13467 order[i] = order[i - 1];
13468 for (j = 0; j < nops; j++)
13469 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13471 /* We must find exactly one offset that is higher than the
13472 previous one by 4. */
13473 if (order[i] != order[i - 1])
13474 return false;
13475 order[i] = j;
13477 if (order[i] == order[i - 1])
13478 return false;
13479 /* The register numbers must be ascending. */
13480 if (unsorted_regs != NULL
13481 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13482 return false;
13484 return true;
13487 /* Used to determine in a peephole whether a sequence of load
13488 instructions can be changed into a load-multiple instruction.
13489 NOPS is the number of separate load instructions we are examining. The
13490 first NOPS entries in OPERANDS are the destination registers, the
13491 next NOPS entries are memory operands. If this function is
13492 successful, *BASE is set to the common base register of the memory
13493 accesses; *LOAD_OFFSET is set to the first memory location's offset
13494 from that base register.
13495 REGS is an array filled in with the destination register numbers.
13496 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13497 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13498 the sequence of registers in REGS matches the loads from ascending memory
13499 locations, and the function verifies that the register numbers are
13500 themselves ascending. If CHECK_REGS is false, the register numbers
13501 are stored in the order they are found in the operands. */
13502 static int
13503 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13504 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13506 int unsorted_regs[MAX_LDM_STM_OPS];
13507 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13508 int order[MAX_LDM_STM_OPS];
13509 rtx base_reg_rtx = NULL;
13510 int base_reg = -1;
13511 int i, ldm_case;
13513 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13514 easily extended if required. */
13515 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13517 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13519 /* Loop over the operands and check that the memory references are
13520 suitable (i.e. immediate offsets from the same base register). At
13521 the same time, extract the target register, and the memory
13522 offsets. */
13523 for (i = 0; i < nops; i++)
13525 rtx reg;
13526 rtx offset;
13528 /* Convert a subreg of a mem into the mem itself. */
13529 if (GET_CODE (operands[nops + i]) == SUBREG)
13530 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13532 gcc_assert (MEM_P (operands[nops + i]));
13534 /* Don't reorder volatile memory references; it doesn't seem worth
13535 looking for the case where the order is ok anyway. */
13536 if (MEM_VOLATILE_P (operands[nops + i]))
13537 return 0;
13539 offset = const0_rtx;
13541 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13542 || (GET_CODE (reg) == SUBREG
13543 && REG_P (reg = SUBREG_REG (reg))))
13544 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13545 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13546 || (GET_CODE (reg) == SUBREG
13547 && REG_P (reg = SUBREG_REG (reg))))
13548 && (CONST_INT_P (offset
13549 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13551 if (i == 0)
13553 base_reg = REGNO (reg);
13554 base_reg_rtx = reg;
13555 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13556 return 0;
13558 else if (base_reg != (int) REGNO (reg))
13559 /* Not addressed from the same base register. */
13560 return 0;
13562 unsorted_regs[i] = (REG_P (operands[i])
13563 ? REGNO (operands[i])
13564 : REGNO (SUBREG_REG (operands[i])));
13566 /* If it isn't an integer register, or if it overwrites the
13567 base register but isn't the last insn in the list, then
13568 we can't do this. */
13569 if (unsorted_regs[i] < 0
13570 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13571 || unsorted_regs[i] > 14
13572 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13573 return 0;
13575 /* Don't allow SP to be loaded unless it is also the base
13576 register. It guarantees that SP is reset correctly when
13577 an LDM instruction is interrupted. Otherwise, we might
13578 end up with a corrupt stack. */
13579 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13580 return 0;
13582 unsorted_offsets[i] = INTVAL (offset);
13583 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13584 order[0] = i;
13586 else
13587 /* Not a suitable memory address. */
13588 return 0;
13591 /* All the useful information has now been extracted from the
13592 operands into unsorted_regs and unsorted_offsets; additionally,
13593 order[0] has been set to the lowest offset in the list. Sort
13594 the offsets into order, verifying that they are adjacent, and
13595 check that the register numbers are ascending. */
13596 if (!compute_offset_order (nops, unsorted_offsets, order,
13597 check_regs ? unsorted_regs : NULL))
13598 return 0;
13600 if (saved_order)
13601 memcpy (saved_order, order, sizeof order);
13603 if (base)
13605 *base = base_reg;
13607 for (i = 0; i < nops; i++)
13608 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13610 *load_offset = unsorted_offsets[order[0]];
13613 if (TARGET_THUMB1
13614 && !peep2_reg_dead_p (nops, base_reg_rtx))
13615 return 0;
13617 if (unsorted_offsets[order[0]] == 0)
13618 ldm_case = 1; /* ldmia */
13619 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13620 ldm_case = 2; /* ldmib */
13621 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13622 ldm_case = 3; /* ldmda */
13623 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13624 ldm_case = 4; /* ldmdb */
13625 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13626 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13627 ldm_case = 5;
13628 else
13629 return 0;
13631 if (!multiple_operation_profitable_p (false, nops,
13632 ldm_case == 5
13633 ? unsorted_offsets[order[0]] : 0))
13634 return 0;
13636 return ldm_case;
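/* The nonzero return value encodes the addressing mode implied by the
   offsets found above: 1 -> ldmia (lowest offset 0), 2 -> ldmib (lowest
   offset 4, ARM only), 3 -> ldmda (highest offset 0, ARM only), 4 -> ldmdb
   (highest offset -4), and 5 -> none of the above, but the lowest offset is
   a valid immediate, so the base must first be adjusted by a separate add.  */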
13639 /* Used to determine in a peephole whether a sequence of store instructions can
13640 be changed into a store-multiple instruction.
13641 NOPS is the number of separate store instructions we are examining.
13642 NOPS_TOTAL is the total number of instructions recognized by the peephole
13643 pattern.
13644 The first NOPS entries in OPERANDS are the source registers, the next
13645 NOPS entries are memory operands. If this function is successful, *BASE is
13646 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13647 to the first memory location's offset from that base register. REGS is an
13648 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13649 likewise filled with the corresponding rtx's.
13650    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13651 numbers to an ascending order of stores.
13652 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13653 from ascending memory locations, and the function verifies that the register
13654 numbers are themselves ascending. If CHECK_REGS is false, the register
13655 numbers are stored in the order they are found in the operands. */
13656 static int
13657 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13658 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13659 HOST_WIDE_INT *load_offset, bool check_regs)
13661 int unsorted_regs[MAX_LDM_STM_OPS];
13662 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13663 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13664 int order[MAX_LDM_STM_OPS];
13665 int base_reg = -1;
13666 rtx base_reg_rtx = NULL;
13667 int i, stm_case;
13669   /* Write-back of the base register is currently only supported for Thumb-1.  */
13670 int base_writeback = TARGET_THUMB1;
13672 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13673 easily extended if required. */
13674 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13676 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13678 /* Loop over the operands and check that the memory references are
13679 suitable (i.e. immediate offsets from the same base register). At
13680 the same time, extract the target register, and the memory
13681 offsets. */
13682 for (i = 0; i < nops; i++)
13684 rtx reg;
13685 rtx offset;
13687 /* Convert a subreg of a mem into the mem itself. */
13688 if (GET_CODE (operands[nops + i]) == SUBREG)
13689 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13691 gcc_assert (MEM_P (operands[nops + i]));
13693 /* Don't reorder volatile memory references; it doesn't seem worth
13694 looking for the case where the order is ok anyway. */
13695 if (MEM_VOLATILE_P (operands[nops + i]))
13696 return 0;
13698 offset = const0_rtx;
13700 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13701 || (GET_CODE (reg) == SUBREG
13702 && REG_P (reg = SUBREG_REG (reg))))
13703 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13704 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13705 || (GET_CODE (reg) == SUBREG
13706 && REG_P (reg = SUBREG_REG (reg))))
13707 && (CONST_INT_P (offset
13708 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13710 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13711 ? operands[i] : SUBREG_REG (operands[i]));
13712 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13714 if (i == 0)
13716 base_reg = REGNO (reg);
13717 base_reg_rtx = reg;
13718 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13719 return 0;
13721 else if (base_reg != (int) REGNO (reg))
13722 /* Not addressed from the same base register. */
13723 return 0;
13725 /* If it isn't an integer register, then we can't do this. */
13726 if (unsorted_regs[i] < 0
13727 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13728 /* The effects are unpredictable if the base register is
13729 both updated and stored. */
13730 || (base_writeback && unsorted_regs[i] == base_reg)
13731 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13732 || unsorted_regs[i] > 14)
13733 return 0;
13735 unsorted_offsets[i] = INTVAL (offset);
13736 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13737 order[0] = i;
13739 else
13740 /* Not a suitable memory address. */
13741 return 0;
13744 /* All the useful information has now been extracted from the
13745 operands into unsorted_regs and unsorted_offsets; additionally,
13746 order[0] has been set to the lowest offset in the list. Sort
13747 the offsets into order, verifying that they are adjacent, and
13748 check that the register numbers are ascending. */
13749 if (!compute_offset_order (nops, unsorted_offsets, order,
13750 check_regs ? unsorted_regs : NULL))
13751 return 0;
13753 if (saved_order)
13754 memcpy (saved_order, order, sizeof order);
13756 if (base)
13758 *base = base_reg;
13760 for (i = 0; i < nops; i++)
13762 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13763 if (reg_rtxs)
13764 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13767 *load_offset = unsorted_offsets[order[0]];
13770 if (TARGET_THUMB1
13771 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13772 return 0;
13774 if (unsorted_offsets[order[0]] == 0)
13775 stm_case = 1; /* stmia */
13776 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13777 stm_case = 2; /* stmib */
13778 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13779 stm_case = 3; /* stmda */
13780 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13781 stm_case = 4; /* stmdb */
13782 else
13783 return 0;
13785 if (!multiple_operation_profitable_p (false, nops, 0))
13786 return 0;
13788 return stm_case;
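/* As with the load case, the nonzero return value encodes the addressing
   mode: 1 -> stmia, 2 -> stmib (ARM only), 3 -> stmda (ARM only) and
   4 -> stmdb.  */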
13791 /* Routines for use in generating RTL. */
13793 /* Generate a load-multiple instruction. COUNT is the number of loads in
13794 the instruction; REGS and MEMS are arrays containing the operands.
13795 BASEREG is the base register to be used in addressing the memory operands.
13796    WBACK_OFFSET, if nonzero, is the amount by which the base register should
13797    be updated.  */
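/* Illustrative sketch of the result (register numbers arbitrary): with
   COUNT == 2, REGS == { 0, 1 } and WBACK_OFFSET == 8 the profitable case
   builds a PARALLEL of the form
       (parallel [(set (reg base) (plus (reg base) (const_int 8)))
                  (set (reg:SI 0) (mem ...))
                  (set (reg:SI 1) (mem ...))])
   while the unprofitable case falls back to a sequence of single moves.  */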
13799 static rtx
13800 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13801 HOST_WIDE_INT wback_offset)
13803 int i = 0, j;
13804 rtx result;
13806 if (!multiple_operation_profitable_p (false, count, 0))
13808 rtx seq;
13810 start_sequence ();
13812 for (i = 0; i < count; i++)
13813 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13815 if (wback_offset != 0)
13816 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13818 seq = get_insns ();
13819 end_sequence ();
13821 return seq;
13824 result = gen_rtx_PARALLEL (VOIDmode,
13825 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13826 if (wback_offset != 0)
13828 XVECEXP (result, 0, 0)
13829 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13830 i = 1;
13831 count++;
13834 for (j = 0; i < count; i++, j++)
13835 XVECEXP (result, 0, i)
13836 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13838 return result;
13841 /* Generate a store-multiple instruction. COUNT is the number of stores in
13842 the instruction; REGS and MEMS are arrays containing the operands.
13843 BASEREG is the base register to be used in addressing the memory operands.
13844    WBACK_OFFSET, if nonzero, is the amount by which the base register should
13845    be updated.  */
13847 static rtx
13848 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13849 HOST_WIDE_INT wback_offset)
13851 int i = 0, j;
13852 rtx result;
13854 if (GET_CODE (basereg) == PLUS)
13855 basereg = XEXP (basereg, 0);
13857 if (!multiple_operation_profitable_p (false, count, 0))
13859 rtx seq;
13861 start_sequence ();
13863 for (i = 0; i < count; i++)
13864 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13866 if (wback_offset != 0)
13867 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13869 seq = get_insns ();
13870 end_sequence ();
13872 return seq;
13875 result = gen_rtx_PARALLEL (VOIDmode,
13876 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13877 if (wback_offset != 0)
13879 XVECEXP (result, 0, 0)
13880 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13881 i = 1;
13882 count++;
13885 for (j = 0; i < count; i++, j++)
13886 XVECEXP (result, 0, i)
13887 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13889 return result;
13892 /* Generate either a load-multiple or a store-multiple instruction. This
13893 function can be used in situations where we can start with a single MEM
13894 rtx and adjust its address upwards.
13895 COUNT is the number of operations in the instruction, not counting a
13896 possible update of the base register. REGS is an array containing the
13897 register operands.
13898 BASEREG is the base register to be used in addressing the memory operands,
13899 which are constructed from BASEMEM.
13900 WRITE_BACK specifies whether the generated instruction should include an
13901 update of the base register.
13902 OFFSETP is used to pass an offset to and from this function; this offset
13903 is not used when constructing the address (instead BASEMEM should have an
13904 appropriate offset in its address), it is used only for setting
13905    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13907 static rtx
13908 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13909 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13911 rtx mems[MAX_LDM_STM_OPS];
13912 HOST_WIDE_INT offset = *offsetp;
13913 int i;
13915 gcc_assert (count <= MAX_LDM_STM_OPS);
13917 if (GET_CODE (basereg) == PLUS)
13918 basereg = XEXP (basereg, 0);
13920 for (i = 0; i < count; i++)
13922 rtx addr = plus_constant (Pmode, basereg, i * 4);
13923 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13924 offset += 4;
13927 if (write_back)
13928 *offsetp = offset;
13930 if (is_load)
13931 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13932 write_back ? 4 * count : 0);
13933 else
13934 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13935 write_back ? 4 * count : 0);
13938 rtx
13939 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13940 rtx basemem, HOST_WIDE_INT *offsetp)
13942 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13943 offsetp);
13946 rtx
13947 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13948 rtx basemem, HOST_WIDE_INT *offsetp)
13950 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13951 offsetp);
13954 /* Called from a peephole2 expander to turn a sequence of loads into an
13955 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13956 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13957    is true if we can reorder the registers, because their subsequent uses
13958    are commutative.
13959 Returns true iff we could generate a new instruction. */
13961 bool
13962 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13964 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13965 rtx mems[MAX_LDM_STM_OPS];
13966 int i, j, base_reg;
13967 rtx base_reg_rtx;
13968 HOST_WIDE_INT offset;
13969 int write_back = FALSE;
13970 int ldm_case;
13971 rtx addr;
13973 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13974 &base_reg, &offset, !sort_regs);
13976 if (ldm_case == 0)
13977 return false;
13979 if (sort_regs)
13980 for (i = 0; i < nops - 1; i++)
13981 for (j = i + 1; j < nops; j++)
13982 if (regs[i] > regs[j])
13984 int t = regs[i];
13985 regs[i] = regs[j];
13986 regs[j] = t;
13988 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13990 if (TARGET_THUMB1)
13992 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13993 gcc_assert (ldm_case == 1 || ldm_case == 5);
13994 write_back = TRUE;
13997 if (ldm_case == 5)
13999 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14000 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14001 offset = 0;
14002 if (!TARGET_THUMB1)
14004 base_reg = regs[0];
14005 base_reg_rtx = newbase;
14009 for (i = 0; i < nops; i++)
14011 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14012 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14013 SImode, addr, 0);
14015 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14016 write_back ? offset + i * 4 : 0));
14017 return true;
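/* Note the LDM_CASE == 5 path above: when the common offset cannot be
   encoded directly in an ldm, a separate add of the offset is emitted first
   (into the base register on Thumb-1, into regs[0] otherwise) and the loads
   are then generated with a zero offset from the new base.  */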
14020 /* Called from a peephole2 expander to turn a sequence of stores into an
14021 STM instruction. OPERANDS are the operands found by the peephole matcher;
14022 NOPS indicates how many separate stores we are trying to combine.
14023 Returns true iff we could generate a new instruction. */
14025 bool
14026 gen_stm_seq (rtx *operands, int nops)
14028 int i;
14029 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14030 rtx mems[MAX_LDM_STM_OPS];
14031 int base_reg;
14032 rtx base_reg_rtx;
14033 HOST_WIDE_INT offset;
14034 int write_back = FALSE;
14035 int stm_case;
14036 rtx addr;
14037 bool base_reg_dies;
14039 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14040 mem_order, &base_reg, &offset, true);
14042 if (stm_case == 0)
14043 return false;
14045 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14047 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14048 if (TARGET_THUMB1)
14050 gcc_assert (base_reg_dies);
14051 write_back = TRUE;
14054 if (stm_case == 5)
14056 gcc_assert (base_reg_dies);
14057 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14058 offset = 0;
14061 addr = plus_constant (Pmode, base_reg_rtx, offset);
14063 for (i = 0; i < nops; i++)
14065 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14066 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14067 SImode, addr, 0);
14069 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14070 write_back ? offset + i * 4 : 0));
14071 return true;
14074 /* Called from a peephole2 expander to turn a sequence of stores that are
14075 preceded by constant loads into an STM instruction. OPERANDS are the
14076 operands found by the peephole matcher; NOPS indicates how many
14077 separate stores we are trying to combine; there are 2 * NOPS
14078 instructions in the peephole.
14079 Returns true iff we could generate a new instruction. */
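/* Illustrative example: if both matched stores use the same source register
   (say r0), each preceded by a constant load into it, a free register is
   allocated for one of the values, the constants are re-emitted as moves in
   memory order, and the two stores collapse into a single stm.  */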
14081 bool
14082 gen_const_stm_seq (rtx *operands, int nops)
14084 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14085 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14086 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14087 rtx mems[MAX_LDM_STM_OPS];
14088 int base_reg;
14089 rtx base_reg_rtx;
14090 HOST_WIDE_INT offset;
14091 int write_back = FALSE;
14092 int stm_case;
14093 rtx addr;
14094 bool base_reg_dies;
14095 int i, j;
14096 HARD_REG_SET allocated;
14098 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14099 mem_order, &base_reg, &offset, false);
14101 if (stm_case == 0)
14102 return false;
14104 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14106 /* If the same register is used more than once, try to find a free
14107 register. */
14108 CLEAR_HARD_REG_SET (allocated);
14109 for (i = 0; i < nops; i++)
14111 for (j = i + 1; j < nops; j++)
14112 if (regs[i] == regs[j])
14114 rtx t = peep2_find_free_register (0, nops * 2,
14115 TARGET_THUMB1 ? "l" : "r",
14116 SImode, &allocated);
14117 if (t == NULL_RTX)
14118 return false;
14119 reg_rtxs[i] = t;
14120 regs[i] = REGNO (t);
14124 /* Compute an ordering that maps the register numbers to an ascending
14125 sequence. */
14126 reg_order[0] = 0;
14127 for (i = 0; i < nops; i++)
14128 if (regs[i] < regs[reg_order[0]])
14129 reg_order[0] = i;
14131 for (i = 1; i < nops; i++)
14133 int this_order = reg_order[i - 1];
14134 for (j = 0; j < nops; j++)
14135 if (regs[j] > regs[reg_order[i - 1]]
14136 && (this_order == reg_order[i - 1]
14137 || regs[j] < regs[this_order]))
14138 this_order = j;
14139 reg_order[i] = this_order;
14142 /* Ensure that registers that must be live after the instruction end
14143 up with the correct value. */
14144 for (i = 0; i < nops; i++)
14146 int this_order = reg_order[i];
14147 if ((this_order != mem_order[i]
14148 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14149 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14150 return false;
14153 /* Load the constants. */
14154 for (i = 0; i < nops; i++)
14156 rtx op = operands[2 * nops + mem_order[i]];
14157 sorted_regs[i] = regs[reg_order[i]];
14158 emit_move_insn (reg_rtxs[reg_order[i]], op);
14161 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14163 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14164 if (TARGET_THUMB1)
14166 gcc_assert (base_reg_dies);
14167 write_back = TRUE;
14170 if (stm_case == 5)
14172 gcc_assert (base_reg_dies);
14173 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14174 offset = 0;
14177 addr = plus_constant (Pmode, base_reg_rtx, offset);
14179 for (i = 0; i < nops; i++)
14181 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14182 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14183 SImode, addr, 0);
14185 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14186 write_back ? offset + i * 4 : 0));
14187 return true;
14190 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14191 unaligned copies on processors which support unaligned semantics for those
14192 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14193 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14194 An interleave factor of 1 (the minimum) will perform no interleaving.
14195 Load/store multiple are used for aligned addresses where possible. */
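/* For example, with INTERLEAVE_FACTOR == 2 each iteration of the main loop
   below copies an 8-byte block as load, load, store, store (or ldm/stm when
   the corresponding side is word aligned), and any trailing 1-7 bytes are
   then copied with word, halfword and byte operations.  */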
14197 static void
14198 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14199 HOST_WIDE_INT length,
14200 unsigned int interleave_factor)
14202 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14203 int *regnos = XALLOCAVEC (int, interleave_factor);
14204 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14205 HOST_WIDE_INT i, j;
14206 HOST_WIDE_INT remaining = length, words;
14207 rtx halfword_tmp = NULL, byte_tmp = NULL;
14208 rtx dst, src;
14209 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14210 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14211 HOST_WIDE_INT srcoffset, dstoffset;
14212 HOST_WIDE_INT src_autoinc, dst_autoinc;
14213 rtx mem, addr;
14215 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14217 /* Use hard registers if we have aligned source or destination so we can use
14218 load/store multiple with contiguous registers. */
14219 if (dst_aligned || src_aligned)
14220 for (i = 0; i < interleave_factor; i++)
14221 regs[i] = gen_rtx_REG (SImode, i);
14222 else
14223 for (i = 0; i < interleave_factor; i++)
14224 regs[i] = gen_reg_rtx (SImode);
14226 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14227 src = copy_addr_to_reg (XEXP (srcbase, 0));
14229 srcoffset = dstoffset = 0;
14231 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14232 For copying the last bytes we want to subtract this offset again. */
14233 src_autoinc = dst_autoinc = 0;
14235 for (i = 0; i < interleave_factor; i++)
14236 regnos[i] = i;
14238 /* Copy BLOCK_SIZE_BYTES chunks. */
14240 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14242 /* Load words. */
14243 if (src_aligned && interleave_factor > 1)
14245 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14246 TRUE, srcbase, &srcoffset));
14247 src_autoinc += UNITS_PER_WORD * interleave_factor;
14249 else
14251 for (j = 0; j < interleave_factor; j++)
14253 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14254 - src_autoinc));
14255 mem = adjust_automodify_address (srcbase, SImode, addr,
14256 srcoffset + j * UNITS_PER_WORD);
14257 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14259 srcoffset += block_size_bytes;
14262 /* Store words. */
14263 if (dst_aligned && interleave_factor > 1)
14265 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14266 TRUE, dstbase, &dstoffset));
14267 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14269 else
14271 for (j = 0; j < interleave_factor; j++)
14273 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14274 - dst_autoinc));
14275 mem = adjust_automodify_address (dstbase, SImode, addr,
14276 dstoffset + j * UNITS_PER_WORD);
14277 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14279 dstoffset += block_size_bytes;
14282 remaining -= block_size_bytes;
14285 /* Copy any whole words left (note these aren't interleaved with any
14286 subsequent halfword/byte load/stores in the interests of simplicity). */
14288 words = remaining / UNITS_PER_WORD;
14290 gcc_assert (words < interleave_factor);
14292 if (src_aligned && words > 1)
14294 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14295 &srcoffset));
14296 src_autoinc += UNITS_PER_WORD * words;
14298 else
14300 for (j = 0; j < words; j++)
14302 addr = plus_constant (Pmode, src,
14303 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14304 mem = adjust_automodify_address (srcbase, SImode, addr,
14305 srcoffset + j * UNITS_PER_WORD);
14306 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14308 srcoffset += words * UNITS_PER_WORD;
14311 if (dst_aligned && words > 1)
14313 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14314 &dstoffset));
14315 dst_autoinc += words * UNITS_PER_WORD;
14317 else
14319 for (j = 0; j < words; j++)
14321 addr = plus_constant (Pmode, dst,
14322 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14323 mem = adjust_automodify_address (dstbase, SImode, addr,
14324 dstoffset + j * UNITS_PER_WORD);
14325 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14327 dstoffset += words * UNITS_PER_WORD;
14330 remaining -= words * UNITS_PER_WORD;
14332 gcc_assert (remaining < 4);
14334 /* Copy a halfword if necessary. */
14336 if (remaining >= 2)
14338 halfword_tmp = gen_reg_rtx (SImode);
14340 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14341 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14342 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14344 /* Either write out immediately, or delay until we've loaded the last
14345 byte, depending on interleave factor. */
14346 if (interleave_factor == 1)
14348 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14349 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14350 emit_insn (gen_unaligned_storehi (mem,
14351 gen_lowpart (HImode, halfword_tmp)));
14352 halfword_tmp = NULL;
14353 dstoffset += 2;
14356 remaining -= 2;
14357 srcoffset += 2;
14360 gcc_assert (remaining < 2);
14362 /* Copy last byte. */
14364 if ((remaining & 1) != 0)
14366 byte_tmp = gen_reg_rtx (SImode);
14368 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14369 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14370 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14372 if (interleave_factor == 1)
14374 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14375 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14376 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14377 byte_tmp = NULL;
14378 dstoffset++;
14381 remaining--;
14382 srcoffset++;
14385 /* Store last halfword if we haven't done so already. */
14387 if (halfword_tmp)
14389 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14390 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14391 emit_insn (gen_unaligned_storehi (mem,
14392 gen_lowpart (HImode, halfword_tmp)));
14393 dstoffset += 2;
14396 /* Likewise for last byte. */
14398 if (byte_tmp)
14400 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14401 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14402 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14403 dstoffset++;
14406 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14409 /* From mips_adjust_block_mem:
14411 Helper function for doing a loop-based block operation on memory
14412 reference MEM. Each iteration of the loop will operate on LENGTH
14413 bytes of MEM.
14415 Create a new base register for use within the loop and point it to
14416 the start of MEM. Create a new memory reference that uses this
14417 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14419 static void
14420 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14421 rtx *loop_mem)
14423 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14425 /* Although the new mem does not refer to a known location,
14426 it does keep up to LENGTH bytes of alignment. */
14427 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14428 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14431 /* From mips_block_move_loop:
14433 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14434 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14435 the memory regions do not overlap. */
14437 static void
14438 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14439 unsigned int interleave_factor,
14440 HOST_WIDE_INT bytes_per_iter)
14442 rtx src_reg, dest_reg, final_src, test;
14443 HOST_WIDE_INT leftover;
14445 leftover = length % bytes_per_iter;
14446 length -= leftover;
14448 /* Create registers and memory references for use within the loop. */
14449 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14450 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14452 /* Calculate the value that SRC_REG should have after the last iteration of
14453 the loop. */
14454 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14455 0, 0, OPTAB_WIDEN);
14457 /* Emit the start of the loop. */
14458 rtx_code_label *label = gen_label_rtx ();
14459 emit_label (label);
14461 /* Emit the loop body. */
14462 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14463 interleave_factor);
14465 /* Move on to the next block. */
14466 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14467 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14469 /* Emit the loop condition. */
14470 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14471 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14473 /* Mop up any left-over bytes. */
14474 if (leftover)
14475 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14478 /* Emit a block move when either the source or destination is unaligned (not
14479 aligned to a four-byte boundary). This may need further tuning depending on
14480 core type, optimize_size setting, etc. */
14482 static int
14483 arm_movmemqi_unaligned (rtx *operands)
14485 HOST_WIDE_INT length = INTVAL (operands[2]);
14487 if (optimize_size)
14489 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14490 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14491 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14492 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14493 or dst_aligned though: allow more interleaving in those cases since the
14494 resulting code can be smaller. */
14495 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14496 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14498 if (length > 12)
14499 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14500 interleave_factor, bytes_per_iter);
14501 else
14502 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14503 interleave_factor);
14505 else
14507 /* Note that the loop created by arm_block_move_unaligned_loop may be
14508 subject to loop unrolling, which makes tuning this condition a little
14509 redundant. */
14510 if (length > 32)
14511 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14512 else
14513 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14516 return 1;
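/* Worked example: an unaligned 40-byte copy when not optimizing for size
   takes the length > 32 path; the loop moves 16 bytes per iteration
   (interleave factor 4) for 32 bytes, and the remaining 8 bytes are copied
   straight-line.  */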
14519 int
14520 arm_gen_movmemqi (rtx *operands)
14522 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14523 HOST_WIDE_INT srcoffset, dstoffset;
14524 int i;
14525 rtx src, dst, srcbase, dstbase;
14526 rtx part_bytes_reg = NULL;
14527 rtx mem;
14529 if (!CONST_INT_P (operands[2])
14530 || !CONST_INT_P (operands[3])
14531 || INTVAL (operands[2]) > 64)
14532 return 0;
14534 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14535 return arm_movmemqi_unaligned (operands);
14537 if (INTVAL (operands[3]) & 3)
14538 return 0;
14540 dstbase = operands[0];
14541 srcbase = operands[1];
14543 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14544 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14546 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14547 out_words_to_go = INTVAL (operands[2]) / 4;
14548 last_bytes = INTVAL (operands[2]) & 3;
14549 dstoffset = srcoffset = 0;
14551 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14552 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14554 for (i = 0; in_words_to_go >= 2; i+=4)
14556 if (in_words_to_go > 4)
14557 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14558 TRUE, srcbase, &srcoffset));
14559 else
14560 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14561 src, FALSE, srcbase,
14562 &srcoffset));
14564 if (out_words_to_go)
14566 if (out_words_to_go > 4)
14567 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14568 TRUE, dstbase, &dstoffset));
14569 else if (out_words_to_go != 1)
14570 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14571 out_words_to_go, dst,
14572 (last_bytes == 0
14573 ? FALSE : TRUE),
14574 dstbase, &dstoffset));
14575 else
14577 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14578 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14579 if (last_bytes != 0)
14581 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14582 dstoffset += 4;
14587 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14588 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14591 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14592 if (out_words_to_go)
14594 rtx sreg;
14596 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14597 sreg = copy_to_reg (mem);
14599 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14600 emit_move_insn (mem, sreg);
14601 in_words_to_go--;
14603 gcc_assert (!in_words_to_go); /* Sanity check */
14606 if (in_words_to_go)
14608 gcc_assert (in_words_to_go > 0);
14610 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14611 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14614 gcc_assert (!last_bytes || part_bytes_reg);
14616 if (BYTES_BIG_ENDIAN && last_bytes)
14618 rtx tmp = gen_reg_rtx (SImode);
14620 /* The bytes we want are in the top end of the word. */
14621 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14622 GEN_INT (8 * (4 - last_bytes))));
14623 part_bytes_reg = tmp;
14625 while (last_bytes)
14627 mem = adjust_automodify_address (dstbase, QImode,
14628 plus_constant (Pmode, dst,
14629 last_bytes - 1),
14630 dstoffset + last_bytes - 1);
14631 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14633 if (--last_bytes)
14635 tmp = gen_reg_rtx (SImode);
14636 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14637 part_bytes_reg = tmp;
14642 else
14644 if (last_bytes > 1)
14646 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14647 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14648 last_bytes -= 2;
14649 if (last_bytes)
14651 rtx tmp = gen_reg_rtx (SImode);
14652 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14653 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14654 part_bytes_reg = tmp;
14655 dstoffset += 2;
14659 if (last_bytes)
14661 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14662 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14666 return 1;
14669 /* Helper for gen_movmem_ldrd_strd.  Return a new memory reference like MEM
14670    but with its address advanced by the size of MEM's mode.  */
14671 inline static rtx
14672 next_consecutive_mem (rtx mem)
14674 machine_mode mode = GET_MODE (mem);
14675 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14676 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14678 return adjust_automodify_address (mem, mode, addr, offset);
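/* E.g. given a DImode reference to [r0], this returns a DImode reference to
   [r0, #8], with the MEM_OFFSET attribute advanced by the same 8 bytes.  */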
14681 /* Copy using LDRD/STRD instructions whenever possible.
14682 Returns true upon success. */
14683 bool
14684 gen_movmem_ldrd_strd (rtx *operands)
14686 unsigned HOST_WIDE_INT len;
14687 HOST_WIDE_INT align;
14688 rtx src, dst, base;
14689 rtx reg0;
14690 bool src_aligned, dst_aligned;
14691 bool src_volatile, dst_volatile;
14693 gcc_assert (CONST_INT_P (operands[2]));
14694 gcc_assert (CONST_INT_P (operands[3]));
14696 len = UINTVAL (operands[2]);
14697 if (len > 64)
14698 return false;
14700 /* Maximum alignment we can assume for both src and dst buffers. */
14701 align = INTVAL (operands[3]);
14703 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14704 return false;
14706 /* Place src and dst addresses in registers
14707 and update the corresponding mem rtx. */
14708 dst = operands[0];
14709 dst_volatile = MEM_VOLATILE_P (dst);
14710 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14711 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14712 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14714 src = operands[1];
14715 src_volatile = MEM_VOLATILE_P (src);
14716 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14717 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14718 src = adjust_automodify_address (src, VOIDmode, base, 0);
14720 if (!unaligned_access && !(src_aligned && dst_aligned))
14721 return false;
14723 if (src_volatile || dst_volatile)
14724 return false;
14726 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14727 if (!(dst_aligned || src_aligned))
14728 return arm_gen_movmemqi (operands);
14730 src = adjust_address (src, DImode, 0);
14731 dst = adjust_address (dst, DImode, 0);
14732 while (len >= 8)
14734 len -= 8;
14735 reg0 = gen_reg_rtx (DImode);
14736 if (src_aligned)
14737 emit_move_insn (reg0, src);
14738 else
14739 emit_insn (gen_unaligned_loaddi (reg0, src));
14741 if (dst_aligned)
14742 emit_move_insn (dst, reg0);
14743 else
14744 emit_insn (gen_unaligned_storedi (dst, reg0));
14746 src = next_consecutive_mem (src);
14747 dst = next_consecutive_mem (dst);
14750 gcc_assert (len < 8);
14751 if (len >= 4)
14753       /* At least a word but less than a double-word left to copy.  Copy a word.  */
14754 reg0 = gen_reg_rtx (SImode);
14755 src = adjust_address (src, SImode, 0);
14756 dst = adjust_address (dst, SImode, 0);
14757 if (src_aligned)
14758 emit_move_insn (reg0, src);
14759 else
14760 emit_insn (gen_unaligned_loadsi (reg0, src));
14762 if (dst_aligned)
14763 emit_move_insn (dst, reg0);
14764 else
14765 emit_insn (gen_unaligned_storesi (dst, reg0));
14767 src = next_consecutive_mem (src);
14768 dst = next_consecutive_mem (dst);
14769 len -= 4;
14772 if (len == 0)
14773 return true;
14775 /* Copy the remaining bytes. */
14776 if (len >= 2)
14778 dst = adjust_address (dst, HImode, 0);
14779 src = adjust_address (src, HImode, 0);
14780 reg0 = gen_reg_rtx (SImode);
14781 if (src_aligned)
14782 emit_insn (gen_zero_extendhisi2 (reg0, src));
14783 else
14784 emit_insn (gen_unaligned_loadhiu (reg0, src));
14786 if (dst_aligned)
14787 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14788 else
14789 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14791 src = next_consecutive_mem (src);
14792 dst = next_consecutive_mem (dst);
14793 if (len == 2)
14794 return true;
14797 dst = adjust_address (dst, QImode, 0);
14798 src = adjust_address (src, QImode, 0);
14799 reg0 = gen_reg_rtx (QImode);
14800 emit_move_insn (reg0, src);
14801 emit_move_insn (dst, reg0);
14802 return true;
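/* Illustrative breakdown: a 15-byte copy with suitably aligned buffers is
   emitted above as one doubleword move (LDRD/STRD, or an unaligned
   equivalent), one word move, one halfword move and a final byte move.  */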
14805 /* Select a dominance comparison mode if possible for a test of the general
14806 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14807 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14808 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14809 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14810 In all cases OP will be either EQ or NE, but we don't need to know which
14811 here. If we are unable to support a dominance comparison we return
14812 CC mode. This will then fail to match for the RTL expressions that
14813 generate this call. */
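/* For example, a test of the form ((a == b) && (c == d)) passed in with
   COND_OR == DOM_CC_X_AND_Y selects CC_DEQmode, while ((a < b) || (a <= c))
   with COND_OR == DOM_CC_X_OR_Y selects CC_DLEmode, since any values
   satisfying LT also satisfy LE.  */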
14814 machine_mode
14815 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14817 enum rtx_code cond1, cond2;
14818 int swapped = 0;
14820 /* Currently we will probably get the wrong result if the individual
14821 comparisons are not simple. This also ensures that it is safe to
14822 reverse a comparison if necessary. */
14823 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14824 != CCmode)
14825 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14826 != CCmode))
14827 return CCmode;
14829 /* The if_then_else variant of this tests the second condition if the
14830 first passes, but is true if the first fails. Reverse the first
14831 condition to get a true "inclusive-or" expression. */
14832 if (cond_or == DOM_CC_NX_OR_Y)
14833 cond1 = reverse_condition (cond1);
14835 /* If the comparisons are not equal, and one doesn't dominate the other,
14836 then we can't do this. */
14837 if (cond1 != cond2
14838 && !comparison_dominates_p (cond1, cond2)
14839 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14840 return CCmode;
14842 if (swapped)
14843 std::swap (cond1, cond2);
14845 switch (cond1)
14847 case EQ:
14848 if (cond_or == DOM_CC_X_AND_Y)
14849 return CC_DEQmode;
14851 switch (cond2)
14853 case EQ: return CC_DEQmode;
14854 case LE: return CC_DLEmode;
14855 case LEU: return CC_DLEUmode;
14856 case GE: return CC_DGEmode;
14857 case GEU: return CC_DGEUmode;
14858 default: gcc_unreachable ();
14861 case LT:
14862 if (cond_or == DOM_CC_X_AND_Y)
14863 return CC_DLTmode;
14865 switch (cond2)
14867 case LT:
14868 return CC_DLTmode;
14869 case LE:
14870 return CC_DLEmode;
14871 case NE:
14872 return CC_DNEmode;
14873 default:
14874 gcc_unreachable ();
14877 case GT:
14878 if (cond_or == DOM_CC_X_AND_Y)
14879 return CC_DGTmode;
14881 switch (cond2)
14883 case GT:
14884 return CC_DGTmode;
14885 case GE:
14886 return CC_DGEmode;
14887 case NE:
14888 return CC_DNEmode;
14889 default:
14890 gcc_unreachable ();
14893 case LTU:
14894 if (cond_or == DOM_CC_X_AND_Y)
14895 return CC_DLTUmode;
14897 switch (cond2)
14899 case LTU:
14900 return CC_DLTUmode;
14901 case LEU:
14902 return CC_DLEUmode;
14903 case NE:
14904 return CC_DNEmode;
14905 default:
14906 gcc_unreachable ();
14909 case GTU:
14910 if (cond_or == DOM_CC_X_AND_Y)
14911 return CC_DGTUmode;
14913 switch (cond2)
14915 case GTU:
14916 return CC_DGTUmode;
14917 case GEU:
14918 return CC_DGEUmode;
14919 case NE:
14920 return CC_DNEmode;
14921 default:
14922 gcc_unreachable ();
14925 /* The remaining cases only occur when both comparisons are the
14926 same. */
14927 case NE:
14928 gcc_assert (cond1 == cond2);
14929 return CC_DNEmode;
14931 case LE:
14932 gcc_assert (cond1 == cond2);
14933 return CC_DLEmode;
14935 case GE:
14936 gcc_assert (cond1 == cond2);
14937 return CC_DGEmode;
14939 case LEU:
14940 gcc_assert (cond1 == cond2);
14941 return CC_DLEUmode;
14943 case GEU:
14944 gcc_assert (cond1 == cond2);
14945 return CC_DGEUmode;
14947 default:
14948 gcc_unreachable ();
14952 machine_mode
14953 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14955 /* All floating point compares return CCFP if it is an equality
14956 comparison, and CCFPE otherwise. */
14957 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14959 switch (op)
14961 case EQ:
14962 case NE:
14963 case UNORDERED:
14964 case ORDERED:
14965 case UNLT:
14966 case UNLE:
14967 case UNGT:
14968 case UNGE:
14969 case UNEQ:
14970 case LTGT:
14971 return CCFPmode;
14973 case LT:
14974 case LE:
14975 case GT:
14976 case GE:
14977 return CCFPEmode;
14979 default:
14980 gcc_unreachable ();
14984 /* A compare with a shifted operand. Because of canonicalization, the
14985 comparison will have to be swapped when we emit the assembler. */
14986 if (GET_MODE (y) == SImode
14987 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14988 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14989 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14990 || GET_CODE (x) == ROTATERT))
14991 return CC_SWPmode;
14993 /* This operation is performed swapped, but since we only rely on the Z
14994 flag we don't need an additional mode. */
14995 if (GET_MODE (y) == SImode
14996 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14997 && GET_CODE (x) == NEG
14998 && (op == EQ || op == NE))
14999 return CC_Zmode;
15001 /* This is a special case that is used by combine to allow a
15002 comparison of a shifted byte load to be split into a zero-extend
15003 followed by a comparison of the shifted integer (only valid for
15004 equalities and unsigned inequalities). */
15005 if (GET_MODE (x) == SImode
15006 && GET_CODE (x) == ASHIFT
15007 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15008 && GET_CODE (XEXP (x, 0)) == SUBREG
15009 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15010 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15011 && (op == EQ || op == NE
15012 || op == GEU || op == GTU || op == LTU || op == LEU)
15013 && CONST_INT_P (y))
15014 return CC_Zmode;
15016 /* A construct for a conditional compare, if the false arm contains
15017 0, then both conditions must be true, otherwise either condition
15018 must be true. Not all conditions are possible, so CCmode is
15019 returned if it can't be done. */
15020 if (GET_CODE (x) == IF_THEN_ELSE
15021 && (XEXP (x, 2) == const0_rtx
15022 || XEXP (x, 2) == const1_rtx)
15023 && COMPARISON_P (XEXP (x, 0))
15024 && COMPARISON_P (XEXP (x, 1)))
15025 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15026 INTVAL (XEXP (x, 2)));
15028 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15029 if (GET_CODE (x) == AND
15030 && (op == EQ || op == NE)
15031 && COMPARISON_P (XEXP (x, 0))
15032 && COMPARISON_P (XEXP (x, 1)))
15033 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15034 DOM_CC_X_AND_Y);
15036 if (GET_CODE (x) == IOR
15037 && (op == EQ || op == NE)
15038 && COMPARISON_P (XEXP (x, 0))
15039 && COMPARISON_P (XEXP (x, 1)))
15040 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15041 DOM_CC_X_OR_Y);
15043 /* An operation (on Thumb) where we want to test for a single bit.
15044 This is done by shifting that bit up into the top bit of a
15045 scratch register; we can then branch on the sign bit. */
15046 if (TARGET_THUMB1
15047 && GET_MODE (x) == SImode
15048 && (op == EQ || op == NE)
15049 && GET_CODE (x) == ZERO_EXTRACT
15050 && XEXP (x, 1) == const1_rtx)
15051 return CC_Nmode;
15053 /* An operation that sets the condition codes as a side-effect, the
15054 V flag is not set correctly, so we can only use comparisons where
15055 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15056 instead.) */
15057 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15058 if (GET_MODE (x) == SImode
15059 && y == const0_rtx
15060 && (op == EQ || op == NE || op == LT || op == GE)
15061 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15062 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15063 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15064 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15065 || GET_CODE (x) == LSHIFTRT
15066 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15067 || GET_CODE (x) == ROTATERT
15068 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15069 return CC_NOOVmode;
15071 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15072 return CC_Zmode;
15074 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15075 && GET_CODE (x) == PLUS
15076 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15077 return CC_Cmode;
15079 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15081 switch (op)
15083 case EQ:
15084 case NE:
15085 /* A DImode comparison against zero can be implemented by
15086 or'ing the two halves together. */
15087 if (y == const0_rtx)
15088 return CC_Zmode;
15090 /* We can do an equality test in three Thumb instructions. */
15091 if (!TARGET_32BIT)
15092 return CC_Zmode;
15094 /* FALLTHROUGH */
15096 case LTU:
15097 case LEU:
15098 case GTU:
15099 case GEU:
15100 /* DImode unsigned comparisons can be implemented by cmp +
15101 cmpeq without a scratch register. Not worth doing in
15102 Thumb-2. */
15103 if (TARGET_32BIT)
15104 return CC_CZmode;
15106 /* FALLTHROUGH */
15108 case LT:
15109 case LE:
15110 case GT:
15111 case GE:
15112 /* DImode signed and unsigned comparisons can be implemented
15113 by cmp + sbcs with a scratch register, but that does not
15114 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15115 gcc_assert (op != EQ && op != NE);
15116 return CC_NCVmode;
15118 default:
15119 gcc_unreachable ();
15123 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15124 return GET_MODE (x);
15126 return CCmode;
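/* A few concrete mappings from the cases above: a floating-point equality
   test gives CCFPmode and a floating-point ordering test gives CCFPEmode;
   comparing a register against a shifted operand gives CC_SWPmode; an EQ/NE
   test of a DImode value against zero gives CC_Zmode; and a DImode unsigned
   comparison on a 32-bit target gives CC_CZmode.  */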
15129 /* X and Y are two things to compare using CODE. Emit the compare insn and
15130    return the rtx for the CC register in the proper mode.  SCRATCH, if nonnull,
15131    may be clobbered as a scratch register for DImode comparisons.  */
15132 rtx
15133 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15135 machine_mode mode;
15136 rtx cc_reg;
15137 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15139 /* We might have X as a constant, Y as a register because of the predicates
15140 used for cmpdi. If so, force X to a register here. */
15141 if (dimode_comparison && !REG_P (x))
15142 x = force_reg (DImode, x);
15144 mode = SELECT_CC_MODE (code, x, y);
15145 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15147 if (dimode_comparison
15148 && mode != CC_CZmode)
15150 rtx clobber, set;
15152 /* To compare two non-zero values for equality, XOR them and
15153 then compare against zero. Not used for ARM mode; there
15154 CC_CZmode is cheaper. */
15155 if (mode == CC_Zmode && y != const0_rtx)
15157 gcc_assert (!reload_completed);
15158 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15159 y = const0_rtx;
15162 /* A scratch register is required. */
15163 if (reload_completed)
15164 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15165 else
15166 scratch = gen_rtx_SCRATCH (SImode);
15168 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15169 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15170 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15172 else
15173 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15175 return cc_reg;
15178 /* Generate a sequence of insns that will generate the correct return
15179 address mask depending on the physical architecture that the program
15180 is running on. */
15181 rtx
15182 arm_gen_return_addr_mask (void)
15184 rtx reg = gen_reg_rtx (Pmode);
15186 emit_insn (gen_return_addr_mask (reg));
15187 return reg;
15190 void
15191 arm_reload_in_hi (rtx *operands)
15193 rtx ref = operands[1];
15194 rtx base, scratch;
15195 HOST_WIDE_INT offset = 0;
15197 if (GET_CODE (ref) == SUBREG)
15199 offset = SUBREG_BYTE (ref);
15200 ref = SUBREG_REG (ref);
15203 if (REG_P (ref))
15205 /* We have a pseudo which has been spilt onto the stack; there
15206 are two cases here: the first where there is a simple
15207 stack-slot replacement and a second where the stack-slot is
15208 out of range, or is used as a subreg. */
15209 if (reg_equiv_mem (REGNO (ref)))
15211 ref = reg_equiv_mem (REGNO (ref));
15212 base = find_replacement (&XEXP (ref, 0));
15214 else
15215 /* The slot is out of range, or was dressed up in a SUBREG. */
15216 base = reg_equiv_address (REGNO (ref));
15218 else
15219 base = find_replacement (&XEXP (ref, 0));
15221 /* Handle the case where the address is too complex to be offset by 1. */
15222 if (GET_CODE (base) == MINUS
15223 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15225 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15227 emit_set_insn (base_plus, base);
15228 base = base_plus;
15230 else if (GET_CODE (base) == PLUS)
15232 /* The addend must be CONST_INT, or we would have dealt with it above. */
15233 HOST_WIDE_INT hi, lo;
15235 offset += INTVAL (XEXP (base, 1));
15236 base = XEXP (base, 0);
15238 /* Rework the address into a legal sequence of insns. */
15239 /* Valid range for lo is -4095 -> 4095 */
15240 lo = (offset >= 0
15241 ? (offset & 0xfff)
15242 : -((-offset) & 0xfff));
15244 /* Corner case, if lo is the max offset then we would be out of range
15245 once we have added the additional 1 below, so bump the msb into the
15246 pre-loading insn(s). */
15247 if (lo == 4095)
15248 lo &= 0x7ff;
15250 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15251 ^ (HOST_WIDE_INT) 0x80000000)
15252 - (HOST_WIDE_INT) 0x80000000);
15254 gcc_assert (hi + lo == offset);
15256 if (hi != 0)
15258 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15260 /* Get the base address; addsi3 knows how to handle constants
15261 that require more than one insn. */
15262 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15263 base = base_plus;
15264 offset = lo;
15268 /* Operands[2] may overlap operands[0] (though it won't overlap
15269 operands[1]), that's why we asked for a DImode reg -- so we can
15270 use the bit that does not overlap. */
15271 if (REGNO (operands[2]) == REGNO (operands[0]))
15272 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15273 else
15274 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15276 emit_insn (gen_zero_extendqisi2 (scratch,
15277 gen_rtx_MEM (QImode,
15278 plus_constant (Pmode, base,
15279 offset))));
15280 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15281 gen_rtx_MEM (QImode,
15282 plus_constant (Pmode, base,
15283 offset + 1))));
15284 if (!BYTES_BIG_ENDIAN)
15285 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15286 gen_rtx_IOR (SImode,
15287 gen_rtx_ASHIFT
15288 (SImode,
15289 gen_rtx_SUBREG (SImode, operands[0], 0),
15290 GEN_INT (8)),
15291 scratch));
15292 else
15293 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15294 gen_rtx_IOR (SImode,
15295 gen_rtx_ASHIFT (SImode, scratch,
15296 GEN_INT (8)),
15297 gen_rtx_SUBREG (SImode, operands[0], 0)));
15300 /* Handle storing a half-word to memory during reload by synthesizing it as two
15301 byte stores. Take care not to clobber the input values until after we
15302 have moved them somewhere safe. This code assumes that if the DImode
15303 scratch in operands[2] overlaps either the input value or output address
15304 in some way, then that value must die in this insn (we absolutely need
15305 two scratch registers for some corner cases). */
15306 void
15307 arm_reload_out_hi (rtx *operands)
15309 rtx ref = operands[0];
15310 rtx outval = operands[1];
15311 rtx base, scratch;
15312 HOST_WIDE_INT offset = 0;
15314 if (GET_CODE (ref) == SUBREG)
15316 offset = SUBREG_BYTE (ref);
15317 ref = SUBREG_REG (ref);
15320 if (REG_P (ref))
15322 /* We have a pseudo which has been spilt onto the stack; there
15323 are two cases here: the first where there is a simple
15324 stack-slot replacement and a second where the stack-slot is
15325 out of range, or is used as a subreg. */
15326 if (reg_equiv_mem (REGNO (ref)))
15328 ref = reg_equiv_mem (REGNO (ref));
15329 base = find_replacement (&XEXP (ref, 0));
15331 else
15332 /* The slot is out of range, or was dressed up in a SUBREG. */
15333 base = reg_equiv_address (REGNO (ref));
15335 else
15336 base = find_replacement (&XEXP (ref, 0));
15338 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15340 /* Handle the case where the address is too complex to be offset by 1. */
15341 if (GET_CODE (base) == MINUS
15342 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15344 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15346 /* Be careful not to destroy OUTVAL. */
15347 if (reg_overlap_mentioned_p (base_plus, outval))
15349 /* Updating base_plus might destroy outval, see if we can
15350 swap the scratch and base_plus. */
15351 if (!reg_overlap_mentioned_p (scratch, outval))
15352 std::swap (scratch, base_plus);
15353 else
15355 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15357 /* Be conservative and copy OUTVAL into the scratch now,
15358 this should only be necessary if outval is a subreg
15359 of something larger than a word. */
15360 /* XXX Might this clobber base? I can't see how it can,
15361 since scratch is known to overlap with OUTVAL, and
15362 must be wider than a word. */
15363 emit_insn (gen_movhi (scratch_hi, outval));
15364 outval = scratch_hi;
15368 emit_set_insn (base_plus, base);
15369 base = base_plus;
15371 else if (GET_CODE (base) == PLUS)
15373 /* The addend must be CONST_INT, or we would have dealt with it above. */
15374 HOST_WIDE_INT hi, lo;
15376 offset += INTVAL (XEXP (base, 1));
15377 base = XEXP (base, 0);
15379 /* Rework the address into a legal sequence of insns. */
15380 /* Valid range for lo is -4095 -> 4095 */
15381 lo = (offset >= 0
15382 ? (offset & 0xfff)
15383 : -((-offset) & 0xfff));
15385 /* Corner case, if lo is the max offset then we would be out of range
15386 once we have added the additional 1 below, so bump the msb into the
15387 pre-loading insn(s). */
15388 if (lo == 4095)
15389 lo &= 0x7ff;
15391 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15392 ^ (HOST_WIDE_INT) 0x80000000)
15393 - (HOST_WIDE_INT) 0x80000000);
15395 gcc_assert (hi + lo == offset);
15397 if (hi != 0)
15399 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15401 /* Be careful not to destroy OUTVAL. */
15402 if (reg_overlap_mentioned_p (base_plus, outval))
15404 /* Updating base_plus might destroy outval, see if we
15405 can swap the scratch and base_plus. */
15406 if (!reg_overlap_mentioned_p (scratch, outval))
15407 std::swap (scratch, base_plus);
15408 else
15410 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15412 /* Be conservative and copy outval into scratch now,
15413 this should only be necessary if outval is a
15414 subreg of something larger than a word. */
15415 /* XXX Might this clobber base? I can't see how it
15416 can, since scratch is known to overlap with
15417 outval. */
15418 emit_insn (gen_movhi (scratch_hi, outval));
15419 outval = scratch_hi;
15423 /* Get the base address; addsi3 knows how to handle constants
15424 that require more than one insn. */
15425 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15426 base = base_plus;
15427 offset = lo;
15431 if (BYTES_BIG_ENDIAN)
15433 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15434 plus_constant (Pmode, base,
15435 offset + 1)),
15436 gen_lowpart (QImode, outval)));
15437 emit_insn (gen_lshrsi3 (scratch,
15438 gen_rtx_SUBREG (SImode, outval, 0),
15439 GEN_INT (8)));
15440 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15441 offset)),
15442 gen_lowpart (QImode, scratch)));
15444 else
15446 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15447 offset)),
15448 gen_lowpart (QImode, outval)));
15449 emit_insn (gen_lshrsi3 (scratch,
15450 gen_rtx_SUBREG (SImode, outval, 0),
15451 GEN_INT (8)));
15452 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15453 plus_constant (Pmode, base,
15454 offset + 1)),
15455 gen_lowpart (QImode, scratch)));
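/* In other words, the halfword store is emitted as two byte stores plus a
   logical shift right by 8 to extract the high byte; the ordering above
   puts the least significant byte at the lower address for little-endian
   and at the higher address for big-endian.  */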
15459 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15460 (padded to the size of a word) should be passed in a register. */
15462 static bool
15463 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15465 if (TARGET_AAPCS_BASED)
15466 return must_pass_in_stack_var_size (mode, type);
15467 else
15468 return must_pass_in_stack_var_size_or_pad (mode, type);
15472 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15473 Return true if an argument passed on the stack should be padded upwards,
15474 i.e. if the least-significant byte has useful data.
15475 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15476 aggregate types are placed in the lowest memory address. */
15478 bool
15479 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15481 if (!TARGET_AAPCS_BASED)
15482 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15484 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15485 return false;
15487 return true;
15491 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15492 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15493 register has useful data, and return the opposite if the most
15494 significant byte does. */
15496 bool
15497 arm_pad_reg_upward (machine_mode mode,
15498 tree type, int first ATTRIBUTE_UNUSED)
15500 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15502 /* For AAPCS, small aggregates, small fixed-point types,
15503 and small complex types are always padded upwards. */
15504 if (type)
15506 if ((AGGREGATE_TYPE_P (type)
15507 || TREE_CODE (type) == COMPLEX_TYPE
15508 || FIXED_POINT_TYPE_P (type))
15509 && int_size_in_bytes (type) <= 4)
15510 return true;
15512 else
15514 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15515 && GET_MODE_SIZE (mode) <= 4)
15516 return true;
15520 /* Otherwise, use default padding. */
15521 return !BYTES_BIG_ENDIAN;
15524 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15525 assuming that the address in the base register is word aligned. */
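/* These limits mirror the instruction encodings: in ARM state LDRD/STRD
   take an 8-bit byte offset (so +/-255), while the Thumb-2 encoding
   scales an 8-bit immediate by four, giving +/-1020 in multiples of 4.  */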
15526 bool
15527 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15529 HOST_WIDE_INT max_offset;
15531 /* Offset must be a multiple of 4 in Thumb mode. */
15532 if (TARGET_THUMB2 && ((offset & 3) != 0))
15533 return false;
15535 if (TARGET_THUMB2)
15536 max_offset = 1020;
15537 else if (TARGET_ARM)
15538 max_offset = 255;
15539 else
15540 return false;
15542 return ((offset <= max_offset) && (offset >= -max_offset));
15545 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15546 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15547 Assumes that the address in the base register RN is word aligned. Pattern
15548 guarantees that both memory accesses use the same base register,
15549 the offsets are constants within the range, and the gap between the offsets is 4.
15550 If reload is complete then check that the registers are legal. WBACK indicates whether
15551 address is updated. LOAD indicates whether memory access is load or store. */
15552 bool
15553 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15554 bool wback, bool load)
15556 unsigned int t, t2, n;
15558 if (!reload_completed)
15559 return true;
15561 if (!offset_ok_for_ldrd_strd (offset))
15562 return false;
15564 t = REGNO (rt);
15565 t2 = REGNO (rt2);
15566 n = REGNO (rn);
15568 if ((TARGET_THUMB2)
15569 && ((wback && (n == t || n == t2))
15570 || (t == SP_REGNUM)
15571 || (t == PC_REGNUM)
15572 || (t2 == SP_REGNUM)
15573 || (t2 == PC_REGNUM)
15574 || (!load && (n == PC_REGNUM))
15575 || (load && (t == t2))
15576 /* Triggers Cortex-M3 LDRD errata. */
15577 || (!wback && load && fix_cm3_ldrd && (n == t))))
15578 return false;
15580 if ((TARGET_ARM)
15581 && ((wback && (n == t || n == t2))
15582 || (t2 == PC_REGNUM)
15583 || (t % 2 != 0) /* First destination register is not even. */
15584 || (t2 != t + 1)
15585 /* PC can be used as base register (for offset addressing only),
15586 but it is deprecated. */
15587 || (n == PC_REGNUM)))
15588 return false;
15590 return true;
15593 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15594 operand MEM's address contains an immediate offset from the base
15595 register and has no side effects, in which case it sets BASE and
15596 OFFSET accordingly. */
15597 static bool
15598 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15600 rtx addr;
15602 gcc_assert (base != NULL && offset != NULL);
15604 /* TODO: Handle more general memory operand patterns, such as
15605 PRE_DEC and PRE_INC. */
15607 if (side_effects_p (mem))
15608 return false;
15610 /* Can't deal with subregs. */
15611 if (GET_CODE (mem) == SUBREG)
15612 return false;
15614 gcc_assert (MEM_P (mem));
15616 *offset = const0_rtx;
15618 addr = XEXP (mem, 0);
15620 /* If addr isn't valid for DImode, then we can't handle it. */
15621 if (!arm_legitimate_address_p (DImode, addr,
15622 reload_in_progress || reload_completed))
15623 return false;
15625 if (REG_P (addr))
15627 *base = addr;
15628 return true;
15630 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15632 *base = XEXP (addr, 0);
15633 *offset = XEXP (addr, 1);
15634 return (REG_P (*base) && CONST_INT_P (*offset));
15637 return false;
15640 /* Called from a peephole2 to replace two word-size accesses with a
15641 single LDRD/STRD instruction. Returns true iff we can generate a
15642 new instruction sequence. That is, both accesses use the same base
15643 register and the gap between constant offsets is 4. This function
15644 may reorder its operands to match ldrd/strd RTL templates.
15645 OPERANDS are the operands found by the peephole matcher;
15646 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15647 corresponding memory operands. LOAD indicates whether the access
15648 is load or store. CONST_STORE indicates a store of constant
15649 integer values held in OPERANDS[4,5] and assumes that the pattern
15650 is 4 insns long, for the purpose of checking dead registers.
15651 COMMUTE indicates that register operands may be reordered. */
15652 bool
15653 gen_operands_ldrd_strd (rtx *operands, bool load,
15654 bool const_store, bool commute)
15656 int nops = 2;
15657 HOST_WIDE_INT offsets[2], offset;
15658 rtx base = NULL_RTX;
15659 rtx cur_base, cur_offset, tmp;
15660 int i, gap;
15661 HARD_REG_SET regset;
15663 gcc_assert (!const_store || !load);
15664 /* Check that the memory references are immediate offsets from the
15665 same base register. Extract the base register, the destination
15666 registers, and the corresponding memory offsets. */
15667 for (i = 0; i < nops; i++)
15669 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15670 return false;
15672 if (i == 0)
15673 base = cur_base;
15674 else if (REGNO (base) != REGNO (cur_base))
15675 return false;
15677 offsets[i] = INTVAL (cur_offset);
15678 if (GET_CODE (operands[i]) == SUBREG)
15680 tmp = SUBREG_REG (operands[i]);
15681 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15682 operands[i] = tmp;
15686 /* Make sure there is no dependency between the individual loads. */
15687 if (load && REGNO (operands[0]) == REGNO (base))
15688 return false; /* RAW */
15690 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15691 return false; /* WAW */
15693 /* If the same input register is used in both stores
15694 when storing different constants, try to find a free register.
15695 For example, the code
15696 mov r0, 0
15697 str r0, [r2]
15698 mov r0, 1
15699 str r0, [r2, #4]
15700 can be transformed into
15701 mov r1, 0
15702 strd r1, r0, [r2]
15703 in Thumb mode assuming that r1 is free. */
15704 if (const_store
15705 && REGNO (operands[0]) == REGNO (operands[1])
15706 && INTVAL (operands[4]) != INTVAL (operands[5]))
15708 if (TARGET_THUMB2)
15710 CLEAR_HARD_REG_SET (regset);
15711 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15712 if (tmp == NULL_RTX)
15713 return false;
15715 /* Use the new register in the first load to ensure that
15716 if the original input register is not dead after peephole,
15717 then it will have the correct constant value. */
15718 operands[0] = tmp;
15720 else if (TARGET_ARM)
15723 int regno = REGNO (operands[0]);
15724 if (!peep2_reg_dead_p (4, operands[0]))
15726 /* When the input register is even and is not dead after the
15727 pattern, it has to hold the second constant but we cannot
15728 form a legal STRD in ARM mode with this register as the second
15729 register. */
15730 if (regno % 2 == 0)
15731 return false;
15733 /* Is regno-1 free? */
15734 SET_HARD_REG_SET (regset);
15735 CLEAR_HARD_REG_BIT(regset, regno - 1);
15736 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15737 if (tmp == NULL_RTX)
15738 return false;
15740 operands[0] = tmp;
15742 else
15744 /* Find a DImode register. */
15745 CLEAR_HARD_REG_SET (regset);
15746 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15747 if (tmp != NULL_RTX)
15749 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15750 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15752 else
15754 /* Can we use the input register to form a DI register? */
15755 SET_HARD_REG_SET (regset);
15756 CLEAR_HARD_REG_BIT(regset,
15757 regno % 2 == 0 ? regno + 1 : regno - 1);
15758 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15759 if (tmp == NULL_RTX)
15760 return false;
15761 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15765 gcc_assert (operands[0] != NULL_RTX);
15766 gcc_assert (operands[1] != NULL_RTX);
15767 gcc_assert (REGNO (operands[0]) % 2 == 0);
15768 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15772 /* Make sure the instructions are ordered with lower memory access first. */
15773 if (offsets[0] > offsets[1])
15775 gap = offsets[0] - offsets[1];
15776 offset = offsets[1];
15778 /* Swap the instructions such that lower memory is accessed first. */
15779 std::swap (operands[0], operands[1]);
15780 std::swap (operands[2], operands[3]);
15781 if (const_store)
15782 std::swap (operands[4], operands[5]);
15784 else
15786 gap = offsets[1] - offsets[0];
15787 offset = offsets[0];
15790 /* Make sure accesses are to consecutive memory locations. */
15791 if (gap != 4)
15792 return false;
15794 /* Make sure we generate legal instructions. */
15795 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15796 false, load))
15797 return true;
15799 /* In Thumb state, where registers are almost unconstrained, there
15800 is little hope to fix it. */
15801 if (TARGET_THUMB2)
15802 return false;
15804 if (load && commute)
15806 /* Try reordering registers. */
15807 std::swap (operands[0], operands[1]);
15808 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15809 false, load))
15810 return true;
15813 if (const_store)
15815 /* If input registers are dead after this pattern, they can be
15816 reordered or replaced by other registers that are free in the
15817 current pattern. */
15818 if (!peep2_reg_dead_p (4, operands[0])
15819 || !peep2_reg_dead_p (4, operands[1]))
15820 return false;
15822 /* Try to reorder the input registers. */
15823 /* For example, the code
15824 mov r0, 0
15825 mov r1, 1
15826 str r1, [r2]
15827 str r0, [r2, #4]
15828 can be transformed into
15829 mov r1, 0
15830 mov r0, 1
15831 strd r0, [r2]
15833 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15834 false, false))
15836 std::swap (operands[0], operands[1]);
15837 return true;
15840 /* Try to find a free DI register. */
15841 CLEAR_HARD_REG_SET (regset);
15842 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15843 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15844 while (true)
15846 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15847 if (tmp == NULL_RTX)
15848 return false;
15850 /* DREG must be an even-numbered register in DImode.
15851 Split it into SI registers. */
15852 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15853 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15854 gcc_assert (operands[0] != NULL_RTX);
15855 gcc_assert (operands[1] != NULL_RTX);
15856 gcc_assert (REGNO (operands[0]) % 2 == 0);
15857 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15859 return (operands_ok_ldrd_strd (operands[0], operands[1],
15860 base, offset,
15861 false, load));
15865 return false;
15871 /* Print a symbolic form of X to the debug file, F. */
15872 static void
15873 arm_print_value (FILE *f, rtx x)
15875 switch (GET_CODE (x))
15877 case CONST_INT:
15878 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15879 return;
15881 case CONST_DOUBLE:
15882 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15883 return;
15885 case CONST_VECTOR:
15887 int i;
15889 fprintf (f, "<");
15890 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15892 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15893 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15894 fputc (',', f);
15896 fprintf (f, ">");
15898 return;
15900 case CONST_STRING:
15901 fprintf (f, "\"%s\"", XSTR (x, 0));
15902 return;
15904 case SYMBOL_REF:
15905 fprintf (f, "`%s'", XSTR (x, 0));
15906 return;
15908 case LABEL_REF:
15909 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15910 return;
15912 case CONST:
15913 arm_print_value (f, XEXP (x, 0));
15914 return;
15916 case PLUS:
15917 arm_print_value (f, XEXP (x, 0));
15918 fprintf (f, "+");
15919 arm_print_value (f, XEXP (x, 1));
15920 return;
15922 case PC:
15923 fprintf (f, "pc");
15924 return;
15926 default:
15927 fprintf (f, "????");
15928 return;
15932 /* Routines for manipulation of the constant pool. */
15934 /* Arm instructions cannot load a large constant directly into a
15935 register; they have to come from a pc relative load. The constant
15936 must therefore be placed in the addressable range of the pc
15937 relative load. Depending on the precise pc relative load
15938 instruction the range is somewhere between 256 bytes and 4k. This
15939 means that we often have to dump a constant inside a function, and
15940 generate code to branch around it.
15942 It is important to minimize this, since the branches will slow
15943 things down and make the code larger.
15945 Normally we can hide the table after an existing unconditional
15946 branch so that there is no interruption of the flow, but in the
15947 worst case the code looks like this:
15949 ldr rn, L1
15951 b L2
15952 align
15953 L1: .long value
15957 ldr rn, L3
15959 b L4
15960 align
15961 L3: .long value
15965 We fix this by performing a scan after scheduling, which notices
15966 which instructions need to have their operands fetched from the
15967 constant table and builds the table.
15969 The algorithm starts by building a table of all the constants that
15970 need fixing up and all the natural barriers in the function (places
15971 where a constant table can be dropped without breaking the flow).
15972 For each fixup we note how far the pc-relative replacement will be
15973 able to reach and the offset of the instruction into the function.
15975 Having built the table we then group the fixes together to form
15976 tables that are as large as possible (subject to addressing
15977 constraints) and emit each table of constants after the last
15978 barrier that is within range of all the instructions in the group.
15979 If a group does not contain a barrier, then we forcibly create one
15980 by inserting a jump instruction into the flow. Once the table has
15981 been inserted, the insns are then modified to reference the
15982 relevant entry in the pool.
15984 Possible enhancements to the algorithm (not implemented) are:
15986 1) For some processors and object formats, there may be benefit in
15987 aligning the pools to the start of cache lines; this alignment
15988 would need to be taken into account when calculating addressability
15989 of a pool. */
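/* In outline: arm_reorg (below) scans the insns and calls
   push_minipool_fix / push_minipool_barrier to record the work;
   add_minipool_forward_ref and add_minipool_backward_ref then build up
   each pool, create_fix_barrier inserts a jump around a pool when no
   natural barrier exists, and dump_minipool finally emits the table.  */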
15991 /* These typedefs are located at the start of this file, so that
15992 they can be used in the prototypes there. This comment is to
15993 remind readers of that fact so that the following structures
15994 can be understood more easily.
15996 typedef struct minipool_node Mnode;
15997 typedef struct minipool_fixup Mfix; */
15999 struct minipool_node
16001 /* Doubly linked chain of entries. */
16002 Mnode * next;
16003 Mnode * prev;
16004 /* The maximum offset into the code that this entry can be placed. While
16005 pushing fixes for forward references, all entries are sorted in order
16006 of increasing max_address. */
16007 HOST_WIDE_INT max_address;
16008 /* Similarly for an entry inserted for a backwards ref. */
16009 HOST_WIDE_INT min_address;
16010 /* The number of fixes referencing this entry. This can become zero
16011 if we "unpush" an entry. In this case we ignore the entry when we
16012 come to emit the code. */
16013 int refcount;
16014 /* The offset from the start of the minipool. */
16015 HOST_WIDE_INT offset;
16016 /* The value in table. */
16017 rtx value;
16018 /* The mode of value. */
16019 machine_mode mode;
16020 /* The size of the value. With iWMMXt enabled
16021 sizes > 4 also imply an alignment of 8-bytes. */
16022 int fix_size;
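/* A minipool fixup records one instruction operand that must be rewritten
   to load its value from the pool: the insn and its address within the
   function, the location (LOC) of the operand to patch, the constant's
   value, mode and padded size, the pool entry eventually assigned to it,
   and how far forwards and backwards the insn's pc-relative addressing
   can reach.  */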
16025 struct minipool_fixup
16027 Mfix * next;
16028 rtx_insn * insn;
16029 HOST_WIDE_INT address;
16030 rtx * loc;
16031 machine_mode mode;
16032 int fix_size;
16033 rtx value;
16034 Mnode * minipool;
16035 HOST_WIDE_INT forwards;
16036 HOST_WIDE_INT backwards;
16039 /* Fixes less than a word need padding out to a word boundary. */
16040 #define MINIPOOL_FIX_SIZE(mode) \
16041 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
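/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 even though the value only
   occupies two bytes, while MINIPOOL_FIX_SIZE (DImode) is 8.  */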
16043 static Mnode * minipool_vector_head;
16044 static Mnode * minipool_vector_tail;
16045 static rtx_code_label *minipool_vector_label;
16046 static int minipool_pad;
16048 /* The linked list of all minipool fixes required for this function. */
16049 Mfix * minipool_fix_head;
16050 Mfix * minipool_fix_tail;
16051 /* The fix entry for the current minipool, once it has been placed. */
16052 Mfix * minipool_barrier;
16054 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16055 #define JUMP_TABLES_IN_TEXT_SECTION 0
16056 #endif
16058 static HOST_WIDE_INT
16059 get_jump_table_size (rtx_jump_table_data *insn)
16061 /* ADDR_VECs only take room if read-only data goes into the text
16062 section. */
16063 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16065 rtx body = PATTERN (insn);
16066 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16067 HOST_WIDE_INT size;
16068 HOST_WIDE_INT modesize;
16070 modesize = GET_MODE_SIZE (GET_MODE (body));
16071 size = modesize * XVECLEN (body, elt);
16072 switch (modesize)
16074 case 1:
16075 /* Round up size of TBB table to a halfword boundary. */
16076 size = (size + 1) & ~(HOST_WIDE_INT)1;
16077 break;
16078 case 2:
16079 /* No padding necessary for TBH. */
16080 break;
16081 case 4:
16082 /* Add two bytes for alignment on Thumb. */
16083 if (TARGET_THUMB)
16084 size += 2;
16085 break;
16086 default:
16087 gcc_unreachable ();
16089 return size;
16092 return 0;
16095 /* Return the maximum amount of padding that will be inserted before
16096 label LABEL. */
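/* For example, a label aligned to 8 bytes in Thumb code (minimum insn
   size 2) may be preceded by up to 6 bytes of padding.  */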
16098 static HOST_WIDE_INT
16099 get_label_padding (rtx label)
16101 HOST_WIDE_INT align, min_insn_size;
16103 align = 1 << label_to_alignment (label);
16104 min_insn_size = TARGET_THUMB ? 2 : 4;
16105 return align > min_insn_size ? align - min_insn_size : 0;
16108 /* Move a minipool fix MP from its current location to before MAX_MP.
16109 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16110 constraints may need updating. */
16111 static Mnode *
16112 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16113 HOST_WIDE_INT max_address)
16115 /* The code below assumes these are different. */
16116 gcc_assert (mp != max_mp);
16118 if (max_mp == NULL)
16120 if (max_address < mp->max_address)
16121 mp->max_address = max_address;
16123 else
16125 if (max_address > max_mp->max_address - mp->fix_size)
16126 mp->max_address = max_mp->max_address - mp->fix_size;
16127 else
16128 mp->max_address = max_address;
16130 /* Unlink MP from its current position. Since max_mp is non-null,
16131 mp->prev must be non-null. */
16132 mp->prev->next = mp->next;
16133 if (mp->next != NULL)
16134 mp->next->prev = mp->prev;
16135 else
16136 minipool_vector_tail = mp->prev;
16138 /* Re-insert it before MAX_MP. */
16139 mp->next = max_mp;
16140 mp->prev = max_mp->prev;
16141 max_mp->prev = mp;
16143 if (mp->prev != NULL)
16144 mp->prev->next = mp;
16145 else
16146 minipool_vector_head = mp;
16149 /* Save the new entry. */
16150 max_mp = mp;
16152 /* Scan over the preceding entries and adjust their addresses as
16153 required. */
16154 while (mp->prev != NULL
16155 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16157 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16158 mp = mp->prev;
16161 return max_mp;
16164 /* Add a constant to the minipool for a forward reference. Returns the
16165 node added or NULL if the constant will not fit in this pool. */
16166 static Mnode *
16167 add_minipool_forward_ref (Mfix *fix)
16169 /* If set, max_mp is the first pool_entry that has a lower
16170 constraint than the one we are trying to add. */
16171 Mnode * max_mp = NULL;
16172 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16173 Mnode * mp;
16175 /* If the minipool starts before the end of FIX->INSN then this FIX
16176 can not be placed into the current pool. Furthermore, adding the
16177 new constant pool entry may cause the pool to start FIX_SIZE bytes
16178 earlier. */
16179 if (minipool_vector_head &&
16180 (fix->address + get_attr_length (fix->insn)
16181 >= minipool_vector_head->max_address - fix->fix_size))
16182 return NULL;
16184 /* Scan the pool to see if a constant with the same value has
16185 already been added. While we are doing this, also note the
16186 location where we must insert the constant if it doesn't already
16187 exist. */
16188 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16190 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16191 && fix->mode == mp->mode
16192 && (!LABEL_P (fix->value)
16193 || (CODE_LABEL_NUMBER (fix->value)
16194 == CODE_LABEL_NUMBER (mp->value)))
16195 && rtx_equal_p (fix->value, mp->value))
16197 /* More than one fix references this entry. */
16198 mp->refcount++;
16199 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16202 /* Note the insertion point if necessary. */
16203 if (max_mp == NULL
16204 && mp->max_address > max_address)
16205 max_mp = mp;
16207 /* If we are inserting an 8-byte aligned quantity and
16208 we have not already found an insertion point, then
16209 make sure that all such 8-byte aligned quantities are
16210 placed at the start of the pool. */
16211 if (ARM_DOUBLEWORD_ALIGN
16212 && max_mp == NULL
16213 && fix->fix_size >= 8
16214 && mp->fix_size < 8)
16216 max_mp = mp;
16217 max_address = mp->max_address;
16221 /* The value is not currently in the minipool, so we need to create
16222 a new entry for it. If MAX_MP is NULL, the entry will be put on
16223 the end of the list since the placement is less constrained than
16224 any existing entry. Otherwise, we insert the new fix before
16225 MAX_MP and, if necessary, adjust the constraints on the other
16226 entries. */
16227 mp = XNEW (Mnode);
16228 mp->fix_size = fix->fix_size;
16229 mp->mode = fix->mode;
16230 mp->value = fix->value;
16231 mp->refcount = 1;
16232 /* Not yet required for a backwards ref. */
16233 mp->min_address = -65536;
16235 if (max_mp == NULL)
16237 mp->max_address = max_address;
16238 mp->next = NULL;
16239 mp->prev = minipool_vector_tail;
16241 if (mp->prev == NULL)
16243 minipool_vector_head = mp;
16244 minipool_vector_label = gen_label_rtx ();
16246 else
16247 mp->prev->next = mp;
16249 minipool_vector_tail = mp;
16251 else
16253 if (max_address > max_mp->max_address - mp->fix_size)
16254 mp->max_address = max_mp->max_address - mp->fix_size;
16255 else
16256 mp->max_address = max_address;
16258 mp->next = max_mp;
16259 mp->prev = max_mp->prev;
16260 max_mp->prev = mp;
16261 if (mp->prev != NULL)
16262 mp->prev->next = mp;
16263 else
16264 minipool_vector_head = mp;
16267 /* Save the new entry. */
16268 max_mp = mp;
16270 /* Scan over the preceding entries and adjust their addresses as
16271 required. */
16272 while (mp->prev != NULL
16273 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16275 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16276 mp = mp->prev;
16279 return max_mp;
16282 static Mnode *
16283 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16284 HOST_WIDE_INT min_address)
16286 HOST_WIDE_INT offset;
16288 /* The code below assumes these are different. */
16289 gcc_assert (mp != min_mp);
16291 if (min_mp == NULL)
16293 if (min_address > mp->min_address)
16294 mp->min_address = min_address;
16296 else
16298 /* We will adjust this below if it is too loose. */
16299 mp->min_address = min_address;
16301 /* Unlink MP from its current position. Since min_mp is non-null,
16302 mp->next must be non-null. */
16303 mp->next->prev = mp->prev;
16304 if (mp->prev != NULL)
16305 mp->prev->next = mp->next;
16306 else
16307 minipool_vector_head = mp->next;
16309 /* Reinsert it after MIN_MP. */
16310 mp->prev = min_mp;
16311 mp->next = min_mp->next;
16312 min_mp->next = mp;
16313 if (mp->next != NULL)
16314 mp->next->prev = mp;
16315 else
16316 minipool_vector_tail = mp;
16319 min_mp = mp;
16321 offset = 0;
16322 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16324 mp->offset = offset;
16325 if (mp->refcount > 0)
16326 offset += mp->fix_size;
16328 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16329 mp->next->min_address = mp->min_address + mp->fix_size;
16332 return min_mp;
16335 /* Add a constant to the minipool for a backward reference. Returns the
16336 node added or NULL if the constant will not fit in this pool.
16338 Note that the code for insertion for a backwards reference can be
16339 somewhat confusing because the calculated offsets for each fix do
16340 not take into account the size of the pool (which is still under
16341 construction). */
16342 static Mnode *
16343 add_minipool_backward_ref (Mfix *fix)
16345 /* If set, min_mp is the last pool_entry that has a lower constraint
16346 than the one we are trying to add. */
16347 Mnode *min_mp = NULL;
16348 /* This can be negative, since it is only a constraint. */
16349 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16350 Mnode *mp;
16352 /* If we can't reach the current pool from this insn, or if we can't
16353 insert this entry at the end of the pool without pushing other
16354 fixes out of range, then we don't try. This ensures that we
16355 can't fail later on. */
16356 if (min_address >= minipool_barrier->address
16357 || (minipool_vector_tail->min_address + fix->fix_size
16358 >= minipool_barrier->address))
16359 return NULL;
16361 /* Scan the pool to see if a constant with the same value has
16362 already been added. While we are doing this, also note the
16363 location where we must insert the constant if it doesn't already
16364 exist. */
16365 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16367 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16368 && fix->mode == mp->mode
16369 && (!LABEL_P (fix->value)
16370 || (CODE_LABEL_NUMBER (fix->value)
16371 == CODE_LABEL_NUMBER (mp->value)))
16372 && rtx_equal_p (fix->value, mp->value)
16373 /* Check that there is enough slack to move this entry to the
16374 end of the table (this is conservative). */
16375 && (mp->max_address
16376 > (minipool_barrier->address
16377 + minipool_vector_tail->offset
16378 + minipool_vector_tail->fix_size)))
16380 mp->refcount++;
16381 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16384 if (min_mp != NULL)
16385 mp->min_address += fix->fix_size;
16386 else
16388 /* Note the insertion point if necessary. */
16389 if (mp->min_address < min_address)
16391 /* For now, we do not allow the insertion of 8-byte alignment
16392 requiring nodes anywhere but at the start of the pool. */
16393 if (ARM_DOUBLEWORD_ALIGN
16394 && fix->fix_size >= 8 && mp->fix_size < 8)
16395 return NULL;
16396 else
16397 min_mp = mp;
16399 else if (mp->max_address
16400 < minipool_barrier->address + mp->offset + fix->fix_size)
16402 /* Inserting before this entry would push the fix beyond
16403 its maximum address (which can happen if we have
16404 re-located a forwards fix); force the new fix to come
16405 after it. */
16406 if (ARM_DOUBLEWORD_ALIGN
16407 && fix->fix_size >= 8 && mp->fix_size < 8)
16408 return NULL;
16409 else
16411 min_mp = mp;
16412 min_address = mp->min_address + fix->fix_size;
16415 /* Do not insert a non-8-byte aligned quantity before 8-byte
16416 aligned quantities. */
16417 else if (ARM_DOUBLEWORD_ALIGN
16418 && fix->fix_size < 8
16419 && mp->fix_size >= 8)
16421 min_mp = mp;
16422 min_address = mp->min_address + fix->fix_size;
16427 /* We need to create a new entry. */
16428 mp = XNEW (Mnode);
16429 mp->fix_size = fix->fix_size;
16430 mp->mode = fix->mode;
16431 mp->value = fix->value;
16432 mp->refcount = 1;
16433 mp->max_address = minipool_barrier->address + 65536;
16435 mp->min_address = min_address;
16437 if (min_mp == NULL)
16439 mp->prev = NULL;
16440 mp->next = minipool_vector_head;
16442 if (mp->next == NULL)
16444 minipool_vector_tail = mp;
16445 minipool_vector_label = gen_label_rtx ();
16447 else
16448 mp->next->prev = mp;
16450 minipool_vector_head = mp;
16452 else
16454 mp->next = min_mp->next;
16455 mp->prev = min_mp;
16456 min_mp->next = mp;
16458 if (mp->next != NULL)
16459 mp->next->prev = mp;
16460 else
16461 minipool_vector_tail = mp;
16464 /* Save the new entry. */
16465 min_mp = mp;
16467 if (mp->prev)
16468 mp = mp->prev;
16469 else
16470 mp->offset = 0;
16472 /* Scan over the following entries and adjust their offsets. */
16473 while (mp->next != NULL)
16475 if (mp->next->min_address < mp->min_address + mp->fix_size)
16476 mp->next->min_address = mp->min_address + mp->fix_size;
16478 if (mp->refcount)
16479 mp->next->offset = mp->offset + mp->fix_size;
16480 else
16481 mp->next->offset = mp->offset;
16483 mp = mp->next;
16486 return min_mp;
16489 static void
16490 assign_minipool_offsets (Mfix *barrier)
16492 HOST_WIDE_INT offset = 0;
16493 Mnode *mp;
16495 minipool_barrier = barrier;
16497 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16499 mp->offset = offset;
16501 if (mp->refcount > 0)
16502 offset += mp->fix_size;
16506 /* Output the literal table */
16507 static void
16508 dump_minipool (rtx_insn *scan)
16510 Mnode * mp;
16511 Mnode * nmp;
16512 int align64 = 0;
16514 if (ARM_DOUBLEWORD_ALIGN)
16515 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16516 if (mp->refcount > 0 && mp->fix_size >= 8)
16518 align64 = 1;
16519 break;
16522 if (dump_file)
16523 fprintf (dump_file,
16524 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16525 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16527 scan = emit_label_after (gen_label_rtx (), scan);
16528 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16529 scan = emit_label_after (minipool_vector_label, scan);
16531 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16533 if (mp->refcount > 0)
16535 if (dump_file)
16537 fprintf (dump_file,
16538 ";; Offset %u, min %ld, max %ld ",
16539 (unsigned) mp->offset, (unsigned long) mp->min_address,
16540 (unsigned long) mp->max_address);
16541 arm_print_value (dump_file, mp->value);
16542 fputc ('\n', dump_file);
16545 switch (GET_MODE_SIZE (mp->mode))
16547 #ifdef HAVE_consttable_1
16548 case 1:
16549 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16550 break;
16552 #endif
16553 #ifdef HAVE_consttable_2
16554 case 2:
16555 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16556 break;
16558 #endif
16559 #ifdef HAVE_consttable_4
16560 case 4:
16561 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16562 break;
16564 #endif
16565 #ifdef HAVE_consttable_8
16566 case 8:
16567 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16568 break;
16570 #endif
16571 #ifdef HAVE_consttable_16
16572 case 16:
16573 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16574 break;
16576 #endif
16577 default:
16578 gcc_unreachable ();
16582 nmp = mp->next;
16583 free (mp);
16586 minipool_vector_head = minipool_vector_tail = NULL;
16587 scan = emit_insn_after (gen_consttable_end (), scan);
16588 scan = emit_barrier_after (scan);
16591 /* Return the cost of forcibly inserting a barrier after INSN. */
16592 static int
16593 arm_barrier_cost (rtx_insn *insn)
16595 /* Basing the location of the pool on the loop depth is preferable,
16596 but at the moment, the basic block information seems to be
16597 corrupt by this stage of the compilation. */
16598 int base_cost = 50;
16599 rtx_insn *next = next_nonnote_insn (insn);
16601 if (next != NULL && LABEL_P (next))
16602 base_cost -= 20;
16604 switch (GET_CODE (insn))
16606 case CODE_LABEL:
16607 /* It will always be better to place the table before the label, rather
16608 than after it. */
16609 return 50;
16611 case INSN:
16612 case CALL_INSN:
16613 return base_cost;
16615 case JUMP_INSN:
16616 return base_cost - 10;
16618 default:
16619 return base_cost + 10;
16623 /* Find the best place in the insn stream in the range
16624 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16625 Create the barrier by inserting a jump and add a new fix entry for
16626 it. */
16627 static Mfix *
16628 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16630 HOST_WIDE_INT count = 0;
16631 rtx_barrier *barrier;
16632 rtx_insn *from = fix->insn;
16633 /* The instruction after which we will insert the jump. */
16634 rtx_insn *selected = NULL;
16635 int selected_cost;
16636 /* The address at which the jump instruction will be placed. */
16637 HOST_WIDE_INT selected_address;
16638 Mfix * new_fix;
16639 HOST_WIDE_INT max_count = max_address - fix->address;
16640 rtx_code_label *label = gen_label_rtx ();
16642 selected_cost = arm_barrier_cost (from);
16643 selected_address = fix->address;
16645 while (from && count < max_count)
16647 rtx_jump_table_data *tmp;
16648 int new_cost;
16650 /* This code shouldn't have been called if there was a natural barrier
16651 within range. */
16652 gcc_assert (!BARRIER_P (from));
16654 /* Count the length of this insn. This must stay in sync with the
16655 code that pushes minipool fixes. */
16656 if (LABEL_P (from))
16657 count += get_label_padding (from);
16658 else
16659 count += get_attr_length (from);
16661 /* If there is a jump table, add its length. */
16662 if (tablejump_p (from, NULL, &tmp))
16664 count += get_jump_table_size (tmp);
16666 /* Jump tables aren't in a basic block, so base the cost on
16667 the dispatch insn. If we select this location, we will
16668 still put the pool after the table. */
16669 new_cost = arm_barrier_cost (from);
16671 if (count < max_count
16672 && (!selected || new_cost <= selected_cost))
16674 selected = tmp;
16675 selected_cost = new_cost;
16676 selected_address = fix->address + count;
16679 /* Continue after the dispatch table. */
16680 from = NEXT_INSN (tmp);
16681 continue;
16684 new_cost = arm_barrier_cost (from);
16686 if (count < max_count
16687 && (!selected || new_cost <= selected_cost))
16689 selected = from;
16690 selected_cost = new_cost;
16691 selected_address = fix->address + count;
16694 from = NEXT_INSN (from);
16697 /* Make sure that we found a place to insert the jump. */
16698 gcc_assert (selected);
16700 /* Make sure we do not split a call and its corresponding
16701 CALL_ARG_LOCATION note. */
16702 if (CALL_P (selected))
16704 rtx_insn *next = NEXT_INSN (selected);
16705 if (next && NOTE_P (next)
16706 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16707 selected = next;
16710 /* Create a new JUMP_INSN that branches around a barrier. */
16711 from = emit_jump_insn_after (gen_jump (label), selected);
16712 JUMP_LABEL (from) = label;
16713 barrier = emit_barrier_after (from);
16714 emit_label_after (label, barrier);
16716 /* Create a minipool barrier entry for the new barrier. */
16717 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16718 new_fix->insn = barrier;
16719 new_fix->address = selected_address;
16720 new_fix->next = fix->next;
16721 fix->next = new_fix;
16723 return new_fix;
16726 /* Record that there is a natural barrier in the insn stream at
16727 ADDRESS. */
16728 static void
16729 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16731 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16733 fix->insn = insn;
16734 fix->address = address;
16736 fix->next = NULL;
16737 if (minipool_fix_head != NULL)
16738 minipool_fix_tail->next = fix;
16739 else
16740 minipool_fix_head = fix;
16742 minipool_fix_tail = fix;
16745 /* Record INSN, which will need fixing up to load a value from the
16746 minipool. ADDRESS is the offset of the insn since the start of the
16747 function; LOC is a pointer to the part of the insn which requires
16748 fixing; VALUE is the constant that must be loaded, which is of type
16749 MODE. */
16750 static void
16751 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16752 machine_mode mode, rtx value)
16754 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16756 fix->insn = insn;
16757 fix->address = address;
16758 fix->loc = loc;
16759 fix->mode = mode;
16760 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16761 fix->value = value;
16762 fix->forwards = get_attr_pool_range (insn);
16763 fix->backwards = get_attr_neg_pool_range (insn);
16764 fix->minipool = NULL;
16766 /* If an insn doesn't have a range defined for it, then it isn't
16767 expecting to be reworked by this code. Better to stop now than
16768 to generate duff assembly code. */
16769 gcc_assert (fix->forwards || fix->backwards);
16771 /* If an entry requires 8-byte alignment then assume all constant pools
16772 require 4 bytes of padding. Trying to do this later on a per-pool
16773 basis is awkward because existing pool entries have to be modified. */
16774 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16775 minipool_pad = 4;
16777 if (dump_file)
16779 fprintf (dump_file,
16780 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16781 GET_MODE_NAME (mode),
16782 INSN_UID (insn), (unsigned long) address,
16783 -1 * (long)fix->backwards, (long)fix->forwards);
16784 arm_print_value (dump_file, fix->value);
16785 fprintf (dump_file, "\n");
16788 /* Add it to the chain of fixes. */
16789 fix->next = NULL;
16791 if (minipool_fix_head != NULL)
16792 minipool_fix_tail->next = fix;
16793 else
16794 minipool_fix_head = fix;
16796 minipool_fix_tail = fix;
16799 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16800 Returns the number of insns needed, or 99 if we always want to synthesize
16801 the value. */
15802 int
15803 arm_max_const_double_inline_cost ()
16805 /* Let the value get synthesized to avoid the use of literal pools. */
16806 if (arm_disable_literal_pool)
16807 return 99;
16809 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16812 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16813 Returns the number of insns needed, or 99 if we don't know how to
16814 do it. */
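/* For example, the DImode constant 0x0000000100000001 costs 1 insn for
   each 32-bit half (a single MOV of #1), so the function returns 2.  */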
16815 int
16816 arm_const_double_inline_cost (rtx val)
16818 rtx lowpart, highpart;
16819 machine_mode mode;
16821 mode = GET_MODE (val);
16823 if (mode == VOIDmode)
16824 mode = DImode;
16826 gcc_assert (GET_MODE_SIZE (mode) == 8);
16828 lowpart = gen_lowpart (SImode, val);
16829 highpart = gen_highpart_mode (SImode, mode, val);
16831 gcc_assert (CONST_INT_P (lowpart));
16832 gcc_assert (CONST_INT_P (highpart));
16834 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16835 NULL_RTX, NULL_RTX, 0, 0)
16836 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16837 NULL_RTX, NULL_RTX, 0, 0));
16840 /* Cost of loading a SImode constant. */
16841 static inline int
16842 arm_const_inline_cost (enum rtx_code code, rtx val)
16844 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16845 NULL_RTX, NULL_RTX, 1, 0);
16848 /* Return true if it is worthwhile to split a 64-bit constant into two
16849 32-bit operations. This is the case if optimizing for size, or
16850 if we have load delay slots, or if one 32-bit part can be done with
16851 a single data operation. */
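/* For instance, for 0xff00000000000003 the high word 0xff000000 is a
   valid ARM immediate (0xff rotated), so splitting into two 32-bit
   operations is worthwhile even without -Os or load delay slots.  */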
16852 bool
16853 arm_const_double_by_parts (rtx val)
16855 machine_mode mode = GET_MODE (val);
16856 rtx part;
16858 if (optimize_size || arm_ld_sched)
16859 return true;
16861 if (mode == VOIDmode)
16862 mode = DImode;
16864 part = gen_highpart_mode (SImode, mode, val);
16866 gcc_assert (CONST_INT_P (part));
16868 if (const_ok_for_arm (INTVAL (part))
16869 || const_ok_for_arm (~INTVAL (part)))
16870 return true;
16872 part = gen_lowpart (SImode, val);
16874 gcc_assert (CONST_INT_P (part));
16876 if (const_ok_for_arm (INTVAL (part))
16877 || const_ok_for_arm (~INTVAL (part)))
16878 return true;
16880 return false;
16883 /* Return true if it is possible to inline both the high and low parts
16884 of a 64-bit constant into 32-bit data processing instructions. */
16885 bool
16886 arm_const_double_by_immediates (rtx val)
16888 machine_mode mode = GET_MODE (val);
16889 rtx part;
16891 if (mode == VOIDmode)
16892 mode = DImode;
16894 part = gen_highpart_mode (SImode, mode, val);
16896 gcc_assert (CONST_INT_P (part));
16898 if (!const_ok_for_arm (INTVAL (part)))
16899 return false;
16901 part = gen_lowpart (SImode, val);
16903 gcc_assert (CONST_INT_P (part));
16905 if (!const_ok_for_arm (INTVAL (part)))
16906 return false;
16908 return true;
16911 /* Scan INSN and note any of its operands that need fixing.
16912 If DO_PUSHES is false we do not actually push any of the fixups
16913 needed. */
16914 static void
16915 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16917 int opno;
16919 extract_constrain_insn (insn);
16921 if (recog_data.n_alternatives == 0)
16922 return;
16924 /* Fill in recog_op_alt with information about the constraints of
16925 this insn. */
16926 preprocess_constraints (insn);
16928 const operand_alternative *op_alt = which_op_alt ();
16929 for (opno = 0; opno < recog_data.n_operands; opno++)
16931 /* Things we need to fix can only occur in inputs. */
16932 if (recog_data.operand_type[opno] != OP_IN)
16933 continue;
16935 /* If this alternative is a memory reference, then any mention
16936 of constants in this alternative is really to fool reload
16937 into allowing us to accept one there. We need to fix them up
16938 now so that we output the right code. */
16939 if (op_alt[opno].memory_ok)
16941 rtx op = recog_data.operand[opno];
16943 if (CONSTANT_P (op))
16945 if (do_pushes)
16946 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16947 recog_data.operand_mode[opno], op);
16949 else if (MEM_P (op)
16950 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16951 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16953 if (do_pushes)
16955 rtx cop = avoid_constant_pool_reference (op);
16957 /* Casting the address of something to a mode narrower
16958 than a word can cause avoid_constant_pool_reference()
16959 to return the pool reference itself. That's no good to
16960 us here. Let's just hope that we can use the
16961 constant pool value directly. */
16962 if (op == cop)
16963 cop = get_pool_constant (XEXP (op, 0));
16965 push_minipool_fix (insn, address,
16966 recog_data.operand_loc[opno],
16967 recog_data.operand_mode[opno], cop);
16974 return;
16977 /* Rewrite move insn into subtract of 0 if the condition codes will
16978 be useful in next conditional jump insn. */
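/* In RTL terms, a preceding
        (set (reg:SI d) (reg:SI s))
   becomes
        (set (reg:SI d) (minus:SI (reg:SI s) (const_int 0)))
   when the final cbranchsi4_insn in the block tests d or s against zero,
   so the flag-setting subtract can stand in for a separate comparison.  */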
16980 static void
16981 thumb1_reorg (void)
16983 basic_block bb;
16985 FOR_EACH_BB_FN (bb, cfun)
16987 rtx dest, src;
16988 rtx pat, op0, set = NULL;
16989 rtx_insn *prev, *insn = BB_END (bb);
16990 bool insn_clobbered = false;
16992 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16993 insn = PREV_INSN (insn);
16995 /* Find the last cbranchsi4_insn in basic block BB. */
16996 if (insn == BB_HEAD (bb)
16997 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16998 continue;
17000 /* Get the register with which we are comparing. */
17001 pat = PATTERN (insn);
17002 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17004 /* Find the first flag setting insn before INSN in basic block BB. */
17005 gcc_assert (insn != BB_HEAD (bb));
17006 for (prev = PREV_INSN (insn);
17007 (!insn_clobbered
17008 && prev != BB_HEAD (bb)
17009 && (NOTE_P (prev)
17010 || DEBUG_INSN_P (prev)
17011 || ((set = single_set (prev)) != NULL
17012 && get_attr_conds (prev) == CONDS_NOCOND)));
17013 prev = PREV_INSN (prev))
17015 if (reg_set_p (op0, prev))
17016 insn_clobbered = true;
17019 /* Skip if op0 is clobbered by insn other than prev. */
17020 if (insn_clobbered)
17021 continue;
17023 if (!set)
17024 continue;
17026 dest = SET_DEST (set);
17027 src = SET_SRC (set);
17028 if (!low_register_operand (dest, SImode)
17029 || !low_register_operand (src, SImode))
17030 continue;
17032 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17033 in INSN. Both src and dest of the move insn are checked. */
17034 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17036 dest = copy_rtx (dest);
17037 src = copy_rtx (src);
17038 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17039 PATTERN (prev) = gen_rtx_SET (dest, src);
17040 INSN_CODE (prev) = -1;
17041 /* Set test register in INSN to dest. */
17042 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17043 INSN_CODE (insn) = -1;
17048 /* Convert instructions to their cc-clobbering variant if possible, since
17049 that allows us to use smaller encodings. */
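/* For example, outside an IT block the flag-setting form
        adds  r0, r1, r2
   has a 16-bit Thumb-2 encoding, whereas the flag-preserving
        add   r0, r1, r2
   needs a 32-bit encoding, so clobbering the condition codes while they
   are dead shrinks the code.  */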
17051 static void
17052 thumb2_reorg (void)
17054 basic_block bb;
17055 regset_head live;
17057 INIT_REG_SET (&live);
17059 /* We are freeing block_for_insn in the toplev to keep compatibility
17060 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17061 compute_bb_for_insn ();
17062 df_analyze ();
17064 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17066 FOR_EACH_BB_FN (bb, cfun)
17068 if (current_tune->disparage_flag_setting_t16_encodings
17069 && optimize_bb_for_speed_p (bb))
17070 continue;
17072 rtx_insn *insn;
17073 Convert_Action action = SKIP;
17074 Convert_Action action_for_partial_flag_setting
17075 = (current_tune->disparage_partial_flag_setting_t16_encodings
17076 && optimize_bb_for_speed_p (bb))
17077 ? SKIP : CONV;
17079 COPY_REG_SET (&live, DF_LR_OUT (bb));
17080 df_simulate_initialize_backwards (bb, &live);
17081 FOR_BB_INSNS_REVERSE (bb, insn)
17083 if (NONJUMP_INSN_P (insn)
17084 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17085 && GET_CODE (PATTERN (insn)) == SET)
17087 action = SKIP;
17088 rtx pat = PATTERN (insn);
17089 rtx dst = XEXP (pat, 0);
17090 rtx src = XEXP (pat, 1);
17091 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17093 if (UNARY_P (src) || BINARY_P (src))
17094 op0 = XEXP (src, 0);
17096 if (BINARY_P (src))
17097 op1 = XEXP (src, 1);
17099 if (low_register_operand (dst, SImode))
17101 switch (GET_CODE (src))
17103 case PLUS:
17104 /* Adding two registers and storing the result
17105 in the first source is already a 16-bit
17106 operation. */
17107 if (rtx_equal_p (dst, op0)
17108 && register_operand (op1, SImode))
17109 break;
17111 if (low_register_operand (op0, SImode))
17113 /* ADDS <Rd>,<Rn>,<Rm> */
17114 if (low_register_operand (op1, SImode))
17115 action = CONV;
17116 /* ADDS <Rdn>,#<imm8> */
17117 /* SUBS <Rdn>,#<imm8> */
17118 else if (rtx_equal_p (dst, op0)
17119 && CONST_INT_P (op1)
17120 && IN_RANGE (INTVAL (op1), -255, 255))
17121 action = CONV;
17122 /* ADDS <Rd>,<Rn>,#<imm3> */
17123 /* SUBS <Rd>,<Rn>,#<imm3> */
17124 else if (CONST_INT_P (op1)
17125 && IN_RANGE (INTVAL (op1), -7, 7))
17126 action = CONV;
17128 /* ADCS <Rd>, <Rn> */
17129 else if (GET_CODE (XEXP (src, 0)) == PLUS
17130 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17131 && low_register_operand (XEXP (XEXP (src, 0), 1),
17132 SImode)
17133 && COMPARISON_P (op1)
17134 && cc_register (XEXP (op1, 0), VOIDmode)
17135 && maybe_get_arm_condition_code (op1) == ARM_CS
17136 && XEXP (op1, 1) == const0_rtx)
17137 action = CONV;
17138 break;
17140 case MINUS:
17141 /* RSBS <Rd>,<Rn>,#0
17142 Not handled here: see NEG below. */
17143 /* SUBS <Rd>,<Rn>,#<imm3>
17144 SUBS <Rdn>,#<imm8>
17145 Not handled here: see PLUS above. */
17146 /* SUBS <Rd>,<Rn>,<Rm> */
17147 if (low_register_operand (op0, SImode)
17148 && low_register_operand (op1, SImode))
17149 action = CONV;
17150 break;
17152 case MULT:
17153 /* MULS <Rdm>,<Rn>,<Rdm>
17154 As an exception to the rule, this is only used
17155 when optimizing for size since MULS is slow on all
17156 known implementations. We do not even want to use
17157 MULS in cold code, if optimizing for speed, so we
17158 test the global flag here. */
17159 if (!optimize_size)
17160 break;
17161 /* else fall through. */
17162 case AND:
17163 case IOR:
17164 case XOR:
17165 /* ANDS <Rdn>,<Rm> */
17166 if (rtx_equal_p (dst, op0)
17167 && low_register_operand (op1, SImode))
17168 action = action_for_partial_flag_setting;
17169 else if (rtx_equal_p (dst, op1)
17170 && low_register_operand (op0, SImode))
17171 action = action_for_partial_flag_setting == SKIP
17172 ? SKIP : SWAP_CONV;
17173 break;
17175 case ASHIFTRT:
17176 case ASHIFT:
17177 case LSHIFTRT:
17178 /* ASRS <Rdn>,<Rm> */
17179 /* LSRS <Rdn>,<Rm> */
17180 /* LSLS <Rdn>,<Rm> */
17181 if (rtx_equal_p (dst, op0)
17182 && low_register_operand (op1, SImode))
17183 action = action_for_partial_flag_setting;
17184 /* ASRS <Rd>,<Rm>,#<imm5> */
17185 /* LSRS <Rd>,<Rm>,#<imm5> */
17186 /* LSLS <Rd>,<Rm>,#<imm5> */
17187 else if (low_register_operand (op0, SImode)
17188 && CONST_INT_P (op1)
17189 && IN_RANGE (INTVAL (op1), 0, 31))
17190 action = action_for_partial_flag_setting;
17191 break;
17193 case ROTATERT:
17194 /* RORS <Rdn>,<Rm> */
17195 if (rtx_equal_p (dst, op0)
17196 && low_register_operand (op1, SImode))
17197 action = action_for_partial_flag_setting;
17198 break;
17200 case NOT:
17201 /* MVNS <Rd>,<Rm> */
17202 if (low_register_operand (op0, SImode))
17203 action = action_for_partial_flag_setting;
17204 break;
17206 case NEG:
17207 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17208 if (low_register_operand (op0, SImode))
17209 action = CONV;
17210 break;
17212 case CONST_INT:
17213 /* MOVS <Rd>,#<imm8> */
17214 if (CONST_INT_P (src)
17215 && IN_RANGE (INTVAL (src), 0, 255))
17216 action = action_for_partial_flag_setting;
17217 break;
17219 case REG:
17220 /* MOVS and MOV<c> with registers have different
17221 encodings, so are not relevant here. */
17222 break;
17224 default:
17225 break;
17229 if (action != SKIP)
17231 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17232 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17233 rtvec vec;
17235 if (action == SWAP_CONV)
17237 src = copy_rtx (src);
17238 XEXP (src, 0) = op1;
17239 XEXP (src, 1) = op0;
17240 pat = gen_rtx_SET (dst, src);
17241 vec = gen_rtvec (2, pat, clobber);
17243 else /* action == CONV */
17244 vec = gen_rtvec (2, pat, clobber);
17246 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17247 INSN_CODE (insn) = -1;
17251 if (NONDEBUG_INSN_P (insn))
17252 df_simulate_one_insn_backwards (bb, insn, &live);
17256 CLEAR_REG_SET (&live);
17259 /* Gcc puts the pool in the wrong place for ARM, since we can only
17260 load addresses a limited distance around the pc. We do some
17261 special munging to move the constant pool values to the correct
17262 point in the code. */
17263 static void
17264 arm_reorg (void)
17266 rtx_insn *insn;
17267 HOST_WIDE_INT address = 0;
17268 Mfix * fix;
17270 if (TARGET_THUMB1)
17271 thumb1_reorg ();
17272 else if (TARGET_THUMB2)
17273 thumb2_reorg ();
17275 /* Ensure all insns that must be split have been split at this point.
17276 Otherwise, the pool placement code below may compute incorrect
17277 insn lengths. Note that when optimizing, all insns have already
17278 been split at this point. */
17279 if (!optimize)
17280 split_all_insns_noflow ();
17282 minipool_fix_head = minipool_fix_tail = NULL;
17284 /* The first insn must always be a note, or the code below won't
17285 scan it properly. */
17286 insn = get_insns ();
17287 gcc_assert (NOTE_P (insn));
17288 minipool_pad = 0;
17290 /* Scan all the insns and record the operands that will need fixing. */
17291 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17293 if (BARRIER_P (insn))
17294 push_minipool_barrier (insn, address);
17295 else if (INSN_P (insn))
17297 rtx_jump_table_data *table;
17299 note_invalid_constants (insn, address, true);
17300 address += get_attr_length (insn);
17302 /* If the insn is a vector jump, add the size of the table
17303 and skip the table. */
17304 if (tablejump_p (insn, NULL, &table))
17306 address += get_jump_table_size (table);
17307 insn = table;
17310 else if (LABEL_P (insn))
17311 /* Add the worst-case padding due to alignment. We don't add
17312 the _current_ padding because the minipool insertions
17313 themselves might change it. */
17314 address += get_label_padding (insn);
17317 fix = minipool_fix_head;
17319 /* Now scan the fixups and perform the required changes. */
17320 while (fix)
17322 Mfix * ftmp;
17323 Mfix * fdel;
17324 Mfix * last_added_fix;
17325 Mfix * last_barrier = NULL;
17326 Mfix * this_fix;
17328 /* Skip any further barriers before the next fix. */
17329 while (fix && BARRIER_P (fix->insn))
17330 fix = fix->next;
17332 /* No more fixes. */
17333 if (fix == NULL)
17334 break;
17336 last_added_fix = NULL;
17338 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17340 if (BARRIER_P (ftmp->insn))
17342 if (ftmp->address >= minipool_vector_head->max_address)
17343 break;
17345 last_barrier = ftmp;
17347 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17348 break;
17350 last_added_fix = ftmp; /* Keep track of the last fix added. */
17353 /* If we found a barrier, drop back to that; any fixes that we
17354 could have reached but come after the barrier will now go in
17355 the next mini-pool. */
17356 if (last_barrier != NULL)
17358 /* Reduce the refcount for those fixes that won't go into this
17359 pool after all. */
17360 for (fdel = last_barrier->next;
17361 fdel && fdel != ftmp;
17362 fdel = fdel->next)
17364 fdel->minipool->refcount--;
17365 fdel->minipool = NULL;
17368 ftmp = last_barrier;
17370 else
17372 /* ftmp is first fix that we can't fit into this pool and
17373 there no natural barriers that we could use. Insert a
17374 new barrier in the code somewhere between the previous
17375 fix and this one, and arrange to jump around it. */
17376 HOST_WIDE_INT max_address;
17378 /* The last item on the list of fixes must be a barrier, so
17379 we can never run off the end of the list of fixes without
17380 last_barrier being set. */
17381 gcc_assert (ftmp);
17383 max_address = minipool_vector_head->max_address;
17384 /* Check that there isn't another fix that is in range that
17385 we couldn't fit into this pool because the pool was
17386 already too large: we need to put the pool before such an
17387 instruction. The pool itself may come just after the
17388 fix because create_fix_barrier also allows space for a
17389 jump instruction. */
17390 if (ftmp->address < max_address)
17391 max_address = ftmp->address + 1;
17393 last_barrier = create_fix_barrier (last_added_fix, max_address);
17396 assign_minipool_offsets (last_barrier);
17398 while (ftmp)
17400 if (!BARRIER_P (ftmp->insn)
17401 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17402 == NULL))
17403 break;
17405 ftmp = ftmp->next;
17408 /* Scan over the fixes we have identified for this pool, fixing them
17409 up and adding the constants to the pool itself. */
17410 for (this_fix = fix; this_fix && ftmp != this_fix;
17411 this_fix = this_fix->next)
17412 if (!BARRIER_P (this_fix->insn))
17414 rtx addr
17415 = plus_constant (Pmode,
17416 gen_rtx_LABEL_REF (VOIDmode,
17417 minipool_vector_label),
17418 this_fix->minipool->offset);
17419 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17422 dump_minipool (last_barrier->insn);
17423 fix = ftmp;
17426 /* From now on we must synthesize any constants that we can't handle
17427 directly. This can happen if the RTL gets split during final
17428 instruction generation. */
17429 cfun->machine->after_arm_reorg = 1;
17431 /* Free the minipool memory. */
17432 obstack_free (&minipool_obstack, minipool_startobj);
17435 /* Routines to output assembly language. */
17437 /* Return the string representation of the passed-in real value. */
17438 static const char *
17439 fp_const_from_val (REAL_VALUE_TYPE *r)
17441 if (!fp_consts_inited)
17442 init_fp_table ();
17444 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17445 return "0";
17448 /* OPERANDS[0] is the entire list of insns that constitute pop,
17449 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17450 is in the list, UPDATE is true iff the list contains explicit
17451 update of base register. */
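/* For illustration (register choices are arbitrary): with SP as the base
   register and unified syntax this emits e.g. "pop {r4, r5, pc}"; otherwise
   it builds an "ldmfd sp, {...}" form when the base register is SP, or an
   "ldmia r7!, {...}" style form for any other base register.  */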
17452 void
17453 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17454 bool update)
17456 int i;
17457 char pattern[100];
17458 int offset;
17459 const char *conditional;
17460 int num_saves = XVECLEN (operands[0], 0);
17461 unsigned int regno;
17462 unsigned int regno_base = REGNO (operands[1]);
17464 offset = 0;
17465 offset += update ? 1 : 0;
17466 offset += return_pc ? 1 : 0;
17468 /* Is the base register in the list? */
17469 for (i = offset; i < num_saves; i++)
17471 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17472 /* If SP is in the list, then the base register must be SP. */
17473 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17474 /* If base register is in the list, there must be no explicit update. */
17475 if (regno == regno_base)
17476 gcc_assert (!update);
17479 conditional = reverse ? "%?%D0" : "%?%d0";
17480 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17482 /* Output pop (not ldmfd) because it has a shorter encoding. */
17483 gcc_assert (update);
17484 sprintf (pattern, "pop%s\t{", conditional);
17486 else
17488 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17489 It's just a convention; their semantics are identical. */
17490 if (regno_base == SP_REGNUM)
17491 sprintf (pattern, "ldm%sfd\t", conditional);
17492 else if (TARGET_UNIFIED_ASM)
17493 sprintf (pattern, "ldmia%s\t", conditional);
17494 else
17495 sprintf (pattern, "ldm%sia\t", conditional);
17497 strcat (pattern, reg_names[regno_base]);
17498 if (update)
17499 strcat (pattern, "!, {");
17500 else
17501 strcat (pattern, ", {");
17504 /* Output the first destination register. */
17505 strcat (pattern,
17506 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17508 /* Output the rest of the destination registers. */
17509 for (i = offset + 1; i < num_saves; i++)
17511 strcat (pattern, ", ");
17512 strcat (pattern,
17513 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17516 strcat (pattern, "}");
17518 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17519 strcat (pattern, "^");
17521 output_asm_insn (pattern, &cond);
17525 /* Output the assembly for a store multiple. */
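/* Rough illustration (register numbers are arbitrary): storing two double
   registers below SP emits "vpush.64 {d8, d9}", while the same store through
   another base register emits "vstmdb.64 r4!, {d8, d9}".  */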
17527 const char *
17528 vfp_output_vstmd (rtx * operands)
17530 char pattern[100];
17531 int p;
17532 int base;
17533 int i;
17534 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17535 ? XEXP (operands[0], 0)
17536 : XEXP (XEXP (operands[0], 0), 0);
17537 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17539 if (push_p)
17540 strcpy (pattern, "vpush%?.64\t{%P1");
17541 else
17542 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17544 p = strlen (pattern);
17546 gcc_assert (REG_P (operands[1]));
17548 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17549 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17551 p += sprintf (&pattern[p], ", d%d", base + i);
17553 strcpy (&pattern[p], "}");
17555 output_asm_insn (pattern, operands);
17556 return "";
17560 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17561 number of bytes pushed. */
17563 static int
17564 vfp_emit_fstmd (int base_reg, int count)
17566 rtx par;
17567 rtx dwarf;
17568 rtx tmp, reg;
17569 int i;
17571 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17572 register pairs are stored by a store multiple insn. We avoid this
17573 by pushing an extra pair. */
17574 if (count == 2 && !arm_arch6)
17576 if (base_reg == LAST_VFP_REGNUM - 3)
17577 base_reg -= 2;
17578 count++;
17581 /* FSTMD may not store more than 16 doubleword registers at once. Split
17582 larger stores into multiple parts (up to a maximum of two, in
17583 practice). */
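/* For example, a (hypothetical) request to push 20 double registers would be
   split here into one FSTMD of the highest 4 registers followed by one FSTMD
   of the remaining 16.  */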
17584 if (count > 16)
17586 int saved;
17587 /* NOTE: base_reg is an internal register number, so each D register
17588 counts as 2. */
17589 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17590 saved += vfp_emit_fstmd (base_reg, 16);
17591 return saved;
17594 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17595 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17597 reg = gen_rtx_REG (DFmode, base_reg);
17598 base_reg += 2;
17600 XVECEXP (par, 0, 0)
17601 = gen_rtx_SET (gen_frame_mem
17602 (BLKmode,
17603 gen_rtx_PRE_MODIFY (Pmode,
17604 stack_pointer_rtx,
17605 plus_constant
17606 (Pmode, stack_pointer_rtx,
17607 - (count * 8)))
17609 gen_rtx_UNSPEC (BLKmode,
17610 gen_rtvec (1, reg),
17611 UNSPEC_PUSH_MULT));
17613 tmp = gen_rtx_SET (stack_pointer_rtx,
17614 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17615 RTX_FRAME_RELATED_P (tmp) = 1;
17616 XVECEXP (dwarf, 0, 0) = tmp;
17618 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17619 RTX_FRAME_RELATED_P (tmp) = 1;
17620 XVECEXP (dwarf, 0, 1) = tmp;
17622 for (i = 1; i < count; i++)
17624 reg = gen_rtx_REG (DFmode, base_reg);
17625 base_reg += 2;
17626 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17628 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17629 plus_constant (Pmode,
17630 stack_pointer_rtx,
17631 i * 8)),
17632 reg);
17633 RTX_FRAME_RELATED_P (tmp) = 1;
17634 XVECEXP (dwarf, 0, i + 1) = tmp;
17637 par = emit_insn (par);
17638 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17639 RTX_FRAME_RELATED_P (par) = 1;
17641 return count * 8;
17644 /* Emit a call instruction with pattern PAT. ADDR is the address of
17645 the call target. */
17647 void
17648 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17650 rtx insn;
17652 insn = emit_call_insn (pat);
17654 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17655 If the call might use such an entry, add a use of the PIC register
17656 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17657 if (TARGET_VXWORKS_RTP
17658 && flag_pic
17659 && !sibcall
17660 && GET_CODE (addr) == SYMBOL_REF
17661 && (SYMBOL_REF_DECL (addr)
17662 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17663 : !SYMBOL_REF_LOCAL_P (addr)))
17665 require_pic_register ();
17666 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17669 if (TARGET_AAPCS_BASED)
17671 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17672 linker. We need to add an IP clobber to allow setting
17673 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17674 is not needed since it's a fixed register. */
17675 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17676 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17680 /* Output a 'call' insn. */
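/* On pre-ARMv5 targets this typically expands to "mov lr, pc" followed by
   either "bx r0" (when interworking or ARMv4T bx is available) or
   "mov pc, r0"; the register shown is only an example.  */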
17681 const char *
17682 output_call (rtx *operands)
17684 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17686 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17687 if (REGNO (operands[0]) == LR_REGNUM)
17689 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17690 output_asm_insn ("mov%?\t%0, %|lr", operands);
17693 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17695 if (TARGET_INTERWORK || arm_arch4t)
17696 output_asm_insn ("bx%?\t%0", operands);
17697 else
17698 output_asm_insn ("mov%?\t%|pc, %0", operands);
17700 return "";
17703 /* Output a 'call' insn that is a reference in memory. This is
17704 disabled on ARMv5 and later, where we prefer a blx instead, because
17705 otherwise there's a significant performance overhead. */
17706 const char *
17707 output_call_mem (rtx *operands)
17709 gcc_assert (!arm_arch5);
17710 if (TARGET_INTERWORK)
17712 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17713 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17714 output_asm_insn ("bx%?\t%|ip", operands);
17716 else if (regno_use_in (LR_REGNUM, operands[0]))
17718 /* LR is used in the memory address. We load the address in the
17719 first instruction. It's safe to use IP as the target of the
17720 load since the call will kill it anyway. */
17721 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17722 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17723 if (arm_arch4t)
17724 output_asm_insn ("bx%?\t%|ip", operands);
17725 else
17726 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17728 else
17730 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17731 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17734 return "";
17738 /* Output a move from arm registers to arm registers of a long double
17739 OPERANDS[0] is the destination.
17740 OPERANDS[1] is the source. */
17741 const char *
17742 output_mov_long_double_arm_from_arm (rtx *operands)
17744 /* We have to be careful here because the two might overlap. */
17745 int dest_start = REGNO (operands[0]);
17746 int src_start = REGNO (operands[1]);
17747 rtx ops[2];
17748 int i;
17750 if (dest_start < src_start)
17752 for (i = 0; i < 3; i++)
17754 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17755 ops[1] = gen_rtx_REG (SImode, src_start + i);
17756 output_asm_insn ("mov%?\t%0, %1", ops);
17759 else
17761 for (i = 2; i >= 0; i--)
17763 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17764 ops[1] = gen_rtx_REG (SImode, src_start + i);
17765 output_asm_insn ("mov%?\t%0, %1", ops);
17769 return "";
17772 void
17773 arm_emit_movpair (rtx dest, rtx src)
17775 /* If the src is an immediate, simplify it. */
17776 if (CONST_INT_P (src))
17778 HOST_WIDE_INT val = INTVAL (src);
17779 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17780 if ((val >> 16) & 0x0000ffff)
17781 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17782 GEN_INT (16)),
17783 GEN_INT ((val >> 16) & 0x0000ffff));
17784 return;
17786 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17787 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
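/* Worked example (value chosen arbitrarily): for the constant 0x12345678 the
   code above first sets the destination to 0x5678 and then, because the
   upper half is non-zero, writes 0x1234 into bits 16-31 via the ZERO_EXTRACT
   set, giving a movw/movt style pair.  */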
17790 /* Output a move between double words. It must be REG<-MEM
17791 or MEM<-REG. */
17792 const char *
17793 output_move_double (rtx *operands, bool emit, int *count)
17795 enum rtx_code code0 = GET_CODE (operands[0]);
17796 enum rtx_code code1 = GET_CODE (operands[1]);
17797 rtx otherops[3];
17798 if (count)
17799 *count = 1;
17801 /* The only case when this might happen is when
17802 you are looking at the length of a DImode instruction
17803 that has an invalid constant in it. */
17804 if (code0 == REG && code1 != MEM)
17806 gcc_assert (!emit);
17807 *count = 2;
17808 return "";
17811 if (code0 == REG)
17813 unsigned int reg0 = REGNO (operands[0]);
17815 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17817 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17819 switch (GET_CODE (XEXP (operands[1], 0)))
17821 case REG:
17823 if (emit)
17825 if (TARGET_LDRD
17826 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17827 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17828 else
17829 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17831 break;
17833 case PRE_INC:
17834 gcc_assert (TARGET_LDRD);
17835 if (emit)
17836 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17837 break;
17839 case PRE_DEC:
17840 if (emit)
17842 if (TARGET_LDRD)
17843 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17844 else
17845 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17847 break;
17849 case POST_INC:
17850 if (emit)
17852 if (TARGET_LDRD)
17853 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17854 else
17855 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17857 break;
17859 case POST_DEC:
17860 gcc_assert (TARGET_LDRD);
17861 if (emit)
17862 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17863 break;
17865 case PRE_MODIFY:
17866 case POST_MODIFY:
17867 /* Autoincrement addressing modes should never have overlapping
17868 base and destination registers, and overlapping index registers
17869 are already prohibited, so this doesn't need to worry about
17870 fix_cm3_ldrd. */
17871 otherops[0] = operands[0];
17872 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17873 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17875 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17877 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17879 /* Registers overlap so split out the increment. */
17880 if (emit)
17882 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17883 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17885 if (count)
17886 *count = 2;
17888 else
17890 /* Use a single insn if we can.
17891 FIXME: IWMMXT allows offsets larger than ldrd can
17892 handle, fix these up with a pair of ldr. */
17893 if (TARGET_THUMB2
17894 || !CONST_INT_P (otherops[2])
17895 || (INTVAL (otherops[2]) > -256
17896 && INTVAL (otherops[2]) < 256))
17898 if (emit)
17899 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17901 else
17903 if (emit)
17905 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17906 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17908 if (count)
17909 *count = 2;
17914 else
17916 /* Use a single insn if we can.
17917 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17918 fix these up with a pair of ldr. */
17919 if (TARGET_THUMB2
17920 || !CONST_INT_P (otherops[2])
17921 || (INTVAL (otherops[2]) > -256
17922 && INTVAL (otherops[2]) < 256))
17924 if (emit)
17925 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17927 else
17929 if (emit)
17931 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17932 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17934 if (count)
17935 *count = 2;
17938 break;
17940 case LABEL_REF:
17941 case CONST:
17942 /* We might be able to use ldrd %0, %1 here. However the range is
17943 different to ldr/adr, and it is broken on some ARMv7-M
17944 implementations. */
17945 /* Use the second register of the pair to avoid problematic
17946 overlap. */
17947 otherops[1] = operands[1];
17948 if (emit)
17949 output_asm_insn ("adr%?\t%0, %1", otherops);
17950 operands[1] = otherops[0];
17951 if (emit)
17953 if (TARGET_LDRD)
17954 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17955 else
17956 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17959 if (count)
17960 *count = 2;
17961 break;
17963 /* ??? This needs checking for thumb2. */
17964 default:
17965 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17966 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17968 otherops[0] = operands[0];
17969 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17970 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17972 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17974 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17976 switch ((int) INTVAL (otherops[2]))
17978 case -8:
17979 if (emit)
17980 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17981 return "";
17982 case -4:
17983 if (TARGET_THUMB2)
17984 break;
17985 if (emit)
17986 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17987 return "";
17988 case 4:
17989 if (TARGET_THUMB2)
17990 break;
17991 if (emit)
17992 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17993 return "";
17996 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17997 operands[1] = otherops[0];
17998 if (TARGET_LDRD
17999 && (REG_P (otherops[2])
18000 || TARGET_THUMB2
18001 || (CONST_INT_P (otherops[2])
18002 && INTVAL (otherops[2]) > -256
18003 && INTVAL (otherops[2]) < 256)))
18005 if (reg_overlap_mentioned_p (operands[0],
18006 otherops[2]))
18008 /* Swap base and index registers over to
18009 avoid a conflict. */
18010 std::swap (otherops[1], otherops[2]);
18012 /* If both registers conflict, it will usually
18013 have been fixed by a splitter. */
18014 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18015 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18017 if (emit)
18019 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18020 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18022 if (count)
18023 *count = 2;
18025 else
18027 otherops[0] = operands[0];
18028 if (emit)
18029 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18031 return "";
18034 if (CONST_INT_P (otherops[2]))
18036 if (emit)
18038 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18039 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18040 else
18041 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18044 else
18046 if (emit)
18047 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18050 else
18052 if (emit)
18053 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18056 if (count)
18057 *count = 2;
18059 if (TARGET_LDRD)
18060 return "ldr%(d%)\t%0, [%1]";
18062 return "ldm%(ia%)\t%1, %M0";
18064 else
18066 otherops[1] = adjust_address (operands[1], SImode, 4);
18067 /* Take care of overlapping base/data reg. */
18068 if (reg_mentioned_p (operands[0], operands[1]))
18070 if (emit)
18072 output_asm_insn ("ldr%?\t%0, %1", otherops);
18073 output_asm_insn ("ldr%?\t%0, %1", operands);
18075 if (count)
18076 *count = 2;
18079 else
18081 if (emit)
18083 output_asm_insn ("ldr%?\t%0, %1", operands);
18084 output_asm_insn ("ldr%?\t%0, %1", otherops);
18086 if (count)
18087 *count = 2;
18092 else
18094 /* Constraints should ensure this. */
18095 gcc_assert (code0 == MEM && code1 == REG);
18096 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18097 || (TARGET_ARM && TARGET_LDRD));
18099 switch (GET_CODE (XEXP (operands[0], 0)))
18101 case REG:
18102 if (emit)
18104 if (TARGET_LDRD)
18105 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18106 else
18107 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18109 break;
18111 case PRE_INC:
18112 gcc_assert (TARGET_LDRD);
18113 if (emit)
18114 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18115 break;
18117 case PRE_DEC:
18118 if (emit)
18120 if (TARGET_LDRD)
18121 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18122 else
18123 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18125 break;
18127 case POST_INC:
18128 if (emit)
18130 if (TARGET_LDRD)
18131 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18132 else
18133 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18135 break;
18137 case POST_DEC:
18138 gcc_assert (TARGET_LDRD);
18139 if (emit)
18140 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18141 break;
18143 case PRE_MODIFY:
18144 case POST_MODIFY:
18145 otherops[0] = operands[1];
18146 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18147 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18149 /* IWMMXT allows offsets larger than ldrd can handle,
18150 fix these up with a pair of ldr. */
18151 if (!TARGET_THUMB2
18152 && CONST_INT_P (otherops[2])
18153 && (INTVAL(otherops[2]) <= -256
18154 || INTVAL(otherops[2]) >= 256))
18156 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18158 if (emit)
18160 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18161 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18163 if (count)
18164 *count = 2;
18166 else
18168 if (emit)
18170 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18171 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18173 if (count)
18174 *count = 2;
18177 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18179 if (emit)
18180 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18182 else
18184 if (emit)
18185 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18187 break;
18189 case PLUS:
18190 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18191 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18193 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18195 case -8:
18196 if (emit)
18197 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18198 return "";
18200 case -4:
18201 if (TARGET_THUMB2)
18202 break;
18203 if (emit)
18204 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18205 return "";
18207 case 4:
18208 if (TARGET_THUMB2)
18209 break;
18210 if (emit)
18211 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18212 return "";
18215 if (TARGET_LDRD
18216 && (REG_P (otherops[2])
18217 || TARGET_THUMB2
18218 || (CONST_INT_P (otherops[2])
18219 && INTVAL (otherops[2]) > -256
18220 && INTVAL (otherops[2]) < 256)))
18222 otherops[0] = operands[1];
18223 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18224 if (emit)
18225 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18226 return "";
18228 /* Fall through */
18230 default:
18231 otherops[0] = adjust_address (operands[0], SImode, 4);
18232 otherops[1] = operands[1];
18233 if (emit)
18235 output_asm_insn ("str%?\t%1, %0", operands);
18236 output_asm_insn ("str%?\t%H1, %0", otherops);
18238 if (count)
18239 *count = 2;
18243 return "";
18246 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18247 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18249 const char *
18250 output_move_quad (rtx *operands)
18252 if (REG_P (operands[0]))
18254 /* Load, or reg->reg move. */
18256 if (MEM_P (operands[1]))
18258 switch (GET_CODE (XEXP (operands[1], 0)))
18260 case REG:
18261 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18262 break;
18264 case LABEL_REF:
18265 case CONST:
18266 output_asm_insn ("adr%?\t%0, %1", operands);
18267 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18268 break;
18270 default:
18271 gcc_unreachable ();
18274 else
18276 rtx ops[2];
18277 int dest, src, i;
18279 gcc_assert (REG_P (operands[1]));
18281 dest = REGNO (operands[0]);
18282 src = REGNO (operands[1]);
18284 /* This seems pretty dumb, but hopefully GCC won't try to do it
18285 very often. */
18286 if (dest < src)
18287 for (i = 0; i < 4; i++)
18289 ops[0] = gen_rtx_REG (SImode, dest + i);
18290 ops[1] = gen_rtx_REG (SImode, src + i);
18291 output_asm_insn ("mov%?\t%0, %1", ops);
18293 else
18294 for (i = 3; i >= 0; i--)
18296 ops[0] = gen_rtx_REG (SImode, dest + i);
18297 ops[1] = gen_rtx_REG (SImode, src + i);
18298 output_asm_insn ("mov%?\t%0, %1", ops);
18302 else
18304 gcc_assert (MEM_P (operands[0]));
18305 gcc_assert (REG_P (operands[1]));
18306 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18308 switch (GET_CODE (XEXP (operands[0], 0)))
18310 case REG:
18311 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18312 break;
18314 default:
18315 gcc_unreachable ();
18319 return "";
18322 /* Output a VFP load or store instruction. */
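/* For instance (register and address are arbitrary), a DFmode load from
   [r0] comes out as "vldr.64 d8, [r0]", while an SFmode store with
   post-increment uses the "vstmia" form built below.  */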
18324 const char *
18325 output_move_vfp (rtx *operands)
18327 rtx reg, mem, addr, ops[2];
18328 int load = REG_P (operands[0]);
18329 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18330 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18331 const char *templ;
18332 char buff[50];
18333 machine_mode mode;
18335 reg = operands[!load];
18336 mem = operands[load];
18338 mode = GET_MODE (reg);
18340 gcc_assert (REG_P (reg));
18341 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18342 gcc_assert (mode == SFmode
18343 || mode == DFmode
18344 || mode == SImode
18345 || mode == DImode
18346 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18347 gcc_assert (MEM_P (mem));
18349 addr = XEXP (mem, 0);
18351 switch (GET_CODE (addr))
18353 case PRE_DEC:
18354 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18355 ops[0] = XEXP (addr, 0);
18356 ops[1] = reg;
18357 break;
18359 case POST_INC:
18360 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18361 ops[0] = XEXP (addr, 0);
18362 ops[1] = reg;
18363 break;
18365 default:
18366 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18367 ops[0] = reg;
18368 ops[1] = mem;
18369 break;
18372 sprintf (buff, templ,
18373 load ? "ld" : "st",
18374 dp ? "64" : "32",
18375 dp ? "P" : "",
18376 integer_p ? "\t%@ int" : "");
18377 output_asm_insn (buff, ops);
18379 return "";
18382 /* Output a Neon double-word or quad-word load or store, or a load
18383 or store for larger structure modes.
18385 WARNING: The ordering of elements is weird in big-endian mode,
18386 because the EABI requires that vectors stored in memory appear
18387 as though they were stored by a VSTM instruction.
18388 GCC RTL defines element ordering based on in-memory order.
18389 This can be different from the architectural ordering of elements
18390 within a NEON register. The intrinsics defined in arm_neon.h use the
18391 NEON register element ordering, not the GCC RTL element ordering.
18393 For example, the in-memory ordering of a big-endian quadword
18394 vector with 16-bit elements when stored from register pair {d0,d1}
18395 will be (lowest address first, d0[N] is NEON register element N):
18397 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18399 When necessary, quadword registers (dN, dN+1) are moved to ARM
18400 registers from rN in the order:
18402 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18404 So that STM/LDM can be used on vectors in ARM registers, and the
18405 same memory layout will result as if VSTM/VLDM were used.
18407 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18408 possible, which allows use of appropriate alignment tags.
18409 Note that the choice of "64" is independent of the actual vector
18410 element size; this size simply ensures that the behavior is
18411 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18413 Due to limitations of those instructions, use of VST1.64/VLD1.64
18414 is not possible if:
18415 - the address contains PRE_DEC, or
18416 - the mode refers to more than 4 double-word registers
18418 In those cases, it would be possible to replace VSTM/VLDM by a
18419 sequence of instructions; this is not currently implemented since
18420 this is not certain to actually improve performance. */
18422 const char *
18423 output_move_neon (rtx *operands)
18425 rtx reg, mem, addr, ops[2];
18426 int regno, nregs, load = REG_P (operands[0]);
18427 const char *templ;
18428 char buff[50];
18429 machine_mode mode;
18431 reg = operands[!load];
18432 mem = operands[load];
18434 mode = GET_MODE (reg);
18436 gcc_assert (REG_P (reg));
18437 regno = REGNO (reg);
18438 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18439 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18440 || NEON_REGNO_OK_FOR_QUAD (regno));
18441 gcc_assert (VALID_NEON_DREG_MODE (mode)
18442 || VALID_NEON_QREG_MODE (mode)
18443 || VALID_NEON_STRUCT_MODE (mode));
18444 gcc_assert (MEM_P (mem));
18446 addr = XEXP (mem, 0);
18448 /* Strip off const from addresses like (const (plus (...))). */
18449 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18450 addr = XEXP (addr, 0);
18452 switch (GET_CODE (addr))
18454 case POST_INC:
18455 /* We have to use vldm / vstm for too-large modes. */
18456 if (nregs > 4)
18458 templ = "v%smia%%?\t%%0!, %%h1";
18459 ops[0] = XEXP (addr, 0);
18461 else
18463 templ = "v%s1.64\t%%h1, %%A0";
18464 ops[0] = mem;
18466 ops[1] = reg;
18467 break;
18469 case PRE_DEC:
18470 /* We have to use vldm / vstm in this case, since there is no
18471 pre-decrement form of the vld1 / vst1 instructions. */
18472 templ = "v%smdb%%?\t%%0!, %%h1";
18473 ops[0] = XEXP (addr, 0);
18474 ops[1] = reg;
18475 break;
18477 case POST_MODIFY:
18478 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18479 gcc_unreachable ();
18481 case REG:
18482 /* We have to use vldm / vstm for too-large modes. */
18483 if (nregs > 1)
18485 if (nregs > 4)
18486 templ = "v%smia%%?\t%%m0, %%h1";
18487 else
18488 templ = "v%s1.64\t%%h1, %%A0";
18490 ops[0] = mem;
18491 ops[1] = reg;
18492 break;
18494 /* Fall through. */
18495 case LABEL_REF:
18496 case PLUS:
18498 int i;
18499 int overlap = -1;
18500 for (i = 0; i < nregs; i++)
18502 /* We're only using DImode here because it's a convenient size. */
18503 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18504 ops[1] = adjust_address (mem, DImode, 8 * i);
18505 if (reg_overlap_mentioned_p (ops[0], mem))
18507 gcc_assert (overlap == -1);
18508 overlap = i;
18510 else
18512 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18513 output_asm_insn (buff, ops);
18516 if (overlap != -1)
18518 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18519 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18520 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18521 output_asm_insn (buff, ops);
18524 return "";
18527 default:
18528 gcc_unreachable ();
18531 sprintf (buff, templ, load ? "ld" : "st");
18532 output_asm_insn (buff, ops);
18534 return "";
18537 /* Compute and return the length of neon_mov<mode>, where <mode> is
18538 one of VSTRUCT modes: EI, OI, CI or XI. */
18539 int
18540 arm_attr_length_move_neon (rtx_insn *insn)
18542 rtx reg, mem, addr;
18543 int load;
18544 machine_mode mode;
18546 extract_insn_cached (insn);
18548 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18550 mode = GET_MODE (recog_data.operand[0]);
18551 switch (mode)
18553 case EImode:
18554 case OImode:
18555 return 8;
18556 case CImode:
18557 return 12;
18558 case XImode:
18559 return 16;
18560 default:
18561 gcc_unreachable ();
18565 load = REG_P (recog_data.operand[0]);
18566 reg = recog_data.operand[!load];
18567 mem = recog_data.operand[load];
18569 gcc_assert (MEM_P (mem));
18571 mode = GET_MODE (reg);
18572 addr = XEXP (mem, 0);
18574 /* Strip off const from addresses like (const (plus (...))). */
18575 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18576 addr = XEXP (addr, 0);
18578 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18580 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18581 return insns * 4;
18583 else
18584 return 4;
18587 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18588 return zero. */
18590 int
18591 arm_address_offset_is_imm (rtx_insn *insn)
18593 rtx mem, addr;
18595 extract_insn_cached (insn);
18597 if (REG_P (recog_data.operand[0]))
18598 return 0;
18600 mem = recog_data.operand[0];
18602 gcc_assert (MEM_P (mem));
18604 addr = XEXP (mem, 0);
18606 if (REG_P (addr)
18607 || (GET_CODE (addr) == PLUS
18608 && REG_P (XEXP (addr, 0))
18609 && CONST_INT_P (XEXP (addr, 1))))
18610 return 1;
18611 else
18612 return 0;
18615 /* Output an ADD r, s, #n where n may be too big for one instruction.
18616 If the operation is adding zero to the same register, output nothing.
18617 const char *
18618 output_add_immediate (rtx *operands)
18620 HOST_WIDE_INT n = INTVAL (operands[2]);
18622 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18624 if (n < 0)
18625 output_multi_immediate (operands,
18626 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18627 -n);
18628 else
18629 output_multi_immediate (operands,
18630 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18634 return "";
18637 /* Output a multiple immediate operation.
18638 OPERANDS is the vector of operands referred to in the output patterns.
18639 INSTR1 is the output pattern to use for the first constant.
18640 INSTR2 is the output pattern to use for subsequent constants.
18641 IMMED_OP is the index of the constant slot in OPERANDS.
18642 N is the constant value. */
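/* Worked example (operand registers chosen arbitrarily): for N = 0x1001 and
   the "add" patterns passed in by output_add_immediate, the loop below emits
   "add r0, r1, #1" followed by "add r0, r0, #4096", because the constant
   splits into the byte-sized chunks 0x001 and 0x1000.  */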
18643 static const char *
18644 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18645 int immed_op, HOST_WIDE_INT n)
18647 #if HOST_BITS_PER_WIDE_INT > 32
18648 n &= 0xffffffff;
18649 #endif
18651 if (n == 0)
18653 /* Quick and easy output. */
18654 operands[immed_op] = const0_rtx;
18655 output_asm_insn (instr1, operands);
18657 else
18659 int i;
18660 const char * instr = instr1;
18662 /* Note that n is never zero here (which would give no output). */
18663 for (i = 0; i < 32; i += 2)
18665 if (n & (3 << i))
18667 operands[immed_op] = GEN_INT (n & (255 << i));
18668 output_asm_insn (instr, operands);
18669 instr = instr2;
18670 i += 6;
18675 return "";
18678 /* Return the name of a shifter operation. */
18679 static const char *
18680 arm_shift_nmem(enum rtx_code code)
18682 switch (code)
18684 case ASHIFT:
18685 return ARM_LSL_NAME;
18687 case ASHIFTRT:
18688 return "asr";
18690 case LSHIFTRT:
18691 return "lsr";
18693 case ROTATERT:
18694 return "ror";
18696 default:
18697 abort();
18701 /* Return the appropriate ARM instruction for the operation code.
18702 The returned result should not be overwritten. OP is the rtx of the
18703 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18704 was shifted. */
18705 const char *
18706 arithmetic_instr (rtx op, int shift_first_arg)
18708 switch (GET_CODE (op))
18710 case PLUS:
18711 return "add";
18713 case MINUS:
18714 return shift_first_arg ? "rsb" : "sub";
18716 case IOR:
18717 return "orr";
18719 case XOR:
18720 return "eor";
18722 case AND:
18723 return "and";
18725 case ASHIFT:
18726 case ASHIFTRT:
18727 case LSHIFTRT:
18728 case ROTATERT:
18729 return arm_shift_nmem(GET_CODE(op));
18731 default:
18732 gcc_unreachable ();
18736 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18737 for the operation code. The returned result should not be overwritten.
18738 OP is the rtx code of the shift.
18739 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18740 constant shift amount if the shift is by a constant. */
18741 static const char *
18742 shift_op (rtx op, HOST_WIDE_INT *amountp)
18744 const char * mnem;
18745 enum rtx_code code = GET_CODE (op);
18747 switch (code)
18749 case ROTATE:
18750 if (!CONST_INT_P (XEXP (op, 1)))
18752 output_operand_lossage ("invalid shift operand");
18753 return NULL;
18756 code = ROTATERT;
18757 *amountp = 32 - INTVAL (XEXP (op, 1));
18758 mnem = "ror";
18759 break;
18761 case ASHIFT:
18762 case ASHIFTRT:
18763 case LSHIFTRT:
18764 case ROTATERT:
18765 mnem = arm_shift_nmem(code);
18766 if (CONST_INT_P (XEXP (op, 1)))
18768 *amountp = INTVAL (XEXP (op, 1));
18770 else if (REG_P (XEXP (op, 1)))
18772 *amountp = -1;
18773 return mnem;
18775 else
18777 output_operand_lossage ("invalid shift operand");
18778 return NULL;
18780 break;
18782 case MULT:
18783 /* We never have to worry about the amount being other than a
18784 power of 2, since this case can never be reloaded from a reg. */
18785 if (!CONST_INT_P (XEXP (op, 1)))
18787 output_operand_lossage ("invalid shift operand");
18788 return NULL;
18791 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18793 /* Amount must be a power of two. */
18794 if (*amountp & (*amountp - 1))
18796 output_operand_lossage ("invalid shift operand");
18797 return NULL;
18800 *amountp = int_log2 (*amountp);
18801 return ARM_LSL_NAME;
18803 default:
18804 output_operand_lossage ("invalid shift operand");
18805 return NULL;
18808 /* This is not 100% correct, but follows from the desire to merge
18809 multiplication by a power of 2 with the recognizer for a
18810 shift. >=32 is not a valid shift for "lsl", so we must try and
18811 output a shift that produces the correct arithmetical result.
18812 Using lsr #32 is identical except for the fact that the carry bit
18813 is not set correctly if we set the flags; but we never use the
18814 carry bit from such an operation, so we can ignore that. */
18815 if (code == ROTATERT)
18816 /* Rotate is just modulo 32. */
18817 *amountp &= 31;
18818 else if (*amountp != (*amountp & 31))
18820 if (code == ASHIFT)
18821 mnem = "lsr";
18822 *amountp = 32;
18825 /* Shifts of 0 are no-ops. */
18826 if (*amountp == 0)
18827 return NULL;
18829 return mnem;
18832 /* Obtain the shift from the POWER of two. */
18834 static HOST_WIDE_INT
18835 int_log2 (HOST_WIDE_INT power)
18837 HOST_WIDE_INT shift = 0;
18839 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18841 gcc_assert (shift <= 31);
18842 shift++;
18845 return shift;
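/* For instance, int_log2 (8) returns 3, so shift_op above turns a MULT by 8
   into a left shift (ARM_LSL_NAME) with an amount of 3.  */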
18848 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18849 because /bin/as is horribly restrictive. The judgement about
18850 whether or not each character is 'printable' (and can be output as
18851 is) or not (and must be printed with an octal escape) must be made
18852 with reference to the *host* character set -- the situation is
18853 similar to that discussed in the comments above pp_c_char in
18854 c-pretty-print.c. */
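/* Roughly: printable characters are copied through (with '\' and '"'
   escaped), anything else becomes a three-digit octal escape such as \007,
   and a fresh .ascii directive is started whenever the current one reaches
   MAX_ASCII_LEN characters.  */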
18856 #define MAX_ASCII_LEN 51
18858 void
18859 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18861 int i;
18862 int len_so_far = 0;
18864 fputs ("\t.ascii\t\"", stream);
18866 for (i = 0; i < len; i++)
18868 int c = p[i];
18870 if (len_so_far >= MAX_ASCII_LEN)
18872 fputs ("\"\n\t.ascii\t\"", stream);
18873 len_so_far = 0;
18876 if (ISPRINT (c))
18878 if (c == '\\' || c == '\"')
18880 putc ('\\', stream);
18881 len_so_far++;
18883 putc (c, stream);
18884 len_so_far++;
18886 else
18888 fprintf (stream, "\\%03o", c);
18889 len_so_far += 4;
18893 fputs ("\"\n", stream);
18896 /* Whether a register is callee saved or not. This is necessary because high
18897 registers are marked as caller saved when optimizing for size on Thumb-1
18898 targets, despite being callee saved, in order to avoid using them. */
18899 #define callee_saved_reg_p(reg) \
18900 (!call_used_regs[reg] \
18901 || (TARGET_THUMB1 && optimize_size \
18902 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18904 /* Compute the register save mask for registers 0 through 12
18905 inclusive. This code is used by arm_compute_save_reg_mask. */
18907 static unsigned long
18908 arm_compute_save_reg0_reg12_mask (void)
18910 unsigned long func_type = arm_current_func_type ();
18911 unsigned long save_reg_mask = 0;
18912 unsigned int reg;
18914 if (IS_INTERRUPT (func_type))
18916 unsigned int max_reg;
18917 /* Interrupt functions must not corrupt any registers,
18918 even call clobbered ones. If this is a leaf function
18919 we can just examine the registers used by the RTL, but
18920 otherwise we have to assume that whatever function is
18921 called might clobber anything, and so we have to save
18922 all the call-clobbered registers as well. */
18923 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18924 /* FIQ handlers have registers r8 - r12 banked, so
18925 we only need to check r0 - r7.  Normal ISRs only
18926 bank r14 and r15, so we must check up to r12.
18927 r13 is the stack pointer which is always preserved,
18928 so we do not need to consider it here. */
18929 max_reg = 7;
18930 else
18931 max_reg = 12;
18933 for (reg = 0; reg <= max_reg; reg++)
18934 if (df_regs_ever_live_p (reg)
18935 || (! crtl->is_leaf && call_used_regs[reg]))
18936 save_reg_mask |= (1 << reg);
18938 /* Also save the pic base register if necessary. */
18939 if (flag_pic
18940 && !TARGET_SINGLE_PIC_BASE
18941 && arm_pic_register != INVALID_REGNUM
18942 && crtl->uses_pic_offset_table)
18943 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18945 else if (IS_VOLATILE(func_type))
18947 /* For noreturn functions we historically omitted register saves
18948 altogether. However this really messes up debugging. As a
18949 compromise save just the frame pointers. Combined with the link
18950 register saved elsewhere this should be sufficient to get
18951 a backtrace. */
18952 if (frame_pointer_needed)
18953 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18954 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18955 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18956 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18957 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18959 else
18961 /* In the normal case we only need to save those registers
18962 which are call saved and which are used by this function. */
18963 for (reg = 0; reg <= 11; reg++)
18964 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18965 save_reg_mask |= (1 << reg);
18967 /* Handle the frame pointer as a special case. */
18968 if (frame_pointer_needed)
18969 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18971 /* If we aren't loading the PIC register,
18972 don't stack it even though it may be live. */
18973 if (flag_pic
18974 && !TARGET_SINGLE_PIC_BASE
18975 && arm_pic_register != INVALID_REGNUM
18976 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18977 || crtl->uses_pic_offset_table))
18978 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18980 /* The prologue will copy SP into R0, so save it. */
18981 if (IS_STACKALIGN (func_type))
18982 save_reg_mask |= 1;
18985 /* Save registers so the exception handler can modify them. */
18986 if (crtl->calls_eh_return)
18988 unsigned int i;
18990 for (i = 0; ; i++)
18992 reg = EH_RETURN_DATA_REGNO (i);
18993 if (reg == INVALID_REGNUM)
18994 break;
18995 save_reg_mask |= 1 << reg;
18999 return save_reg_mask;
19002 /* Return true if r3 is live at the start of the function. */
19004 static bool
19005 arm_r3_live_at_start_p (void)
19007 /* Just look at cfg info, which is still close enough to correct at this
19008 point. This gives false positives for broken functions that might use
19009 uninitialized data that happens to be allocated in r3, but who cares? */
19010 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19013 /* Compute the number of bytes used to store the static chain register on the
19014 stack, above the stack frame. We need to know this accurately to get the
19015 alignment of the rest of the stack frame correct. */
19017 static int
19018 arm_compute_static_chain_stack_bytes (void)
19020 /* See the defining assertion in arm_expand_prologue. */
19021 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19022 && IS_NESTED (arm_current_func_type ())
19023 && arm_r3_live_at_start_p ()
19024 && crtl->args.pretend_args_size == 0)
19025 return 4;
19027 return 0;
19030 /* Compute a bit mask of which registers need to be
19031 saved on the stack for the current function.
19032 This is used by arm_get_frame_offsets, which may add extra registers. */
19034 static unsigned long
19035 arm_compute_save_reg_mask (void)
19037 unsigned int save_reg_mask = 0;
19038 unsigned long func_type = arm_current_func_type ();
19039 unsigned int reg;
19041 if (IS_NAKED (func_type))
19042 /* This should never really happen. */
19043 return 0;
19045 /* If we are creating a stack frame, then we must save the frame pointer,
19046 IP (which will hold the old stack pointer), LR and the PC. */
19047 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19048 save_reg_mask |=
19049 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19050 | (1 << IP_REGNUM)
19051 | (1 << LR_REGNUM)
19052 | (1 << PC_REGNUM);
19054 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19056 /* Decide if we need to save the link register.
19057 Interrupt routines have their own banked link register,
19058 so they never need to save it.
19059 Otherwise if we do not use the link register we do not need to save
19060 it. If we are pushing other registers onto the stack however, we
19061 can save an instruction in the epilogue by pushing the link register
19062 now and then popping it back into the PC. This incurs extra memory
19063 accesses though, so we only do it when optimizing for size, and only
19064 if we know that we will not need a fancy return sequence. */
19065 if (df_regs_ever_live_p (LR_REGNUM)
19066 || (save_reg_mask
19067 && optimize_size
19068 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19069 && !crtl->tail_call_emit
19070 && !crtl->calls_eh_return))
19071 save_reg_mask |= 1 << LR_REGNUM;
19073 if (cfun->machine->lr_save_eliminated)
19074 save_reg_mask &= ~ (1 << LR_REGNUM);
19076 if (TARGET_REALLY_IWMMXT
19077 && ((bit_count (save_reg_mask)
19078 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19079 arm_compute_static_chain_stack_bytes())
19080 ) % 2) != 0)
19082 /* The total number of registers that are going to be pushed
19083 onto the stack is odd. We need to ensure that the stack
19084 is 64-bit aligned before we start to save iWMMXt registers,
19085 and also before we start to create locals. (A local variable
19086 might be a double or long long which we will load/store using
19087 an iWMMXt instruction). Therefore we need to push another
19088 ARM register, so that the stack will be 64-bit aligned. We
19089 try to avoid using the arg registers (r0 - r3) as they might be
19090 used to pass values in a tail call. */
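/* Illustration (register numbers arbitrary, assuming no pretend args or
   static chain bytes): if only r4, r5 and r6 were in the mask, the count
   would be odd, and the loop below would pick r7 (the first register from
   r4 upwards that is not already saved) as the extra padding register.  */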
19091 for (reg = 4; reg <= 12; reg++)
19092 if ((save_reg_mask & (1 << reg)) == 0)
19093 break;
19095 if (reg <= 12)
19096 save_reg_mask |= (1 << reg);
19097 else
19099 cfun->machine->sibcall_blocked = 1;
19100 save_reg_mask |= (1 << 3);
19104 /* We may need to push an additional register for use initializing the
19105 PIC base register. */
19106 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19107 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19109 reg = thumb_find_work_register (1 << 4);
19110 if (!call_used_regs[reg])
19111 save_reg_mask |= (1 << reg);
19114 return save_reg_mask;
19118 /* Compute a bit mask of which registers need to be
19119 saved on the stack for the current function. */
19120 static unsigned long
19121 thumb1_compute_save_reg_mask (void)
19123 unsigned long mask;
19124 unsigned reg;
19126 mask = 0;
19127 for (reg = 0; reg < 12; reg ++)
19128 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19129 mask |= 1 << reg;
19131 if (flag_pic
19132 && !TARGET_SINGLE_PIC_BASE
19133 && arm_pic_register != INVALID_REGNUM
19134 && crtl->uses_pic_offset_table)
19135 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19137 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19138 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19139 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19141 /* LR will also be pushed if any lo regs are pushed. */
19142 if (mask & 0xff || thumb_force_lr_save ())
19143 mask |= (1 << LR_REGNUM);
19145 /* Make sure we have a low work register if we need one.
19146 We will need one if we are going to push a high register,
19147 but we are not currently intending to push a low register. */
19148 if ((mask & 0xff) == 0
19149 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19151 /* Use thumb_find_work_register to choose which register
19152 we will use. If the register is live then we will
19153 have to push it. Use LAST_LO_REGNUM as our fallback
19154 choice for the register to select. */
19155 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19156 /* Make sure the register returned by thumb_find_work_register is
19157 not part of the return value. */
19158 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19159 reg = LAST_LO_REGNUM;
19161 if (callee_saved_reg_p (reg))
19162 mask |= 1 << reg;
19165 /* The 504 below is 8 bytes less than 512 because there are two possible
19166 alignment words. We can't tell here if they will be present or not so we
19167 have to play it safe and assume that they are. */
19168 if ((CALLER_INTERWORKING_SLOT_SIZE +
19169 ROUND_UP_WORD (get_frame_size ()) +
19170 crtl->outgoing_args_size) >= 504)
19172 /* This is the same as the code in thumb1_expand_prologue() which
19173 determines which register to use for stack decrement. */
19174 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19175 if (mask & (1 << reg))
19176 break;
19178 if (reg > LAST_LO_REGNUM)
19180 /* Make sure we have a register available for stack decrement. */
19181 mask |= 1 << LAST_LO_REGNUM;
19185 return mask;
19189 /* Return the number of bytes required to save VFP registers. */
19190 static int
19191 arm_get_vfp_saved_size (void)
19193 unsigned int regno;
19194 int count;
19195 int saved;
19197 saved = 0;
19198 /* Space for saved VFP registers. */
19199 if (TARGET_HARD_FLOAT && TARGET_VFP)
19201 count = 0;
19202 for (regno = FIRST_VFP_REGNUM;
19203 regno < LAST_VFP_REGNUM;
19204 regno += 2)
19206 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19207 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19209 if (count > 0)
19211 /* Workaround ARM10 VFPr1 bug. */
19212 if (count == 2 && !arm_arch6)
19213 count++;
19214 saved += count * 8;
19216 count = 0;
19218 else
19219 count++;
19221 if (count > 0)
19223 if (count == 2 && !arm_arch6)
19224 count++;
19225 saved += count * 8;
19228 return saved;
19232 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19233 everything bar the final return instruction. If simple_return is true,
19234 then do not output the epilogue, because it has already been emitted in RTL. */
19235 const char *
19236 output_return_instruction (rtx operand, bool really_return, bool reverse,
19237 bool simple_return)
19239 char conditional[10];
19240 char instr[100];
19241 unsigned reg;
19242 unsigned long live_regs_mask;
19243 unsigned long func_type;
19244 arm_stack_offsets *offsets;
19246 func_type = arm_current_func_type ();
19248 if (IS_NAKED (func_type))
19249 return "";
19251 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19253 /* If this function was declared non-returning, and we have
19254 found a tail call, then we have to trust that the called
19255 function won't return. */
19256 if (really_return)
19258 rtx ops[2];
19260 /* Otherwise, trap an attempted return by aborting. */
19261 ops[0] = operand;
19262 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19263 : "abort");
19264 assemble_external_libcall (ops[1]);
19265 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19268 return "";
19271 gcc_assert (!cfun->calls_alloca || really_return);
19273 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19275 cfun->machine->return_used_this_function = 1;
19277 offsets = arm_get_frame_offsets ();
19278 live_regs_mask = offsets->saved_regs_mask;
19280 if (!simple_return && live_regs_mask)
19282 const char * return_reg;
19284 /* If we do not have any special requirements for function exit
19285 (e.g. interworking) then we can load the return address
19286 directly into the PC. Otherwise we must load it into LR. */
19287 if (really_return
19288 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19289 return_reg = reg_names[PC_REGNUM];
19290 else
19291 return_reg = reg_names[LR_REGNUM];
19293 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19295 /* There are three possible reasons for the IP register
19296 being saved. 1) a stack frame was created, in which case
19297 IP contains the old stack pointer, or 2) an ISR routine
19298 corrupted it, or 3) it was saved to align the stack on
19299 iWMMXt. In case 1, restore IP into SP, otherwise just
19300 restore IP. */
19301 if (frame_pointer_needed)
19303 live_regs_mask &= ~ (1 << IP_REGNUM);
19304 live_regs_mask |= (1 << SP_REGNUM);
19306 else
19307 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19310 /* On some ARM architectures it is faster to use LDR rather than
19311 LDM to load a single register. On other architectures, the
19312 cost is the same. In 26 bit mode, or for exception handlers,
19313 we have to use LDM to load the PC so that the CPSR is also
19314 restored. */
19315 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19316 if (live_regs_mask == (1U << reg))
19317 break;
19319 if (reg <= LAST_ARM_REGNUM
19320 && (reg != LR_REGNUM
19321 || ! really_return
19322 || ! IS_INTERRUPT (func_type)))
19324 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19325 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19327 else
19329 char *p;
19330 int first = 1;
19332 /* Generate the load multiple instruction to restore the
19333 registers. Note we can get here, even if
19334 frame_pointer_needed is true, but only if sp already
19335 points to the base of the saved core registers. */
19336 if (live_regs_mask & (1 << SP_REGNUM))
19338 unsigned HOST_WIDE_INT stack_adjust;
19340 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19341 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19343 if (stack_adjust && arm_arch5 && TARGET_ARM)
19344 if (TARGET_UNIFIED_ASM)
19345 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19346 else
19347 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19348 else
19350 /* If we can't use ldmib (SA110 bug),
19351 then try to pop r3 instead. */
19352 if (stack_adjust)
19353 live_regs_mask |= 1 << 3;
19355 if (TARGET_UNIFIED_ASM)
19356 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19357 else
19358 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19361 else
19362 if (TARGET_UNIFIED_ASM)
19363 sprintf (instr, "pop%s\t{", conditional);
19364 else
19365 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19367 p = instr + strlen (instr);
19369 for (reg = 0; reg <= SP_REGNUM; reg++)
19370 if (live_regs_mask & (1 << reg))
19372 int l = strlen (reg_names[reg]);
19374 if (first)
19375 first = 0;
19376 else
19378 memcpy (p, ", ", 2);
19379 p += 2;
19382 memcpy (p, "%|", 2);
19383 memcpy (p + 2, reg_names[reg], l);
19384 p += l + 2;
19387 if (live_regs_mask & (1 << LR_REGNUM))
19389 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19390 /* If returning from an interrupt, restore the CPSR. */
19391 if (IS_INTERRUPT (func_type))
19392 strcat (p, "^");
19394 else
19395 strcpy (p, "}");
19398 output_asm_insn (instr, & operand);
19400 /* See if we need to generate an extra instruction to
19401 perform the actual function return. */
19402 if (really_return
19403 && func_type != ARM_FT_INTERWORKED
19404 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19406 /* The return has already been handled
19407 by loading the LR into the PC. */
19408 return "";
19412 if (really_return)
19414 switch ((int) ARM_FUNC_TYPE (func_type))
19416 case ARM_FT_ISR:
19417 case ARM_FT_FIQ:
19418 /* ??? This is wrong for unified assembly syntax. */
19419 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19420 break;
19422 case ARM_FT_INTERWORKED:
19423 sprintf (instr, "bx%s\t%%|lr", conditional);
19424 break;
19426 case ARM_FT_EXCEPTION:
19427 /* ??? This is wrong for unified assembly syntax. */
19428 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19429 break;
19431 default:
19432 /* Use bx if it's available. */
19433 if (arm_arch5 || arm_arch4t)
19434 sprintf (instr, "bx%s\t%%|lr", conditional);
19435 else
19436 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19437 break;
19440 output_asm_insn (instr, & operand);
19443 return "";
19446 /* Write the function name into the code section, directly preceding
19447 the function prologue.
19449 Code will be output similar to this:
19450 t0
19451 .ascii "arm_poke_function_name", 0
19452 .align
19453 t1
19454 .word 0xff000000 + (t1 - t0)
19455 arm_poke_function_name
19456 mov ip, sp
19457 stmfd sp!, {fp, ip, lr, pc}
19458 sub fp, ip, #4
19460 When performing a stack backtrace, code can inspect the value
19461 of 'pc' stored at 'fp' + 0. If the trace function then looks
19462 at location pc - 12 and the top 8 bits are set, then we know
19463 that there is a function name embedded immediately preceding this
19464 location, and that its length is ((pc[-3]) & ~0xff000000).
19466 We assume that pc is declared as a pointer to an unsigned long.
19468 It is of no benefit to output the function name if we are assembling
19469 a leaf function. These function types will not contain a stack
19470 backtrace structure, therefore it is not possible to determine the
19471 function name. */
19472 void
19473 arm_poke_function_name (FILE *stream, const char *name)
19475 unsigned long alignlength;
19476 unsigned long length;
19477 rtx x;
19479 length = strlen (name) + 1;
19480 alignlength = ROUND_UP_WORD (length);
19482 ASM_OUTPUT_ASCII (stream, name, length);
19483 ASM_OUTPUT_ALIGN (stream, 2);
19484 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19485 assemble_aligned_integer (UNITS_PER_WORD, x);
19488 /* Place some comments into the assembler stream
19489 describing the current function. */
19490 static void
19491 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19493 unsigned long func_type;
19495 /* ??? Do we want to print some of the below anyway? */
19496 if (TARGET_THUMB1)
19497 return;
19499 /* Sanity check. */
19500 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19502 func_type = arm_current_func_type ();
19504 switch ((int) ARM_FUNC_TYPE (func_type))
19506 default:
19507 case ARM_FT_NORMAL:
19508 break;
19509 case ARM_FT_INTERWORKED:
19510 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19511 break;
19512 case ARM_FT_ISR:
19513 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19514 break;
19515 case ARM_FT_FIQ:
19516 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19517 break;
19518 case ARM_FT_EXCEPTION:
19519 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19520 break;
19523 if (IS_NAKED (func_type))
19524 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19526 if (IS_VOLATILE (func_type))
19527 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19529 if (IS_NESTED (func_type))
19530 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19531 if (IS_STACKALIGN (func_type))
19532 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19534 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19535 crtl->args.size,
19536 crtl->args.pretend_args_size, frame_size);
19538 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19539 frame_pointer_needed,
19540 cfun->machine->uses_anonymous_args);
19542 if (cfun->machine->lr_save_eliminated)
19543 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19545 if (crtl->calls_eh_return)
19546 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19550 static void
19551 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19552 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19554 arm_stack_offsets *offsets;
19556 if (TARGET_THUMB1)
19558 int regno;
19560 /* Emit any call-via-reg trampolines that are needed for v4t support
19561 of call_reg and call_value_reg type insns. */
19562 for (regno = 0; regno < LR_REGNUM; regno++)
19564 rtx label = cfun->machine->call_via[regno];
19566 if (label != NULL)
19568 switch_to_section (function_section (current_function_decl));
19569 targetm.asm_out.internal_label (asm_out_file, "L",
19570 CODE_LABEL_NUMBER (label));
19571 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19575 /* ??? Probably not safe to set this here, since it assumes that a
19576 function will be emitted as assembly immediately after we generate
19577 RTL for it. This does not happen for inline functions. */
19578 cfun->machine->return_used_this_function = 0;
19580 else /* TARGET_32BIT */
19582 /* We need to take into account any stack-frame rounding. */
19583 offsets = arm_get_frame_offsets ();
19585 gcc_assert (!use_return_insn (FALSE, NULL)
19586 || (cfun->machine->return_used_this_function != 0)
19587 || offsets->saved_regs == offsets->outgoing_args
19588 || frame_pointer_needed);
19592 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19593 STR and STRD. If an even number of registers is being pushed, an
19594 STRD pattern is created for each register pair. If an
19595 odd number of registers is pushed, emit an initial STR followed by
19596 as many STRD instructions as are needed. This works best when the
19597 stack is initially 64-bit aligned (the normal case), since it
19598 ensures that each STRD is also 64-bit aligned. */
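/* A hypothetical example (not taken from the sources): for a saved_regs_mask
   covering {r4, r5, r6} the expected sequence is roughly

     str   r4, [sp, #-12]!       @ odd count: single store allocates 12 bytes
     strd  r5, r6, [sp, #4]      @ remaining pair, 64-bit aligned

   whereas an even count such as {r4, r5, r6, r7} would instead start with an
   STRD whose first store performs the whole stack allocation.  */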
19599 static void
19600 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19602 int num_regs = 0;
19603 int i;
19604 int regno;
19605 rtx par = NULL_RTX;
19606 rtx dwarf = NULL_RTX;
19607 rtx tmp;
19608 bool first = true;
19610 num_regs = bit_count (saved_regs_mask);
19612 /* Must be at least one register to save, and can't save SP or PC. */
19613 gcc_assert (num_regs > 0 && num_regs <= 14);
19614 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19615 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19617 /* Create sequence for DWARF info. All the frame-related data for
19618 debugging is held in this wrapper. */
19619 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19621 /* Describe the stack adjustment. */
19622 tmp = gen_rtx_SET (stack_pointer_rtx,
19623 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19624 RTX_FRAME_RELATED_P (tmp) = 1;
19625 XVECEXP (dwarf, 0, 0) = tmp;
19627 /* Find the first register. */
19628 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19631 i = 0;
19633 /* If there's an odd number of registers to push, start off by
19634 pushing a single register. This ensures that subsequent strd
19635 operations are dword aligned (assuming that SP was originally
19636 64-bit aligned). */
19637 if ((num_regs & 1) != 0)
19639 rtx reg, mem, insn;
19641 reg = gen_rtx_REG (SImode, regno);
19642 if (num_regs == 1)
19643 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19644 stack_pointer_rtx));
19645 else
19646 mem = gen_frame_mem (Pmode,
19647 gen_rtx_PRE_MODIFY
19648 (Pmode, stack_pointer_rtx,
19649 plus_constant (Pmode, stack_pointer_rtx,
19650 -4 * num_regs)));
19652 tmp = gen_rtx_SET (mem, reg);
19653 RTX_FRAME_RELATED_P (tmp) = 1;
19654 insn = emit_insn (tmp);
19655 RTX_FRAME_RELATED_P (insn) = 1;
19656 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19657 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19658 RTX_FRAME_RELATED_P (tmp) = 1;
19659 i++;
19660 regno++;
19661 XVECEXP (dwarf, 0, i) = tmp;
19662 first = false;
19665 while (i < num_regs)
19666 if (saved_regs_mask & (1 << regno))
19668 rtx reg1, reg2, mem1, mem2;
19669 rtx tmp0, tmp1, tmp2;
19670 int regno2;
19672 /* Find the register to pair with this one. */
19673 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19674 regno2++)
19677 reg1 = gen_rtx_REG (SImode, regno);
19678 reg2 = gen_rtx_REG (SImode, regno2);
19680 if (first)
19682 rtx insn;
19684 first = false;
19685 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19686 stack_pointer_rtx,
19687 -4 * num_regs));
19688 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19689 stack_pointer_rtx,
19690 -4 * (num_regs - 1)));
19691 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19692 plus_constant (Pmode, stack_pointer_rtx,
19693 -4 * (num_regs)));
19694 tmp1 = gen_rtx_SET (mem1, reg1);
19695 tmp2 = gen_rtx_SET (mem2, reg2);
19696 RTX_FRAME_RELATED_P (tmp0) = 1;
19697 RTX_FRAME_RELATED_P (tmp1) = 1;
19698 RTX_FRAME_RELATED_P (tmp2) = 1;
19699 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19700 XVECEXP (par, 0, 0) = tmp0;
19701 XVECEXP (par, 0, 1) = tmp1;
19702 XVECEXP (par, 0, 2) = tmp2;
19703 insn = emit_insn (par);
19704 RTX_FRAME_RELATED_P (insn) = 1;
19705 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19707 else
19709 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19710 stack_pointer_rtx,
19711 4 * i));
19712 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19713 stack_pointer_rtx,
19714 4 * (i + 1)));
19715 tmp1 = gen_rtx_SET (mem1, reg1);
19716 tmp2 = gen_rtx_SET (mem2, reg2);
19717 RTX_FRAME_RELATED_P (tmp1) = 1;
19718 RTX_FRAME_RELATED_P (tmp2) = 1;
19719 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19720 XVECEXP (par, 0, 0) = tmp1;
19721 XVECEXP (par, 0, 1) = tmp2;
19722 emit_insn (par);
19725 /* Create unwind information. This is an approximation. */
19726 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19727 plus_constant (Pmode,
19728 stack_pointer_rtx,
19729 4 * i)),
19730 reg1);
19731 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19732 plus_constant (Pmode,
19733 stack_pointer_rtx,
19734 4 * (i + 1))),
19735 reg2);
19737 RTX_FRAME_RELATED_P (tmp1) = 1;
19738 RTX_FRAME_RELATED_P (tmp2) = 1;
19739 XVECEXP (dwarf, 0, i + 1) = tmp1;
19740 XVECEXP (dwarf, 0, i + 2) = tmp2;
19741 i += 2;
19742 regno = regno2 + 1;
19744 else
19745 regno++;
19747 return;
19750 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19751 whenever possible, otherwise it emits single-word stores. The first store
19752 also allocates stack space for all saved registers, using pre-indexed
19753 addressing with writeback. All other stores use offset addressing. If no STRD
19754 can be emitted, this function emits a sequence of single-word stores,
19755 and not an STM as before, because single-word stores give the scheduler more
19756 freedom and can be turned into an STM by peephole optimizations. */
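/* For illustration (an assumption based on the code below, not taken from
   the sources): with saved_regs_mask covering {r4, r5, r7} this emits
   approximately

     strd  r4, r5, [sp, #-12]!   @ first store allocates all 12 bytes
     str   r7, [sp, #8]          @ r7 has no partner: single-word store

   together with the DWARF annotations collected in the `dwarf' sequence.  */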
19757 static void
19758 arm_emit_strd_push (unsigned long saved_regs_mask)
19760 int num_regs = 0;
19761 int i, j, dwarf_index = 0;
19762 int offset = 0;
19763 rtx dwarf = NULL_RTX;
19764 rtx insn = NULL_RTX;
19765 rtx tmp, mem;
19767 /* TODO: More efficient code could be emitted by changing the
19768 layout, e.g., first push all pairs that can use STRD to keep the
19769 stack aligned, and then push all other registers. */
19770 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19771 if (saved_regs_mask & (1 << i))
19772 num_regs++;
19774 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19775 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19776 gcc_assert (num_regs > 0);
19778 /* Create sequence for DWARF info. */
19779 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19781 /* For dwarf info, we generate explicit stack update. */
19782 tmp = gen_rtx_SET (stack_pointer_rtx,
19783 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19784 RTX_FRAME_RELATED_P (tmp) = 1;
19785 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19787 /* Save registers. */
19788 offset = - 4 * num_regs;
19789 j = 0;
19790 while (j <= LAST_ARM_REGNUM)
19791 if (saved_regs_mask & (1 << j))
19793 if ((j % 2 == 0)
19794 && (saved_regs_mask & (1 << (j + 1))))
19796 /* The current register and the next register form a register pair
19797 for which STRD can be generated. */
19798 if (offset < 0)
19800 /* Allocate stack space for all saved registers. */
19801 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19802 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19803 mem = gen_frame_mem (DImode, tmp);
19804 offset = 0;
19806 else if (offset > 0)
19807 mem = gen_frame_mem (DImode,
19808 plus_constant (Pmode,
19809 stack_pointer_rtx,
19810 offset));
19811 else
19812 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19814 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19815 RTX_FRAME_RELATED_P (tmp) = 1;
19816 tmp = emit_insn (tmp);
19818 /* Record the first store insn. */
19819 if (dwarf_index == 1)
19820 insn = tmp;
19822 /* Generate dwarf info. */
19823 mem = gen_frame_mem (SImode,
19824 plus_constant (Pmode,
19825 stack_pointer_rtx,
19826 offset));
19827 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19828 RTX_FRAME_RELATED_P (tmp) = 1;
19829 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19831 mem = gen_frame_mem (SImode,
19832 plus_constant (Pmode,
19833 stack_pointer_rtx,
19834 offset + 4));
19835 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19836 RTX_FRAME_RELATED_P (tmp) = 1;
19837 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19839 offset += 8;
19840 j += 2;
19842 else
19844 /* Emit a single word store. */
19845 if (offset < 0)
19847 /* Allocate stack space for all saved registers. */
19848 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19849 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19850 mem = gen_frame_mem (SImode, tmp);
19851 offset = 0;
19853 else if (offset > 0)
19854 mem = gen_frame_mem (SImode,
19855 plus_constant (Pmode,
19856 stack_pointer_rtx,
19857 offset));
19858 else
19859 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19861 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19862 RTX_FRAME_RELATED_P (tmp) = 1;
19863 tmp = emit_insn (tmp);
19865 /* Record the first store insn. */
19866 if (dwarf_index == 1)
19867 insn = tmp;
19869 /* Generate dwarf info. */
19870 mem = gen_frame_mem (SImode,
19871 plus_constant(Pmode,
19872 stack_pointer_rtx,
19873 offset));
19874 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19875 RTX_FRAME_RELATED_P (tmp) = 1;
19876 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19878 offset += 4;
19879 j += 1;
19882 else
19883 j++;
19885 /* Attach dwarf info to the first insn we generate. */
19886 gcc_assert (insn != NULL_RTX);
19887 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19888 RTX_FRAME_RELATED_P (insn) = 1;
19891 /* Generate and emit an insn that we will recognize as a push_multi.
19892 Unfortunately, since this insn does not reflect very well the actual
19893 semantics of the operation, we need to annotate the insn for the benefit
19894 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19895 MASK for registers that should be annotated for DWARF2 frame unwind
19896 information. */
19897 static rtx
19898 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19900 int num_regs = 0;
19901 int num_dwarf_regs = 0;
19902 int i, j;
19903 rtx par;
19904 rtx dwarf;
19905 int dwarf_par_index;
19906 rtx tmp, reg;
19908 /* We don't record the PC in the dwarf frame information. */
19909 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19911 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19913 if (mask & (1 << i))
19914 num_regs++;
19915 if (dwarf_regs_mask & (1 << i))
19916 num_dwarf_regs++;
19919 gcc_assert (num_regs && num_regs <= 16);
19920 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19922 /* For the body of the insn we are going to generate an UNSPEC in
19923 parallel with several USEs. This allows the insn to be recognized
19924 by the push_multi pattern in the arm.md file.
19926 The body of the insn looks something like this:
19928 (parallel [
19929 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19930 (const_int:SI <num>)))
19931 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19932 (use (reg:SI XX))
19933 (use (reg:SI YY))
19937 For the frame note however, we try to be more explicit and actually
19938 show each register being stored into the stack frame, plus a (single)
19939 decrement of the stack pointer. We do it this way in order to be
19940 friendly to the stack unwinding code, which only wants to see a single
19941 stack decrement per instruction. The RTL we generate for the note looks
19942 something like this:
19944 (sequence [
19945 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19946 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19947 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19948 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19952 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19953 instead we'd have a parallel expression detailing all
19954 the stores to the various memory addresses so that debug
19955 information is more up-to-date. Remember however while writing
19956 this to take care of the constraints with the push instruction.
19958 Note also that this has to be taken care of for the VFP registers.
19960 For more see PR43399. */
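/* Concretely (a hedged example, not from the sources): for MASK covering
   {r4, r5, r6, lr} the emitted insn assembles to a single

     push  {r4, r5, r6, lr}      @ stmfd sp!, {r4, r5, r6, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as one 16-byte stack decrement plus four SImode stores at sp + 0, sp + 4,
   sp + 8 and sp + 12, which is the form the unwinder wants to see.  */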
19962 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19963 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19964 dwarf_par_index = 1;
19966 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19968 if (mask & (1 << i))
19970 reg = gen_rtx_REG (SImode, i);
19972 XVECEXP (par, 0, 0)
19973 = gen_rtx_SET (gen_frame_mem
19974 (BLKmode,
19975 gen_rtx_PRE_MODIFY (Pmode,
19976 stack_pointer_rtx,
19977 plus_constant
19978 (Pmode, stack_pointer_rtx,
19979 -4 * num_regs))
19981 gen_rtx_UNSPEC (BLKmode,
19982 gen_rtvec (1, reg),
19983 UNSPEC_PUSH_MULT));
19985 if (dwarf_regs_mask & (1 << i))
19987 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
19988 reg);
19989 RTX_FRAME_RELATED_P (tmp) = 1;
19990 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19993 break;
19997 for (j = 1, i++; j < num_regs; i++)
19999 if (mask & (1 << i))
20001 reg = gen_rtx_REG (SImode, i);
20003 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20005 if (dwarf_regs_mask & (1 << i))
20008 = gen_rtx_SET (gen_frame_mem
20009 (SImode,
20010 plus_constant (Pmode, stack_pointer_rtx,
20011 4 * j)),
20012 reg);
20013 RTX_FRAME_RELATED_P (tmp) = 1;
20014 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20017 j++;
20021 par = emit_insn (par);
20023 tmp = gen_rtx_SET (stack_pointer_rtx,
20024 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20025 RTX_FRAME_RELATED_P (tmp) = 1;
20026 XVECEXP (dwarf, 0, 0) = tmp;
20028 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20030 return par;
20033 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20034 SIZE is the offset to be adjusted.
20035 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20036 static void
20037 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20039 rtx dwarf;
20041 RTX_FRAME_RELATED_P (insn) = 1;
20042 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20043 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20046 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20047 SAVED_REGS_MASK shows which registers need to be restored.
20049 Unfortunately, since this insn does not reflect very well the actual
20050 semantics of the operation, we need to annotate the insn for the benefit
20051 of DWARF2 frame unwind information. */
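/* As a hedged illustration: for SAVED_REGS_MASK covering {r4, r5, pc} the
   emitted jump insn corresponds to

     pop   {r4, r5, pc}          @ ldmfd sp!, {r4, r5, pc}

   with REG_CFA_RESTORE notes for r4 and r5 only (the PC is deliberately kept
   out of the DWARF info), while a pop that does not include the PC also gets
   a REG_CFA_ADJUST_CFA note for the stack adjustment.  */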
20052 static void
20053 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20055 int num_regs = 0;
20056 int i, j;
20057 rtx par;
20058 rtx dwarf = NULL_RTX;
20059 rtx tmp, reg;
20060 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20061 int offset_adj;
20062 int emit_update;
20064 offset_adj = return_in_pc ? 1 : 0;
20065 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20066 if (saved_regs_mask & (1 << i))
20067 num_regs++;
20069 gcc_assert (num_regs && num_regs <= 16);
20071 /* If SP is in reglist, then we don't emit SP update insn. */
20072 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20074 /* The parallel needs to hold num_regs SETs
20075 and one SET for the stack update. */
20076 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20078 if (return_in_pc)
20079 XVECEXP (par, 0, 0) = ret_rtx;
20081 if (emit_update)
20083 /* Increment the stack pointer, based on there being
20084 num_regs 4-byte registers to restore. */
20085 tmp = gen_rtx_SET (stack_pointer_rtx,
20086 plus_constant (Pmode,
20087 stack_pointer_rtx,
20088 4 * num_regs));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (par, 0, offset_adj) = tmp;
20093 /* Now restore every reg, which may include PC. */
20094 for (j = 0, i = 0; j < num_regs; i++)
20095 if (saved_regs_mask & (1 << i))
20097 reg = gen_rtx_REG (SImode, i);
20098 if ((num_regs == 1) && emit_update && !return_in_pc)
20100 /* Emit single load with writeback. */
20101 tmp = gen_frame_mem (SImode,
20102 gen_rtx_POST_INC (Pmode,
20103 stack_pointer_rtx));
20104 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20105 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20106 return;
20109 tmp = gen_rtx_SET (reg,
20110 gen_frame_mem
20111 (SImode,
20112 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20113 RTX_FRAME_RELATED_P (tmp) = 1;
20114 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20116 /* We need to maintain a sequence for the DWARF info too. As the DWARF
20117 info should not include the PC, skip it. */
20118 if (i != PC_REGNUM)
20119 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20121 j++;
20124 if (return_in_pc)
20125 par = emit_jump_insn (par);
20126 else
20127 par = emit_insn (par);
20129 REG_NOTES (par) = dwarf;
20130 if (!return_in_pc)
20131 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20132 stack_pointer_rtx, stack_pointer_rtx);
20135 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20136 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20138 Unfortunately, since this insn does not reflect very well the actual
20139 semantics of the operation, we need to annotate the insn for the benefit
20140 of DWARF2 frame unwind information. */
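/* Illustratively (an assumption, not from the sources): popping four
   D-registers starting at d8, with BASE_REG being the stack pointer,
   corresponds to

     vldm  sp!, {d8-d11}

   i.e. one 32-byte base-register update plus four DFmode loads, each of
   which appears as its own SET in the parallel and gets a REG_CFA_RESTORE
   note.  */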
20141 static void
20142 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20144 int i, j;
20145 rtx par;
20146 rtx dwarf = NULL_RTX;
20147 rtx tmp, reg;
20149 gcc_assert (num_regs && num_regs <= 32);
20151 /* Work around the ARM10 VFPr1 bug. */
20152 if (num_regs == 2 && !arm_arch6)
20154 if (first_reg == 15)
20155 first_reg--;
20157 num_regs++;
20160 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20161 there could be up to 32 D-registers to restore.
20162 If there are more than 16 D-registers, make two recursive calls,
20163 each of which emits one pop_multi instruction. */
20164 if (num_regs > 16)
20166 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20167 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20168 return;
20171 /* The parallel needs to hold num_regs SETs
20172 and one SET for the stack update. */
20173 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20175 /* Increment the stack pointer, based on there being
20176 num_regs 8-byte registers to restore. */
20177 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20178 RTX_FRAME_RELATED_P (tmp) = 1;
20179 XVECEXP (par, 0, 0) = tmp;
20181 /* Now show every reg that will be restored, using a SET for each. */
20182 for (j = 0, i=first_reg; j < num_regs; i += 2)
20184 reg = gen_rtx_REG (DFmode, i);
20186 tmp = gen_rtx_SET (reg,
20187 gen_frame_mem
20188 (DFmode,
20189 plus_constant (Pmode, base_reg, 8 * j)));
20190 RTX_FRAME_RELATED_P (tmp) = 1;
20191 XVECEXP (par, 0, j + 1) = tmp;
20193 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20195 j++;
20198 par = emit_insn (par);
20199 REG_NOTES (par) = dwarf;
20201 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20202 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20204 RTX_FRAME_RELATED_P (par) = 1;
20205 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20207 else
20208 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20209 base_reg, base_reg);
20212 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20213 even number of registers is being popped, LDRD patterns are created for
20214 all register pairs. If an odd number of registers is popped, the last register
20215 is loaded using an LDR pattern. */
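/* A hypothetical example of the resulting epilogue for {r4, r5, r6}:

     ldrd  r4, r5, [sp]          @ pair loaded with plain offset addressing
     add   sp, sp, #8
     ldr   r6, [sp], #4          @ odd leftover: LDR with post-increment

   If the PC were in the mask it would be the final, specially handled
   load.  */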
20216 static void
20217 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20219 int num_regs = 0;
20220 int i, j;
20221 rtx par = NULL_RTX;
20222 rtx dwarf = NULL_RTX;
20223 rtx tmp, reg, tmp1;
20224 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20226 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20227 if (saved_regs_mask & (1 << i))
20228 num_regs++;
20230 gcc_assert (num_regs && num_regs <= 16);
20232 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20233 to be popped. So, if num_regs is even, now it will become odd,
20234 and we can generate pop with PC. If num_regs is odd, it will be
20235 even now, and ldr with return can be generated for PC. */
20236 if (return_in_pc)
20237 num_regs--;
20239 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20241 /* Var j iterates over all the registers to gather all the registers in
20242 saved_regs_mask. Var i gives index of saved registers in stack frame.
20243 A PARALLEL RTX of register-pair is created here, so that pattern for
20244 LDRD can be matched. As PC is always last register to be popped, and
20245 we have already decremented num_regs if PC, we don't have to worry
20246 about PC in this loop. */
20247 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20248 if (saved_regs_mask & (1 << j))
20250 /* Create RTX for memory load. */
20251 reg = gen_rtx_REG (SImode, j);
20252 tmp = gen_rtx_SET (reg,
20253 gen_frame_mem (SImode,
20254 plus_constant (Pmode,
20255 stack_pointer_rtx, 4 * i)));
20256 RTX_FRAME_RELATED_P (tmp) = 1;
20258 if (i % 2 == 0)
20260 /* When saved-register index (i) is even, the RTX to be emitted is
20261 yet to be created. Hence create it first. The LDRD pattern we
20262 are generating is :
20263 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20264 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20265 where target registers need not be consecutive. */
20266 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20267 dwarf = NULL_RTX;
20270 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20271 added as 0th element and if i is odd, reg_i is added as 1st element
20272 of LDRD pattern shown above. */
20273 XVECEXP (par, 0, (i % 2)) = tmp;
20274 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20276 if ((i % 2) == 1)
20278 /* When saved-register index (i) is odd, RTXs for both the registers
20279 to be loaded are generated in above given LDRD pattern, and the
20280 pattern can be emitted now. */
20281 par = emit_insn (par);
20282 REG_NOTES (par) = dwarf;
20283 RTX_FRAME_RELATED_P (par) = 1;
20286 i++;
20289 /* If an odd number of registers was pushed and return_in_pc is false, or an
20290 even number was pushed and return_in_pc is true, the last register is
20291 popped using LDR; it can be the PC as well. Hence, adjust the stack first
20292 and then load with post-increment. */
20294 /* Increment the stack pointer, based on there being
20295 num_regs 4-byte registers to restore. */
20296 tmp = gen_rtx_SET (stack_pointer_rtx,
20297 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20298 RTX_FRAME_RELATED_P (tmp) = 1;
20299 tmp = emit_insn (tmp);
20300 if (!return_in_pc)
20302 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20303 stack_pointer_rtx, stack_pointer_rtx);
20306 dwarf = NULL_RTX;
20308 if (((num_regs % 2) == 1 && !return_in_pc)
20309 || ((num_regs % 2) == 0 && return_in_pc))
20311 /* Scan for the single register to be popped. Skip until the saved
20312 register is found. */
20313 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20315 /* Gen LDR with post increment here. */
20316 tmp1 = gen_rtx_MEM (SImode,
20317 gen_rtx_POST_INC (SImode,
20318 stack_pointer_rtx));
20319 set_mem_alias_set (tmp1, get_frame_alias_set ());
20321 reg = gen_rtx_REG (SImode, j);
20322 tmp = gen_rtx_SET (reg, tmp1);
20323 RTX_FRAME_RELATED_P (tmp) = 1;
20324 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20326 if (return_in_pc)
20328 /* If return_in_pc, j must be PC_REGNUM. */
20329 gcc_assert (j == PC_REGNUM);
20330 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20331 XVECEXP (par, 0, 0) = ret_rtx;
20332 XVECEXP (par, 0, 1) = tmp;
20333 par = emit_jump_insn (par);
20335 else
20337 par = emit_insn (tmp);
20338 REG_NOTES (par) = dwarf;
20339 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20340 stack_pointer_rtx, stack_pointer_rtx);
20344 else if ((num_regs % 2) == 1 && return_in_pc)
20346 /* There are 2 registers to be popped. So, generate the pattern
20347 pop_multiple_with_stack_update_and_return to pop in PC. */
20348 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20351 return;
20354 /* LDRD in ARM mode needs consecutive registers as operands. This function
20355 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20356 offset addressing and then generates one separate stack update. This provides
20357 more scheduling freedom, compared to writeback on every load. However,
20358 if the function returns using load into PC directly
20359 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20360 before the last load. TODO: Add a peephole optimization to recognize
20361 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20362 peephole optimization to merge the load at stack-offset zero
20363 with the stack update instruction using load with writeback
20364 in post-index addressing mode. */
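/* Sketching the expected output (an illustration, not from the sources) for
   {r4, r5, r6} when the PC is not being popped:

     ldrd  r4, r5, [sp]          @ consecutive pair, offset addressing
     ldr   r6, [sp, #8]          @ leftover single-word load
     add   sp, sp, #12           @ one separate stack update at the end

   When the PC is in SAVED_REGS_MASK, the stack is updated first and the
   final load is an `ldr pc, [sp], #4' that performs the return.  */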
20365 static void
20366 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20368 int j = 0;
20369 int offset = 0;
20370 rtx par = NULL_RTX;
20371 rtx dwarf = NULL_RTX;
20372 rtx tmp, mem;
20374 /* Restore saved registers. */
20375 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20376 j = 0;
20377 while (j <= LAST_ARM_REGNUM)
20378 if (saved_regs_mask & (1 << j))
20380 if ((j % 2) == 0
20381 && (saved_regs_mask & (1 << (j + 1)))
20382 && (j + 1) != PC_REGNUM)
20384 /* Current register and next register form register pair for which
20385 LDRD can be generated. PC is always the last register popped, and
20386 we handle it separately. */
20387 if (offset > 0)
20388 mem = gen_frame_mem (DImode,
20389 plus_constant (Pmode,
20390 stack_pointer_rtx,
20391 offset));
20392 else
20393 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20395 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20396 tmp = emit_insn (tmp);
20397 RTX_FRAME_RELATED_P (tmp) = 1;
20399 /* Generate dwarf info. */
20401 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20402 gen_rtx_REG (SImode, j),
20403 NULL_RTX);
20404 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20405 gen_rtx_REG (SImode, j + 1),
20406 dwarf);
20408 REG_NOTES (tmp) = dwarf;
20410 offset += 8;
20411 j += 2;
20413 else if (j != PC_REGNUM)
20415 /* Emit a single word load. */
20416 if (offset > 0)
20417 mem = gen_frame_mem (SImode,
20418 plus_constant (Pmode,
20419 stack_pointer_rtx,
20420 offset));
20421 else
20422 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20424 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20425 tmp = emit_insn (tmp);
20426 RTX_FRAME_RELATED_P (tmp) = 1;
20428 /* Generate dwarf info. */
20429 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20430 gen_rtx_REG (SImode, j),
20431 NULL_RTX);
20433 offset += 4;
20434 j += 1;
20436 else /* j == PC_REGNUM */
20437 j++;
20439 else
20440 j++;
20442 /* Update the stack. */
20443 if (offset > 0)
20445 tmp = gen_rtx_SET (stack_pointer_rtx,
20446 plus_constant (Pmode,
20447 stack_pointer_rtx,
20448 offset));
20449 tmp = emit_insn (tmp);
20450 arm_add_cfa_adjust_cfa_note (tmp, offset,
20451 stack_pointer_rtx, stack_pointer_rtx);
20452 offset = 0;
20455 if (saved_regs_mask & (1 << PC_REGNUM))
20457 /* Only PC is to be popped. */
20458 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20459 XVECEXP (par, 0, 0) = ret_rtx;
20460 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20461 gen_frame_mem (SImode,
20462 gen_rtx_POST_INC (SImode,
20463 stack_pointer_rtx)));
20464 RTX_FRAME_RELATED_P (tmp) = 1;
20465 XVECEXP (par, 0, 1) = tmp;
20466 par = emit_jump_insn (par);
20468 /* Generate dwarf info. */
20469 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20470 gen_rtx_REG (SImode, PC_REGNUM),
20471 NULL_RTX);
20472 REG_NOTES (par) = dwarf;
20473 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20474 stack_pointer_rtx, stack_pointer_rtx);
20478 /* Calculate the size of the return value that is passed in registers. */
20479 static unsigned
20480 arm_size_return_regs (void)
20482 machine_mode mode;
20484 if (crtl->return_rtx != 0)
20485 mode = GET_MODE (crtl->return_rtx);
20486 else
20487 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20489 return GET_MODE_SIZE (mode);
20492 /* Return true if the current function needs to save/restore LR. */
20493 static bool
20494 thumb_force_lr_save (void)
20496 return !cfun->machine->lr_save_eliminated
20497 && (!leaf_function_p ()
20498 || thumb_far_jump_used_p ()
20499 || df_regs_ever_live_p (LR_REGNUM));
20502 /* We do not know if r3 will be available because
20503 we do have an indirect tailcall happening in this
20504 particular case. */
20505 static bool
20506 is_indirect_tailcall_p (rtx call)
20508 rtx pat = PATTERN (call);
20510 /* Indirect tail call. */
20511 pat = XVECEXP (pat, 0, 0);
20512 if (GET_CODE (pat) == SET)
20513 pat = SET_SRC (pat);
20515 pat = XEXP (XEXP (pat, 0), 0);
20516 return REG_P (pat);
20519 /* Return true if r3 is used by any of the tail call insns in the
20520 current function. */
20521 static bool
20522 any_sibcall_could_use_r3 (void)
20524 edge_iterator ei;
20525 edge e;
20527 if (!crtl->tail_call_emit)
20528 return false;
20529 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20530 if (e->flags & EDGE_SIBCALL)
20532 rtx call = BB_END (e->src);
20533 if (!CALL_P (call))
20534 call = prev_nonnote_nondebug_insn (call);
20535 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20536 if (find_regno_fusage (call, USE, 3)
20537 || is_indirect_tailcall_p (call))
20538 return true;
20540 return false;
20544 /* Compute the distance from register FROM to register TO.
20545 These can be the arg pointer (26), the soft frame pointer (25),
20546 the stack pointer (13) or the hard frame pointer (11).
20547 In Thumb mode r7 is used as the hard frame pointer, if needed.
20548 Typical stack layout looks like this:
20550 old stack pointer -> | |
20551 ----
20552 | | \
20553 | | saved arguments for
20554 | | vararg functions
20555 | | /
20557 hard FP & arg pointer -> | | \
20558 | | stack
20559 | | frame
20560 | | /
20562 | | \
20563 | | call saved
20564 | | registers
20565 soft frame pointer -> | | /
20567 | | \
20568 | | local
20569 | | variables
20570 locals base pointer -> | | /
20572 | | \
20573 | | outgoing
20574 | | arguments
20575 current stack pointer -> | | /
20578 For a given function some or all of these stack components
20579 may not be needed, giving rise to the possibility of
20580 eliminating some of the registers.
20582 The values returned by this function must reflect the behavior
20583 of arm_expand_prologue() and arm_compute_save_reg_mask().
20585 The sign of the number returned reflects the direction of stack
20586 growth, so the values are positive for all eliminations except
20587 from the soft frame pointer to the hard frame pointer.
20589 SFP may point just inside the local variables block to ensure correct
20590 alignment. */
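/* A worked example under assumed conditions (no pretend args, no static
   chain, no caller-interworking slot, {r4, r5, r6, lr} saved, 16 bytes of
   locals, 8 bytes of outgoing arguments, no extra doubleword padding):

     saved_args    = 0
     saved_regs    = 0 + 16  = 16
     soft_frame    = 16
     locals_base   = 16 + 16 = 32
     outgoing_args = 32 + 8  = 40

   giving an ARG_POINTER -> STACK_POINTER elimination offset of
   40 - (0 + 4) = 36 and a FRAME_POINTER -> STACK_POINTER offset of
   40 - 16 = 24.  */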
20593 /* Calculate stack offsets. These are used to calculate register elimination
20594 offsets and in prologue/epilogue code. Also calculates which registers
20595 should be saved. */
20597 static arm_stack_offsets *
20598 arm_get_frame_offsets (void)
20600 struct arm_stack_offsets *offsets;
20601 unsigned long func_type;
20602 int leaf;
20603 int saved;
20604 int core_saved;
20605 HOST_WIDE_INT frame_size;
20606 int i;
20608 offsets = &cfun->machine->stack_offsets;
20610 /* We need to know if we are a leaf function. Unfortunately, it
20611 is possible to be called after start_sequence has been called,
20612 which causes get_insns to return the insns for the sequence,
20613 not the function, which will cause leaf_function_p to return
20614 the incorrect result.
20616 Fortunately, we only need to know about leaf functions once reload has completed, and the
20617 frame size cannot be changed after that time, so we can safely
20618 use the cached value. */
20620 if (reload_completed)
20621 return offsets;
20623 /* Initially this is the size of the local variables. It will be translated
20624 into an offset once we have determined the size of preceding data. */
20625 frame_size = ROUND_UP_WORD (get_frame_size ());
20627 leaf = leaf_function_p ();
20629 /* Space for variadic functions. */
20630 offsets->saved_args = crtl->args.pretend_args_size;
20632 /* In Thumb mode this is incorrect, but never used. */
20633 offsets->frame
20634 = (offsets->saved_args
20635 + arm_compute_static_chain_stack_bytes ()
20636 + (frame_pointer_needed ? 4 : 0));
20638 if (TARGET_32BIT)
20640 unsigned int regno;
20642 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20643 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20644 saved = core_saved;
20646 /* We know that SP will be doubleword aligned on entry, and we must
20647 preserve that condition at any subroutine call. We also require the
20648 soft frame pointer to be doubleword aligned. */
20650 if (TARGET_REALLY_IWMMXT)
20652 /* Check for the call-saved iWMMXt registers. */
20653 for (regno = FIRST_IWMMXT_REGNUM;
20654 regno <= LAST_IWMMXT_REGNUM;
20655 regno++)
20656 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20657 saved += 8;
20660 func_type = arm_current_func_type ();
20661 /* Space for saved VFP registers. */
20662 if (! IS_VOLATILE (func_type)
20663 && TARGET_HARD_FLOAT && TARGET_VFP)
20664 saved += arm_get_vfp_saved_size ();
20666 else /* TARGET_THUMB1 */
20668 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20669 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20670 saved = core_saved;
20671 if (TARGET_BACKTRACE)
20672 saved += 16;
20675 /* Saved registers include the stack frame. */
20676 offsets->saved_regs
20677 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20678 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20680 /* A leaf function does not need any stack alignment if it has nothing
20681 on the stack. */
20682 if (leaf && frame_size == 0
20683 /* However if it calls alloca(), we have a dynamically allocated
20684 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20685 && ! cfun->calls_alloca)
20687 offsets->outgoing_args = offsets->soft_frame;
20688 offsets->locals_base = offsets->soft_frame;
20689 return offsets;
20692 /* Ensure SFP has the correct alignment. */
20693 if (ARM_DOUBLEWORD_ALIGN
20694 && (offsets->soft_frame & 7))
20696 offsets->soft_frame += 4;
20697 /* Try to align stack by pushing an extra reg. Don't bother doing this
20698 when there is a stack frame as the alignment will be rolled into
20699 the normal stack adjustment. */
20700 if (frame_size + crtl->outgoing_args_size == 0)
20702 int reg = -1;
20704 /* Register r3 is caller-saved. Normally it does not need to be
20705 saved on entry by the prologue. However if we choose to save
20706 it for padding then we may confuse the compiler into thinking
20707 a prologue sequence is required when in fact it is not. This
20708 will occur when shrink-wrapping if r3 is used as a scratch
20709 register and there are no other callee-saved writes.
20711 This situation can be avoided when other callee-saved registers
20712 are available and r3 is not mandatory if we choose a callee-saved
20713 register for padding. */
20714 bool prefer_callee_reg_p = false;
20716 /* If it is safe to use r3, then do so. This sometimes
20717 generates better code on Thumb-2 by avoiding the need to
20718 use 32-bit push/pop instructions. */
20719 if (! any_sibcall_could_use_r3 ()
20720 && arm_size_return_regs () <= 12
20721 && (offsets->saved_regs_mask & (1 << 3)) == 0
20722 && (TARGET_THUMB2
20723 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20725 reg = 3;
20726 if (!TARGET_THUMB2)
20727 prefer_callee_reg_p = true;
20729 if (reg == -1
20730 || prefer_callee_reg_p)
20732 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20734 /* Avoid fixed registers; they may be changed at
20735 arbitrary times so it's unsafe to restore them
20736 during the epilogue. */
20737 if (!fixed_regs[i]
20738 && (offsets->saved_regs_mask & (1 << i)) == 0)
20740 reg = i;
20741 break;
20746 if (reg != -1)
20748 offsets->saved_regs += 4;
20749 offsets->saved_regs_mask |= (1 << reg);
20754 offsets->locals_base = offsets->soft_frame + frame_size;
20755 offsets->outgoing_args = (offsets->locals_base
20756 + crtl->outgoing_args_size);
20758 if (ARM_DOUBLEWORD_ALIGN)
20760 /* Ensure SP remains doubleword aligned. */
20761 if (offsets->outgoing_args & 7)
20762 offsets->outgoing_args += 4;
20763 gcc_assert (!(offsets->outgoing_args & 7));
20766 return offsets;
20770 /* Calculate the relative offsets for the different stack pointers. Positive
20771 offsets are in the direction of stack growth. */
20773 HOST_WIDE_INT
20774 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20776 arm_stack_offsets *offsets;
20778 offsets = arm_get_frame_offsets ();
20780 /* OK, now we have enough information to compute the distances.
20781 There must be an entry in these switch tables for each pair
20782 of registers in ELIMINABLE_REGS, even if some of the entries
20783 seem to be redundant or useless. */
20784 switch (from)
20786 case ARG_POINTER_REGNUM:
20787 switch (to)
20789 case THUMB_HARD_FRAME_POINTER_REGNUM:
20790 return 0;
20792 case FRAME_POINTER_REGNUM:
20793 /* This is the reverse of the soft frame pointer
20794 to hard frame pointer elimination below. */
20795 return offsets->soft_frame - offsets->saved_args;
20797 case ARM_HARD_FRAME_POINTER_REGNUM:
20798 /* This is only non-zero in the case where the static chain register
20799 is stored above the frame. */
20800 return offsets->frame - offsets->saved_args - 4;
20802 case STACK_POINTER_REGNUM:
20803 /* If nothing has been pushed on the stack at all
20804 then this will return -4. This *is* correct! */
20805 return offsets->outgoing_args - (offsets->saved_args + 4);
20807 default:
20808 gcc_unreachable ();
20810 gcc_unreachable ();
20812 case FRAME_POINTER_REGNUM:
20813 switch (to)
20815 case THUMB_HARD_FRAME_POINTER_REGNUM:
20816 return 0;
20818 case ARM_HARD_FRAME_POINTER_REGNUM:
20819 /* The hard frame pointer points to the top entry in the
20820 stack frame. The soft frame pointer to the bottom entry
20821 in the stack frame. If there is no stack frame at all,
20822 then they are identical. */
20824 return offsets->frame - offsets->soft_frame;
20826 case STACK_POINTER_REGNUM:
20827 return offsets->outgoing_args - offsets->soft_frame;
20829 default:
20830 gcc_unreachable ();
20832 gcc_unreachable ();
20834 default:
20835 /* You cannot eliminate from the stack pointer.
20836 In theory you could eliminate from the hard frame
20837 pointer to the stack pointer, but this will never
20838 happen, since if a stack frame is not needed the
20839 hard frame pointer will never be used. */
20840 gcc_unreachable ();
20844 /* Given FROM and TO register numbers, say whether this elimination is
20845 allowed. Frame pointer elimination is automatically handled.
20847 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20848 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20849 pointer, we must eliminate FRAME_POINTER_REGNUM into
20850 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20851 ARG_POINTER_REGNUM. */
20853 bool
20854 arm_can_eliminate (const int from, const int to)
20856 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20857 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20858 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20859 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20860 true);
20863 /* Emit RTL to save coprocessor registers on function entry. Returns the
20864 number of bytes pushed. */
20866 static int
20867 arm_save_coproc_regs(void)
20869 int saved_size = 0;
20870 unsigned reg;
20871 unsigned start_reg;
20872 rtx insn;
20874 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20875 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20877 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20878 insn = gen_rtx_MEM (V2SImode, insn);
20879 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20880 RTX_FRAME_RELATED_P (insn) = 1;
20881 saved_size += 8;
20884 if (TARGET_HARD_FLOAT && TARGET_VFP)
20886 start_reg = FIRST_VFP_REGNUM;
20888 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20890 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20891 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20893 if (start_reg != reg)
20894 saved_size += vfp_emit_fstmd (start_reg,
20895 (reg - start_reg) / 2);
20896 start_reg = reg + 2;
20899 if (start_reg != reg)
20900 saved_size += vfp_emit_fstmd (start_reg,
20901 (reg - start_reg) / 2);
20903 return saved_size;
20907 /* Set the Thumb frame pointer from the stack pointer. */
20909 static void
20910 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20912 HOST_WIDE_INT amount;
20913 rtx insn, dwarf;
20915 amount = offsets->outgoing_args - offsets->locals_base;
20916 if (amount < 1024)
20917 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20918 stack_pointer_rtx, GEN_INT (amount)));
20919 else
20921 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20922 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20923 expects the first two operands to be the same. */
20924 if (TARGET_THUMB2)
20926 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20927 stack_pointer_rtx,
20928 hard_frame_pointer_rtx));
20930 else
20932 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20933 hard_frame_pointer_rtx,
20934 stack_pointer_rtx));
20936 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
20937 plus_constant (Pmode, stack_pointer_rtx, amount));
20938 RTX_FRAME_RELATED_P (dwarf) = 1;
20939 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20942 RTX_FRAME_RELATED_P (insn) = 1;
20945 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20946 function. */
20947 void
20948 arm_expand_prologue (void)
20950 rtx amount;
20951 rtx insn;
20952 rtx ip_rtx;
20953 unsigned long live_regs_mask;
20954 unsigned long func_type;
20955 int fp_offset = 0;
20956 int saved_pretend_args = 0;
20957 int saved_regs = 0;
20958 unsigned HOST_WIDE_INT args_to_push;
20959 arm_stack_offsets *offsets;
20961 func_type = arm_current_func_type ();
20963 /* Naked functions don't have prologues. */
20964 if (IS_NAKED (func_type))
20965 return;
20967 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20968 args_to_push = crtl->args.pretend_args_size;
20970 /* Compute which register we will have to save onto the stack. */
20971 offsets = arm_get_frame_offsets ();
20972 live_regs_mask = offsets->saved_regs_mask;
20974 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20976 if (IS_STACKALIGN (func_type))
20978 rtx r0, r1;
20980 /* Handle a word-aligned stack pointer. We generate the following:
20982 mov r0, sp
20983 bic r1, r0, #7
20984 mov sp, r1
20985 <save and restore r0 in normal prologue/epilogue>
20986 mov sp, r0
20987 bx lr
20989 The unwinder doesn't need to know about the stack realignment.
20990 Just tell it we saved SP in r0. */
20991 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20993 r0 = gen_rtx_REG (SImode, R0_REGNUM);
20994 r1 = gen_rtx_REG (SImode, R1_REGNUM);
20996 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20997 RTX_FRAME_RELATED_P (insn) = 1;
20998 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21000 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21002 /* ??? The CFA changes here, which may cause GDB to conclude that it
21003 has entered a different function. That said, the unwind info is
21004 correct, individually, before and after this instruction because
21005 we've described the save of SP, which will override the default
21006 handling of SP as restoring from the CFA. */
21007 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21010 /* For APCS frames, if the IP register is clobbered
21011 when creating the frame, save that register in a special
21012 way. */
21013 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21015 if (IS_INTERRUPT (func_type))
21017 /* Interrupt functions must not corrupt any registers.
21018 Creating a frame pointer however, corrupts the IP
21019 register, so we must push it first. */
21020 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21022 /* Do not set RTX_FRAME_RELATED_P on this insn.
21023 The dwarf stack unwinding code only wants to see one
21024 stack decrement per function, and this is not it. If
21025 this instruction is labeled as being part of the frame
21026 creation sequence then dwarf2out_frame_debug_expr will
21027 die when it encounters the assignment of IP to FP
21028 later on, since the use of SP here establishes SP as
21029 the CFA register and not IP.
21031 Anyway this instruction is not really part of the stack
21032 frame creation although it is part of the prologue. */
21034 else if (IS_NESTED (func_type))
21036 /* The static chain register is the same as the IP register
21037 used as a scratch register during stack frame creation.
21038 To get around this we need to find somewhere to store IP
21039 whilst the frame is being created. We try the following
21040 places in order:
21042 1. The last argument register r3 if it is available.
21043 2. A slot on the stack above the frame if there are no
21044 arguments to push onto the stack.
21045 3. Register r3 again, after pushing the argument registers
21046 onto the stack, if this is a varargs function.
21047 4. The last slot on the stack created for the arguments to
21048 push, if this isn't a varargs function.
21050 Note - we only need to tell the dwarf2 backend about the SP
21051 adjustment in the second variant; the static chain register
21052 doesn't need to be unwound, as it doesn't contain a value
21053 inherited from the caller. */
21055 if (!arm_r3_live_at_start_p ())
21056 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21057 else if (args_to_push == 0)
21059 rtx addr, dwarf;
21061 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21062 saved_regs += 4;
21064 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21065 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21066 fp_offset = 4;
21068 /* Just tell the dwarf backend that we adjusted SP. */
21069 dwarf = gen_rtx_SET (stack_pointer_rtx,
21070 plus_constant (Pmode, stack_pointer_rtx,
21071 -fp_offset));
21072 RTX_FRAME_RELATED_P (insn) = 1;
21073 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21075 else
21077 /* Store the args on the stack. */
21078 if (cfun->machine->uses_anonymous_args)
21080 insn
21081 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21082 (0xf0 >> (args_to_push / 4)) & 0xf);
21083 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21084 saved_pretend_args = 1;
21086 else
21088 rtx addr, dwarf;
21090 if (args_to_push == 4)
21091 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21092 else
21093 addr
21094 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21095 plus_constant (Pmode,
21096 stack_pointer_rtx,
21097 -args_to_push));
21099 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21101 /* Just tell the dwarf backend that we adjusted SP. */
21102 dwarf
21103 = gen_rtx_SET (stack_pointer_rtx,
21104 plus_constant (Pmode, stack_pointer_rtx,
21105 -args_to_push));
21106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21109 RTX_FRAME_RELATED_P (insn) = 1;
21110 fp_offset = args_to_push;
21111 args_to_push = 0;
21115 insn = emit_set_insn (ip_rtx,
21116 plus_constant (Pmode, stack_pointer_rtx,
21117 fp_offset));
21118 RTX_FRAME_RELATED_P (insn) = 1;
21121 if (args_to_push)
21123 /* Push the argument registers, or reserve space for them. */
21124 if (cfun->machine->uses_anonymous_args)
21125 insn = emit_multi_reg_push
21126 ((0xf0 >> (args_to_push / 4)) & 0xf,
21127 (0xf0 >> (args_to_push / 4)) & 0xf);
21128 else
21129 insn = emit_insn
21130 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21131 GEN_INT (- args_to_push)));
21132 RTX_FRAME_RELATED_P (insn) = 1;
21135 /* If this is an interrupt service routine, and the link register
21136 is going to be pushed, and we're not generating extra
21137 push of IP (needed when a frame is needed and the frame layout is APCS),
21138 subtracting four from LR now will mean that the function return
21139 can be done with a single instruction. */
21140 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21141 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21142 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21143 && TARGET_ARM)
21145 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21147 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21150 if (live_regs_mask)
21152 unsigned long dwarf_regs_mask = live_regs_mask;
21154 saved_regs += bit_count (live_regs_mask) * 4;
21155 if (optimize_size && !frame_pointer_needed
21156 && saved_regs == offsets->saved_regs - offsets->saved_args)
21158 /* If no coprocessor registers are being pushed and we don't have
21159 to worry about a frame pointer then push extra registers to
21160 create the stack frame. This is done in a way that does not
21161 alter the frame layout, so is independent of the epilogue. */
21162 int n;
21163 int frame;
21164 n = 0;
21165 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21166 n++;
21167 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21168 if (frame && n * 4 >= frame)
21170 n = frame / 4;
21171 live_regs_mask |= (1 << n) - 1;
21172 saved_regs += frame;
21176 if (TARGET_LDRD
21177 && current_tune->prefer_ldrd_strd
21178 && !optimize_function_for_size_p (cfun))
21180 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21181 if (TARGET_THUMB2)
21182 thumb2_emit_strd_push (live_regs_mask);
21183 else if (TARGET_ARM
21184 && !TARGET_APCS_FRAME
21185 && !IS_INTERRUPT (func_type))
21186 arm_emit_strd_push (live_regs_mask);
21187 else
21189 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21190 RTX_FRAME_RELATED_P (insn) = 1;
21193 else
21195 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21196 RTX_FRAME_RELATED_P (insn) = 1;
21200 if (! IS_VOLATILE (func_type))
21201 saved_regs += arm_save_coproc_regs ();
21203 if (frame_pointer_needed && TARGET_ARM)
21205 /* Create the new frame pointer. */
21206 if (TARGET_APCS_FRAME)
21208 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21209 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21210 RTX_FRAME_RELATED_P (insn) = 1;
21212 if (IS_NESTED (func_type))
21214 /* Recover the static chain register. */
21215 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21216 insn = gen_rtx_REG (SImode, 3);
21217 else
21219 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21220 insn = gen_frame_mem (SImode, insn);
21222 emit_set_insn (ip_rtx, insn);
21223 /* Add a USE to stop propagate_one_insn() from barfing. */
21224 emit_insn (gen_force_register_use (ip_rtx));
21227 else
21229 insn = GEN_INT (saved_regs - 4);
21230 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21231 stack_pointer_rtx, insn));
21232 RTX_FRAME_RELATED_P (insn) = 1;
21236 if (flag_stack_usage_info)
21237 current_function_static_stack_size
21238 = offsets->outgoing_args - offsets->saved_args;
21240 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21242 /* This add can produce multiple insns for a large constant, so we
21243 need to get tricky. */
21244 rtx_insn *last = get_last_insn ();
21246 amount = GEN_INT (offsets->saved_args + saved_regs
21247 - offsets->outgoing_args);
21249 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21250 amount));
21253 last = last ? NEXT_INSN (last) : get_insns ();
21254 RTX_FRAME_RELATED_P (last) = 1;
21256 while (last != insn);
21258 /* If the frame pointer is needed, emit a special barrier that
21259 will prevent the scheduler from moving stores to the frame
21260 before the stack adjustment. */
21261 if (frame_pointer_needed)
21262 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21263 hard_frame_pointer_rtx));
21267 if (frame_pointer_needed && TARGET_THUMB2)
21268 thumb_set_frame_pointer (offsets);
21270 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21272 unsigned long mask;
21274 mask = live_regs_mask;
21275 mask &= THUMB2_WORK_REGS;
21276 if (!IS_NESTED (func_type))
21277 mask |= (1 << IP_REGNUM);
21278 arm_load_pic_register (mask);
21281 /* If we are profiling, make sure no instructions are scheduled before
21282 the call to mcount. Similarly if the user has requested no
21283 scheduling in the prologue. Similarly if we want non-call exceptions
21284 using the EABI unwinder, to prevent faulting instructions from being
21285 swapped with a stack adjustment. */
21286 if (crtl->profile || !TARGET_SCHED_PROLOG
21287 || (arm_except_unwind_info (&global_options) == UI_TARGET
21288 && cfun->can_throw_non_call_exceptions))
21289 emit_insn (gen_blockage ());
21291 /* If the link register is being kept alive, with the return address in it,
21292 then make sure that it does not get reused by the ce2 pass. */
21293 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21294 cfun->machine->lr_save_eliminated = 1;
21297 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21298 static void
21299 arm_print_condition (FILE *stream)
21301 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21303 /* Branch conversion is not implemented for Thumb-2. */
21304 if (TARGET_THUMB)
21306 output_operand_lossage ("predicated Thumb instruction");
21307 return;
21309 if (current_insn_predicate != NULL)
21311 output_operand_lossage
21312 ("predicated instruction in conditional sequence");
21313 return;
21316 fputs (arm_condition_codes[arm_current_cc], stream);
21318 else if (current_insn_predicate)
21320 enum arm_cond_code code;
21322 if (TARGET_THUMB1)
21324 output_operand_lossage ("predicated Thumb instruction");
21325 return;
21328 code = get_arm_condition_code (current_insn_predicate);
21329 fputs (arm_condition_codes[code], stream);
21334 /* Globally reserved letters: acln
21335 Punctuation letters currently used: @_|?().!#
21336 Lower case letters currently used: bcdefhimpqtvwxyz
21337 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21338 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21340 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21342 If CODE is 'd', then the X is a condition operand and the instruction
21343 should only be executed if the condition is true.
21344 if CODE is 'D', then the X is a condition operand and the instruction
21345 should only be executed if the condition is false: however, if the mode
21346 of the comparison is CCFPEmode, then always execute the instruction -- we
21347 do this because in these circumstances !GE does not necessarily imply LT;
21348 in these cases the instruction pattern will take care to make sure that
21349 an instruction containing %d will follow, thereby undoing the effects of
21350 doing this instruction unconditionally.
21351 If CODE is 'N' then X is a floating point operand that must be negated
21352 before output.
21353 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21354 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
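/* For instance (a hedged illustration using a made-up operand numbering,
   not a specific pattern from arm.md): in an output template such as
   "add%?\t%0, %1, %2" the '?' prints the current condition code, if any;
   "%B2" applied to (const_int 5) prints -6; and "%L2" applied to
   (const_int 0x12345678) prints 22136, i.e. the low 16 bits 0x5678.  */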
21355 static void
21356 arm_print_operand (FILE *stream, rtx x, int code)
21358 switch (code)
21360 case '@':
21361 fputs (ASM_COMMENT_START, stream);
21362 return;
21364 case '_':
21365 fputs (user_label_prefix, stream);
21366 return;
21368 case '|':
21369 fputs (REGISTER_PREFIX, stream);
21370 return;
21372 case '?':
21373 arm_print_condition (stream);
21374 return;
21376 case '(':
21377 /* Nothing in unified syntax, otherwise the current condition code. */
21378 if (!TARGET_UNIFIED_ASM)
21379 arm_print_condition (stream);
21380 break;
21382 case ')':
21383 /* The current condition code in unified syntax, otherwise nothing. */
21384 if (TARGET_UNIFIED_ASM)
21385 arm_print_condition (stream);
21386 break;
21388 case '.':
21389 /* The current condition code for a condition code setting instruction.
21390 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21391 if (TARGET_UNIFIED_ASM)
21393 fputc('s', stream);
21394 arm_print_condition (stream);
21396 else
21398 arm_print_condition (stream);
21399 fputc('s', stream);
21401 return;
21403 case '!':
21404 /* If the instruction is conditionally executed then print
21405 the current condition code, otherwise print 's'. */
21406 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21407 if (current_insn_predicate)
21408 arm_print_condition (stream);
21409 else
21410 fputc('s', stream);
21411 break;
21413 /* %# is a "break" sequence. It doesn't output anything, but is used to
21414 separate e.g. operand numbers from following text, if that text consists
21415 of further digits which we don't want to be part of the operand
21416 number. */
21417 case '#':
21418 return;
21420 case 'N':
21422 REAL_VALUE_TYPE r;
21423 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21424 r = real_value_negate (&r);
21425 fprintf (stream, "%s", fp_const_from_val (&r));
21427 return;
21429 /* An integer or symbol address without a preceding # sign. */
21430 case 'c':
21431 switch (GET_CODE (x))
21433 case CONST_INT:
21434 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21435 break;
21437 case SYMBOL_REF:
21438 output_addr_const (stream, x);
21439 break;
21441 case CONST:
21442 if (GET_CODE (XEXP (x, 0)) == PLUS
21443 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21445 output_addr_const (stream, x);
21446 break;
21448 /* Fall through. */
21450 default:
21451 output_operand_lossage ("Unsupported operand for code '%c'", code);
21453 return;
21455 /* An integer that we want to print in HEX. */
21456 case 'x':
21457 switch (GET_CODE (x))
21459 case CONST_INT:
21460 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21461 break;
21463 default:
21464 output_operand_lossage ("Unsupported operand for code '%c'", code);
21466 return;
21468 case 'B':
21469 if (CONST_INT_P (x))
21471 HOST_WIDE_INT val;
21472 val = ARM_SIGN_EXTEND (~INTVAL (x));
21473 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21475 else
21477 putc ('~', stream);
21478 output_addr_const (stream, x);
21480 return;
21482 case 'b':
21483 /* Print the log2 of a CONST_INT. */
21485 HOST_WIDE_INT val;
21487 if (!CONST_INT_P (x)
21488 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21489 output_operand_lossage ("Unsupported operand for code '%c'", code);
21490 else
21491 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21493 return;
21495 case 'L':
21496 /* The low 16 bits of an immediate constant. */
21497 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21498 return;
21500 case 'i':
21501 fprintf (stream, "%s", arithmetic_instr (x, 1));
21502 return;
21504 case 'I':
21505 fprintf (stream, "%s", arithmetic_instr (x, 0));
21506 return;
21508 case 'S':
21510 HOST_WIDE_INT val;
21511 const char *shift;
21513 shift = shift_op (x, &val);
21515 if (shift)
21517 fprintf (stream, ", %s ", shift);
21518 if (val == -1)
21519 arm_print_operand (stream, XEXP (x, 1), 0);
21520 else
21521 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21524 return;
21526 /* An explanation of the 'Q', 'R' and 'H' register operands:
21528 In a pair of registers containing a DI or DF value the 'Q'
21529 operand returns the register number of the register containing
21530 the least significant part of the value. The 'R' operand returns
21531 the register number of the register containing the most
21532 significant part of the value.
21534 The 'H' operand returns the higher of the two register numbers.
21535 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21536 same as the 'Q' operand, since the most significant part of the
21537 value is held in the lower number register. The reverse is true
21538 on systems where WORDS_BIG_ENDIAN is false.
21540 The purpose of these operands is to distinguish between cases
21541 where the endian-ness of the values is important (for example
21542 when they are added together), and cases where the endian-ness
21543 is irrelevant, but the order of register operations is important.
21544 For example when loading a value from memory into a register
21545 pair, the endian-ness does not matter. Provided that the value
21546 from the lower memory address is put into the lower numbered
21547 register, and the value from the higher address is put into the
21548 higher numbered register, the load will work regardless of whether
21549 the value being loaded is big-wordian or little-wordian. The
21550 order of the two register loads can matter however, if the address
21551 of the memory location is actually held in one of the registers
21552 being overwritten by the load.
21554 The 'Q' and 'R' constraints are also available for 64-bit
21555 constants. */
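/* Worked example (illustrative): for a DImode value held in r0/r1 on a
   little-endian target (WORDS_BIG_ENDIAN false), %Q prints r0 (the least
   significant word), %R prints r1 (the most significant word) and %H
   prints r1 (the higher register number).  When WORDS_BIG_ENDIAN is true
   %Q and %R swap, while %H still prints r1.  */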
21556 case 'Q':
21557 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21559 rtx part = gen_lowpart (SImode, x);
21560 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21561 return;
21564 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21566 output_operand_lossage ("invalid operand for code '%c'", code);
21567 return;
21570 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21571 return;
21573 case 'R':
21574 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21576 machine_mode mode = GET_MODE (x);
21577 rtx part;
21579 if (mode == VOIDmode)
21580 mode = DImode;
21581 part = gen_highpart_mode (SImode, mode, x);
21582 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21583 return;
21586 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21588 output_operand_lossage ("invalid operand for code '%c'", code);
21589 return;
21592 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21593 return;
21595 case 'H':
21596 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21598 output_operand_lossage ("invalid operand for code '%c'", code);
21599 return;
21602 asm_fprintf (stream, "%r", REGNO (x) + 1);
21603 return;
21605 case 'J':
21606 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21608 output_operand_lossage ("invalid operand for code '%c'", code);
21609 return;
21612 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21613 return;
21615 case 'K':
21616 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21618 output_operand_lossage ("invalid operand for code '%c'", code);
21619 return;
21622 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21623 return;
21625 case 'm':
21626 asm_fprintf (stream, "%r",
21627 REG_P (XEXP (x, 0))
21628 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21629 return;
21631 case 'M':
21632 asm_fprintf (stream, "{%r-%r}",
21633 REGNO (x),
21634 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21635 return;
21637 /* Like 'M', but writing doubleword vector registers, for use by Neon
21638 insns. */
21639 case 'h':
21641 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21642 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21643 if (numregs == 1)
21644 asm_fprintf (stream, "{d%d}", regno);
21645 else
21646 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21648 return;
21650 case 'd':
21651 /* CONST_TRUE_RTX means always -- that's the default. */
21652 if (x == const_true_rtx)
21653 return;
21655 if (!COMPARISON_P (x))
21657 output_operand_lossage ("invalid operand for code '%c'", code);
21658 return;
21661 fputs (arm_condition_codes[get_arm_condition_code (x)],
21662 stream);
21663 return;
21665 case 'D':
21666 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21667 want to do that. */
21668 if (x == const_true_rtx)
21670 output_operand_lossage ("instruction never executed");
21671 return;
21673 if (!COMPARISON_P (x))
21675 output_operand_lossage ("invalid operand for code '%c'", code);
21676 return;
21679 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21680 (get_arm_condition_code (x))],
21681 stream);
21682 return;
21684 case 's':
21685 case 'V':
21686 case 'W':
21687 case 'X':
21688 case 'Y':
21689 case 'Z':
21690 /* Former Maverick support, removed after GCC-4.7. */
21691 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21692 return;
21694 case 'U':
21695 if (!REG_P (x)
21696 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21697 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21698 /* Bad value for wCG register number. */
21700 output_operand_lossage ("invalid operand for code '%c'", code);
21701 return;
21704 else
21705 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21706 return;
21708 /* Print an iWMMXt control register name. */
21709 case 'w':
21710 if (!CONST_INT_P (x)
21711 || INTVAL (x) < 0
21712 || INTVAL (x) >= 16)
21713 /* Bad value for wC register number. */
21715 output_operand_lossage ("invalid operand for code '%c'", code);
21716 return;
21719 else
21721 static const char * wc_reg_names [16] =
21723 "wCID", "wCon", "wCSSF", "wCASF",
21724 "wC4", "wC5", "wC6", "wC7",
21725 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21726 "wC12", "wC13", "wC14", "wC15"
21729 fputs (wc_reg_names [INTVAL (x)], stream);
21731 return;
21733 /* Print the high single-precision register of a VFP double-precision
21734 register. */
21735 case 'p':
21737 machine_mode mode = GET_MODE (x);
21738 int regno;
21740 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21742 output_operand_lossage ("invalid operand for code '%c'", code);
21743 return;
21746 regno = REGNO (x);
21747 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21749 output_operand_lossage ("invalid operand for code '%c'", code);
21750 return;
21753 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21755 return;
21757 /* Print a VFP/Neon double precision or quad precision register name. */
21758 case 'P':
21759 case 'q':
21761 machine_mode mode = GET_MODE (x);
21762 int is_quad = (code == 'q');
21763 int regno;
21765 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21767 output_operand_lossage ("invalid operand for code '%c'", code);
21768 return;
21771 if (!REG_P (x)
21772 || !IS_VFP_REGNUM (REGNO (x)))
21774 output_operand_lossage ("invalid operand for code '%c'", code);
21775 return;
21778 regno = REGNO (x);
21779 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21780 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21782 output_operand_lossage ("invalid operand for code '%c'", code);
21783 return;
21786 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21787 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21789 return;
21791 /* These two codes print the low/high doubleword register of a Neon quad
21792 register, respectively. For pair-structure types, can also print
21793 low/high quadword registers. */
21794 case 'e':
21795 case 'f':
21797 machine_mode mode = GET_MODE (x);
21798 int regno;
21800 if ((GET_MODE_SIZE (mode) != 16
21801 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21803 output_operand_lossage ("invalid operand for code '%c'", code);
21804 return;
21807 regno = REGNO (x);
21808 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21810 output_operand_lossage ("invalid operand for code '%c'", code);
21811 return;
21814 if (GET_MODE_SIZE (mode) == 16)
21815 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21816 + (code == 'f' ? 1 : 0));
21817 else
21818 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21819 + (code == 'f' ? 1 : 0));
21821 return;
21823 /* Print a VFPv3 floating-point constant, represented as an integer
21824 index. */
21825 case 'G':
21827 int index = vfp3_const_double_index (x);
21828 gcc_assert (index != -1);
21829 fprintf (stream, "%d", index);
21831 return;
21833 /* Print bits representing opcode features for Neon.
21835 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21836 and polynomials as unsigned.
21838 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21840 Bit 2 is 1 for rounding functions, 0 otherwise. */
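/* For example (illustrative): a bits value of 5 -- signed, ordinary
   integer, rounding -- makes %T print 's', %F print 'i', %t print 's'
   and %O print 'r', so a pattern can compose a mnemonic such as
   "vrhadd.s32" from these pieces.  */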
21842 /* Identify the type as 's', 'u', 'p' or 'f'. */
21843 case 'T':
21845 HOST_WIDE_INT bits = INTVAL (x);
21846 fputc ("uspf"[bits & 3], stream);
21848 return;
21850 /* Likewise, but signed and unsigned integers are both 'i'. */
21851 case 'F':
21853 HOST_WIDE_INT bits = INTVAL (x);
21854 fputc ("iipf"[bits & 3], stream);
21856 return;
21858 /* As for 'T', but emit 'u' instead of 'p'. */
21859 case 't':
21861 HOST_WIDE_INT bits = INTVAL (x);
21862 fputc ("usuf"[bits & 3], stream);
21864 return;
21866 /* Bit 2: rounding (vs none). */
21867 case 'O':
21869 HOST_WIDE_INT bits = INTVAL (x);
21870 fputs ((bits & 4) != 0 ? "r" : "", stream);
21872 return;
21874 /* Memory operand for vld1/vst1 instruction. */
21875 case 'A':
21877 rtx addr;
21878 bool postinc = FALSE;
21879 rtx postinc_reg = NULL;
21880 unsigned align, memsize, align_bits;
21882 gcc_assert (MEM_P (x));
21883 addr = XEXP (x, 0);
21884 if (GET_CODE (addr) == POST_INC)
21886 postinc = 1;
21887 addr = XEXP (addr, 0);
21889 if (GET_CODE (addr) == POST_MODIFY)
21891 postinc_reg = XEXP( XEXP (addr, 1), 1);
21892 addr = XEXP (addr, 0);
21894 asm_fprintf (stream, "[%r", REGNO (addr));
21896 /* We know the alignment of this access, so we can emit a hint in the
21897 instruction (for some alignments) as an aid to the memory subsystem
21898 of the target. */
21899 align = MEM_ALIGN (x) >> 3;
21900 memsize = MEM_SIZE (x);
21902 /* Only certain alignment specifiers are supported by the hardware. */
21903 if (memsize == 32 && (align % 32) == 0)
21904 align_bits = 256;
21905 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21906 align_bits = 128;
21907 else if (memsize >= 8 && (align % 8) == 0)
21908 align_bits = 64;
21909 else
21910 align_bits = 0;
21912 if (align_bits != 0)
21913 asm_fprintf (stream, ":%d", align_bits);
21915 asm_fprintf (stream, "]");
21917 if (postinc)
21918 fputs("!", stream);
21919 if (postinc_reg)
21920 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21922 return;
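/* Illustrative output for the 'A' code above: a 16-byte vld1/vst1 memory
   operand whose base address is in r0 and whose known alignment is 16
   bytes prints as "[r0:128]", with "!" appended for a post-increment by
   the access size.  This is a sketch of the formatting, not verified
   assembler output.  */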
21924 case 'C':
21926 rtx addr;
21928 gcc_assert (MEM_P (x));
21929 addr = XEXP (x, 0);
21930 gcc_assert (REG_P (addr));
21931 asm_fprintf (stream, "[%r]", REGNO (addr));
21933 return;
21935 /* Translate an S register number into a D register number and element index. */
21936 case 'y':
21938 machine_mode mode = GET_MODE (x);
21939 int regno;
21941 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21943 output_operand_lossage ("invalid operand for code '%c'", code);
21944 return;
21947 regno = REGNO (x);
21948 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21950 output_operand_lossage ("invalid operand for code '%c'", code);
21951 return;
21954 regno = regno - FIRST_VFP_REGNUM;
21955 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21957 return;
21959 case 'v':
21960 gcc_assert (CONST_DOUBLE_P (x));
21961 int result;
21962 result = vfp3_const_double_for_fract_bits (x);
21963 if (result == 0)
21964 result = vfp3_const_double_for_bits (x);
21965 fprintf (stream, "#%d", result);
21966 return;
21968 /* Register specifier for vld1.16/vst1.16. Translate the S register
21969 number into a D register number and element index. */
21970 case 'z':
21972 machine_mode mode = GET_MODE (x);
21973 int regno;
21975 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21977 output_operand_lossage ("invalid operand for code '%c'", code);
21978 return;
21981 regno = REGNO (x);
21982 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21984 output_operand_lossage ("invalid operand for code '%c'", code);
21985 return;
21988 regno = regno - FIRST_VFP_REGNUM;
21989 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21991 return;
21993 default:
21994 if (x == 0)
21996 output_operand_lossage ("missing operand");
21997 return;
22000 switch (GET_CODE (x))
22002 case REG:
22003 asm_fprintf (stream, "%r", REGNO (x));
22004 break;
22006 case MEM:
22007 output_memory_reference_mode = GET_MODE (x);
22008 output_address (XEXP (x, 0));
22009 break;
22011 case CONST_DOUBLE:
22013 char fpstr[20];
22014 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22015 sizeof (fpstr), 0, 1);
22016 fprintf (stream, "#%s", fpstr);
22018 break;
22020 default:
22021 gcc_assert (GET_CODE (x) != NEG);
22022 fputc ('#', stream);
22023 if (GET_CODE (x) == HIGH)
22025 fputs (":lower16:", stream);
22026 x = XEXP (x, 0);
22029 output_addr_const (stream, x);
22030 break;
22035 /* Target hook for printing a memory address. */
22036 static void
22037 arm_print_operand_address (FILE *stream, rtx x)
22039 if (TARGET_32BIT)
22041 int is_minus = GET_CODE (x) == MINUS;
22043 if (REG_P (x))
22044 asm_fprintf (stream, "[%r]", REGNO (x));
22045 else if (GET_CODE (x) == PLUS || is_minus)
22047 rtx base = XEXP (x, 0);
22048 rtx index = XEXP (x, 1);
22049 HOST_WIDE_INT offset = 0;
22050 if (!REG_P (base)
22051 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22053 /* Ensure that BASE is a register. */
22054 /* (one of them must be). */
22055 /* Also ensure the SP is not used as an index register. */
22056 std::swap (base, index);
22058 switch (GET_CODE (index))
22060 case CONST_INT:
22061 offset = INTVAL (index);
22062 if (is_minus)
22063 offset = -offset;
22064 asm_fprintf (stream, "[%r, #%wd]",
22065 REGNO (base), offset);
22066 break;
22068 case REG:
22069 asm_fprintf (stream, "[%r, %s%r]",
22070 REGNO (base), is_minus ? "-" : "",
22071 REGNO (index));
22072 break;
22074 case MULT:
22075 case ASHIFTRT:
22076 case LSHIFTRT:
22077 case ASHIFT:
22078 case ROTATERT:
22080 asm_fprintf (stream, "[%r, %s%r",
22081 REGNO (base), is_minus ? "-" : "",
22082 REGNO (XEXP (index, 0)));
22083 arm_print_operand (stream, index, 'S');
22084 fputs ("]", stream);
22085 break;
22088 default:
22089 gcc_unreachable ();
22092 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22093 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22095 extern machine_mode output_memory_reference_mode;
22097 gcc_assert (REG_P (XEXP (x, 0)));
22099 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22100 asm_fprintf (stream, "[%r, #%s%d]!",
22101 REGNO (XEXP (x, 0)),
22102 GET_CODE (x) == PRE_DEC ? "-" : "",
22103 GET_MODE_SIZE (output_memory_reference_mode));
22104 else
22105 asm_fprintf (stream, "[%r], #%s%d",
22106 REGNO (XEXP (x, 0)),
22107 GET_CODE (x) == POST_DEC ? "-" : "",
22108 GET_MODE_SIZE (output_memory_reference_mode));
22110 else if (GET_CODE (x) == PRE_MODIFY)
22112 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22113 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22114 asm_fprintf (stream, "#%wd]!",
22115 INTVAL (XEXP (XEXP (x, 1), 1)));
22116 else
22117 asm_fprintf (stream, "%r]!",
22118 REGNO (XEXP (XEXP (x, 1), 1)));
22120 else if (GET_CODE (x) == POST_MODIFY)
22122 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22123 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22124 asm_fprintf (stream, "#%wd",
22125 INTVAL (XEXP (XEXP (x, 1), 1)));
22126 else
22127 asm_fprintf (stream, "%r",
22128 REGNO (XEXP (XEXP (x, 1), 1)));
22130 else output_addr_const (stream, x);
22132 else
22134 if (REG_P (x))
22135 asm_fprintf (stream, "[%r]", REGNO (x));
22136 else if (GET_CODE (x) == POST_INC)
22137 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22138 else if (GET_CODE (x) == PLUS)
22140 gcc_assert (REG_P (XEXP (x, 0)));
22141 if (CONST_INT_P (XEXP (x, 1)))
22142 asm_fprintf (stream, "[%r, #%wd]",
22143 REGNO (XEXP (x, 0)),
22144 INTVAL (XEXP (x, 1)));
22145 else
22146 asm_fprintf (stream, "[%r, %r]",
22147 REGNO (XEXP (x, 0)),
22148 REGNO (XEXP (x, 1)));
22150 else
22151 output_addr_const (stream, x);
22155 /* Target hook for indicating whether a punctuation character for
22156 TARGET_PRINT_OPERAND is valid. */
22157 static bool
22158 arm_print_operand_punct_valid_p (unsigned char code)
22160 return (code == '@' || code == '|' || code == '.'
22161 || code == '(' || code == ')' || code == '#'
22162 || (TARGET_32BIT && (code == '?'))
22163 || (TARGET_THUMB2 && (code == '!'))
22164 || (TARGET_THUMB && (code == '_')));
22167 /* Target hook for assembling integer objects. The ARM version needs to
22168 handle word-sized values specially. */
22169 static bool
22170 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22172 machine_mode mode;
22174 if (size == UNITS_PER_WORD && aligned_p)
22176 fputs ("\t.word\t", asm_out_file);
22177 output_addr_const (asm_out_file, x);
22179 /* Mark symbols as position independent. We only do this in the
22180 .text segment, not in the .data segment. */
22181 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22182 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22184 /* See legitimize_pic_address for an explanation of the
22185 TARGET_VXWORKS_RTP check. */
22186 if (!arm_pic_data_is_text_relative
22187 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22188 fputs ("(GOT)", asm_out_file);
22189 else
22190 fputs ("(GOTOFF)", asm_out_file);
22192 fputc ('\n', asm_out_file);
22193 return true;
22196 mode = GET_MODE (x);
22198 if (arm_vector_mode_supported_p (mode))
22200 int i, units;
22202 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22204 units = CONST_VECTOR_NUNITS (x);
22205 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22207 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22208 for (i = 0; i < units; i++)
22210 rtx elt = CONST_VECTOR_ELT (x, i);
22211 assemble_integer
22212 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22214 else
22215 for (i = 0; i < units; i++)
22217 rtx elt = CONST_VECTOR_ELT (x, i);
22218 REAL_VALUE_TYPE rval;
22220 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22222 assemble_real
22223 (rval, GET_MODE_INNER (mode),
22224 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22227 return true;
22230 return default_assemble_integer (x, size, aligned_p);
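/* Illustrative output from the word-sized case above (assuming
   NEED_GOT_RELOC and -fpic): a constant-pool reference to a non-local
   symbol "foo" is emitted as "\t.word\tfoo(GOT)", while a local,
   text-relative reference is emitted as "\t.word\tfoo(GOTOFF)".  */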
22233 static void
22234 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22236 section *s;
22238 if (!TARGET_AAPCS_BASED)
22240 (is_ctor ?
22241 default_named_section_asm_out_constructor
22242 : default_named_section_asm_out_destructor) (symbol, priority);
22243 return;
22246 /* Put these in the .init_array section, using a special relocation. */
22247 if (priority != DEFAULT_INIT_PRIORITY)
22249 char buf[18];
22250 sprintf (buf, "%s.%.5u",
22251 is_ctor ? ".init_array" : ".fini_array",
22252 priority);
22253 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22255 else if (is_ctor)
22256 s = ctors_section;
22257 else
22258 s = dtors_section;
22260 switch_to_section (s);
22261 assemble_align (POINTER_SIZE);
22262 fputs ("\t.word\t", asm_out_file);
22263 output_addr_const (asm_out_file, symbol);
22264 fputs ("(target1)\n", asm_out_file);
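/* Illustrative example for the AAPCS path above: a constructor with
   priority 101 is placed in a section named ".init_array.00101" and is
   emitted as "\t.word\t<symbol>(target1)", letting the linker resolve
   the entry with the R_ARM_TARGET1 relocation.  */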
22267 /* Add a function to the list of static constructors. */
22269 static void
22270 arm_elf_asm_constructor (rtx symbol, int priority)
22272 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22275 /* Add a function to the list of static destructors. */
22277 static void
22278 arm_elf_asm_destructor (rtx symbol, int priority)
22280 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22283 /* A finite state machine takes care of noticing whether or not instructions
22284 can be conditionally executed, and thus decrease execution time and code
22285 size by deleting branch instructions. The fsm is controlled by
22286 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22288 /* The states of the fsm controlling condition codes are:
22289 0: normal, do nothing special
22290 1: make ASM_OUTPUT_OPCODE not output this instruction
22291 2: make ASM_OUTPUT_OPCODE not output this instruction
22292 3: make instructions conditional
22293 4: make instructions conditional
22295 State transitions (state->state by whom under condition):
22296 0 -> 1 final_prescan_insn if the `target' is a label
22297 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22298 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22299 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22300 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22301 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22302 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22303 (the target insn is arm_target_insn).
22305 If the jump clobbers the conditions then we use states 2 and 4.
22307 A similar thing can be done with conditional return insns.
22309 XXX In case the `target' is an unconditional branch, this conditionalising
22310 of the instructions always reduces code size, but not always execution
22311 time. But then, I want to reduce the code size to somewhere near what
22312 /bin/cc produces. */
22314 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22315 instructions. When a COND_EXEC instruction is seen the subsequent
22316 instructions are scanned so that multiple conditional instructions can be
22317 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22318 specify the length and true/false mask for the IT block. These will be
22319 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
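/* Worked example of the fsm (illustrative):

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   final_prescan_insn sees that the conditional branch targets a nearby
   label (state 0 -> 1), ASM_OUTPUT_OPCODE then suppresses the branch
   (1 -> 3), the ADD is printed with the inverse condition as "addne",
   and reaching .L1 returns the fsm to state 0.  */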
22321 /* Returns the index of the ARM condition code string in
22322 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22323 COMPARISON should be an rtx like `(eq (...) (...))'. */
22325 enum arm_cond_code
22326 maybe_get_arm_condition_code (rtx comparison)
22328 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22329 enum arm_cond_code code;
22330 enum rtx_code comp_code = GET_CODE (comparison);
22332 if (GET_MODE_CLASS (mode) != MODE_CC)
22333 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22334 XEXP (comparison, 1));
22336 switch (mode)
22338 case CC_DNEmode: code = ARM_NE; goto dominance;
22339 case CC_DEQmode: code = ARM_EQ; goto dominance;
22340 case CC_DGEmode: code = ARM_GE; goto dominance;
22341 case CC_DGTmode: code = ARM_GT; goto dominance;
22342 case CC_DLEmode: code = ARM_LE; goto dominance;
22343 case CC_DLTmode: code = ARM_LT; goto dominance;
22344 case CC_DGEUmode: code = ARM_CS; goto dominance;
22345 case CC_DGTUmode: code = ARM_HI; goto dominance;
22346 case CC_DLEUmode: code = ARM_LS; goto dominance;
22347 case CC_DLTUmode: code = ARM_CC;
22349 dominance:
22350 if (comp_code == EQ)
22351 return ARM_INVERSE_CONDITION_CODE (code);
22352 if (comp_code == NE)
22353 return code;
22354 return ARM_NV;
22356 case CC_NOOVmode:
22357 switch (comp_code)
22359 case NE: return ARM_NE;
22360 case EQ: return ARM_EQ;
22361 case GE: return ARM_PL;
22362 case LT: return ARM_MI;
22363 default: return ARM_NV;
22366 case CC_Zmode:
22367 switch (comp_code)
22369 case NE: return ARM_NE;
22370 case EQ: return ARM_EQ;
22371 default: return ARM_NV;
22374 case CC_Nmode:
22375 switch (comp_code)
22377 case NE: return ARM_MI;
22378 case EQ: return ARM_PL;
22379 default: return ARM_NV;
22382 case CCFPEmode:
22383 case CCFPmode:
22384 /* We can handle all cases except UNEQ and LTGT. */
22385 switch (comp_code)
22387 case GE: return ARM_GE;
22388 case GT: return ARM_GT;
22389 case LE: return ARM_LS;
22390 case LT: return ARM_MI;
22391 case NE: return ARM_NE;
22392 case EQ: return ARM_EQ;
22393 case ORDERED: return ARM_VC;
22394 case UNORDERED: return ARM_VS;
22395 case UNLT: return ARM_LT;
22396 case UNLE: return ARM_LE;
22397 case UNGT: return ARM_HI;
22398 case UNGE: return ARM_PL;
22399 /* UNEQ and LTGT do not have a representation. */
22400 case UNEQ: /* Fall through. */
22401 case LTGT: /* Fall through. */
22402 default: return ARM_NV;
22405 case CC_SWPmode:
22406 switch (comp_code)
22408 case NE: return ARM_NE;
22409 case EQ: return ARM_EQ;
22410 case GE: return ARM_LE;
22411 case GT: return ARM_LT;
22412 case LE: return ARM_GE;
22413 case LT: return ARM_GT;
22414 case GEU: return ARM_LS;
22415 case GTU: return ARM_CC;
22416 case LEU: return ARM_CS;
22417 case LTU: return ARM_HI;
22418 default: return ARM_NV;
22421 case CC_Cmode:
22422 switch (comp_code)
22424 case LTU: return ARM_CS;
22425 case GEU: return ARM_CC;
22426 default: return ARM_NV;
22429 case CC_CZmode:
22430 switch (comp_code)
22432 case NE: return ARM_NE;
22433 case EQ: return ARM_EQ;
22434 case GEU: return ARM_CS;
22435 case GTU: return ARM_HI;
22436 case LEU: return ARM_LS;
22437 case LTU: return ARM_CC;
22438 default: return ARM_NV;
22441 case CC_NCVmode:
22442 switch (comp_code)
22444 case GE: return ARM_GE;
22445 case LT: return ARM_LT;
22446 case GEU: return ARM_CS;
22447 case LTU: return ARM_CC;
22448 default: return ARM_NV;
22451 case CCmode:
22452 switch (comp_code)
22454 case NE: return ARM_NE;
22455 case EQ: return ARM_EQ;
22456 case GE: return ARM_GE;
22457 case GT: return ARM_GT;
22458 case LE: return ARM_LE;
22459 case LT: return ARM_LT;
22460 case GEU: return ARM_CS;
22461 case GTU: return ARM_HI;
22462 case LEU: return ARM_LS;
22463 case LTU: return ARM_CC;
22464 default: return ARM_NV;
22467 default: gcc_unreachable ();
22471 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22472 static enum arm_cond_code
22473 get_arm_condition_code (rtx comparison)
22475 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22476 gcc_assert (code != ARM_NV);
22477 return code;
22480 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22481 instructions. */
22482 void
22483 thumb2_final_prescan_insn (rtx_insn *insn)
22485 rtx_insn *first_insn = insn;
22486 rtx body = PATTERN (insn);
22487 rtx predicate;
22488 enum arm_cond_code code;
22489 int n;
22490 int mask;
22491 int max;
22493 /* max_insns_skipped in the tune was already taken into account in the
22494 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22495 just emit the IT blocks as best we can. It does not make sense to split
22496 the IT blocks. */
22497 max = MAX_INSN_PER_IT_BLOCK;
22499 /* Remove the previous insn from the count of insns to be output. */
22500 if (arm_condexec_count)
22501 arm_condexec_count--;
22503 /* Nothing to do if we are already inside a conditional block. */
22504 if (arm_condexec_count)
22505 return;
22507 if (GET_CODE (body) != COND_EXEC)
22508 return;
22510 /* Conditional jumps are implemented directly. */
22511 if (JUMP_P (insn))
22512 return;
22514 predicate = COND_EXEC_TEST (body);
22515 arm_current_cc = get_arm_condition_code (predicate);
22517 n = get_attr_ce_count (insn);
22518 arm_condexec_count = 1;
22519 arm_condexec_mask = (1 << n) - 1;
22520 arm_condexec_masklen = n;
22521 /* See if subsequent instructions can be combined into the same block. */
22522 for (;;)
22524 insn = next_nonnote_insn (insn);
22526 /* Jumping into the middle of an IT block is illegal, so a label or
22527 barrier terminates the block. */
22528 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22529 break;
22531 body = PATTERN (insn);
22532 /* USE and CLOBBER aren't really insns, so just skip them. */
22533 if (GET_CODE (body) == USE
22534 || GET_CODE (body) == CLOBBER)
22535 continue;
22537 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22538 if (GET_CODE (body) != COND_EXEC)
22539 break;
22540 /* Maximum number of conditionally executed instructions in a block. */
22541 n = get_attr_ce_count (insn);
22542 if (arm_condexec_masklen + n > max)
22543 break;
22545 predicate = COND_EXEC_TEST (body);
22546 code = get_arm_condition_code (predicate);
22547 mask = (1 << n) - 1;
22548 if (arm_current_cc == code)
22549 arm_condexec_mask |= (mask << arm_condexec_masklen);
22550 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22551 break;
22553 arm_condexec_count++;
22554 arm_condexec_masklen += n;
22556 /* A jump must be the last instruction in a conditional block. */
22557 if (JUMP_P (insn))
22558 break;
22560 /* Restore recog_data (getting the attributes of other insns can
22561 destroy this array, but final.c assumes that it remains intact
22562 across this call). */
22563 extract_constrain_insn_cached (first_insn);
22566 void
22567 arm_final_prescan_insn (rtx_insn *insn)
22569 /* BODY will hold the body of INSN. */
22570 rtx body = PATTERN (insn);
22572 /* This will be 1 if trying to repeat the trick, and things need to be
22573 reversed if it appears to fail. */
22574 int reverse = 0;
22576 /* If we start with a return insn, we only succeed if we find another one. */
22577 int seeking_return = 0;
22578 enum rtx_code return_code = UNKNOWN;
22580 /* START_INSN will hold the insn from where we start looking. This is the
22581 first insn after the following code_label if REVERSE is true. */
22582 rtx_insn *start_insn = insn;
22584 /* If in state 4, check if the target branch is reached, in order to
22585 change back to state 0. */
22586 if (arm_ccfsm_state == 4)
22588 if (insn == arm_target_insn)
22590 arm_target_insn = NULL;
22591 arm_ccfsm_state = 0;
22593 return;
22596 /* If in state 3, it is possible to repeat the trick, if this insn is an
22597 unconditional branch to a label, and immediately following this branch
22598 is the previous target label which is only used once, and the label this
22599 branch jumps to is not too far off. */
22600 if (arm_ccfsm_state == 3)
22602 if (simplejump_p (insn))
22604 start_insn = next_nonnote_insn (start_insn);
22605 if (BARRIER_P (start_insn))
22607 /* XXX Isn't this always a barrier? */
22608 start_insn = next_nonnote_insn (start_insn);
22610 if (LABEL_P (start_insn)
22611 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22612 && LABEL_NUSES (start_insn) == 1)
22613 reverse = TRUE;
22614 else
22615 return;
22617 else if (ANY_RETURN_P (body))
22619 start_insn = next_nonnote_insn (start_insn);
22620 if (BARRIER_P (start_insn))
22621 start_insn = next_nonnote_insn (start_insn);
22622 if (LABEL_P (start_insn)
22623 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22624 && LABEL_NUSES (start_insn) == 1)
22626 reverse = TRUE;
22627 seeking_return = 1;
22628 return_code = GET_CODE (body);
22630 else
22631 return;
22633 else
22634 return;
22637 gcc_assert (!arm_ccfsm_state || reverse);
22638 if (!JUMP_P (insn))
22639 return;
22641 /* This jump might be paralleled with a clobber of the condition codes;
22642 the jump should always come first.  */
22643 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22644 body = XVECEXP (body, 0, 0);
22646 if (reverse
22647 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22648 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22650 int insns_skipped;
22651 int fail = FALSE, succeed = FALSE;
22652 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22653 int then_not_else = TRUE;
22654 rtx_insn *this_insn = start_insn;
22655 rtx label = 0;
22657 /* Register the insn jumped to. */
22658 if (reverse)
22660 if (!seeking_return)
22661 label = XEXP (SET_SRC (body), 0);
22663 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22664 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22665 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22667 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22668 then_not_else = FALSE;
22670 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22672 seeking_return = 1;
22673 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22675 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22677 seeking_return = 1;
22678 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22679 then_not_else = FALSE;
22681 else
22682 gcc_unreachable ();
22684 /* See how many insns this branch skips, and what kind of insns. If all
22685 insns are okay, and the label or unconditional branch to the same
22686 label is not too far away, succeed. */
22687 for (insns_skipped = 0;
22688 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22690 rtx scanbody;
22692 this_insn = next_nonnote_insn (this_insn);
22693 if (!this_insn)
22694 break;
22696 switch (GET_CODE (this_insn))
22698 case CODE_LABEL:
22699 /* Succeed if it is the target label, otherwise fail since
22700 control falls in from somewhere else. */
22701 if (this_insn == label)
22703 arm_ccfsm_state = 1;
22704 succeed = TRUE;
22706 else
22707 fail = TRUE;
22708 break;
22710 case BARRIER:
22711 /* Succeed if the following insn is the target label.
22712 Otherwise fail.
22713 If return insns are used then the last insn in a function
22714 will be a barrier. */
22715 this_insn = next_nonnote_insn (this_insn);
22716 if (this_insn && this_insn == label)
22718 arm_ccfsm_state = 1;
22719 succeed = TRUE;
22721 else
22722 fail = TRUE;
22723 break;
22725 case CALL_INSN:
22726 /* The AAPCS says that conditional calls should not be
22727 used since they make interworking inefficient (the
22728 linker can't transform BL<cond> into BLX). That's
22729 only a problem if the machine has BLX. */
22730 if (arm_arch5)
22732 fail = TRUE;
22733 break;
22736 /* Succeed if the following insn is the target label, or
22737 if the following two insns are a barrier and the
22738 target label. */
22739 this_insn = next_nonnote_insn (this_insn);
22740 if (this_insn && BARRIER_P (this_insn))
22741 this_insn = next_nonnote_insn (this_insn);
22743 if (this_insn && this_insn == label
22744 && insns_skipped < max_insns_skipped)
22746 arm_ccfsm_state = 1;
22747 succeed = TRUE;
22749 else
22750 fail = TRUE;
22751 break;
22753 case JUMP_INSN:
22754 /* If this is an unconditional branch to the same label, succeed.
22755 If it is to another label, do nothing. If it is conditional,
22756 fail. */
22757 /* XXX Probably, the tests for SET and the PC are
22758 unnecessary. */
22760 scanbody = PATTERN (this_insn);
22761 if (GET_CODE (scanbody) == SET
22762 && GET_CODE (SET_DEST (scanbody)) == PC)
22764 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22765 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22767 arm_ccfsm_state = 2;
22768 succeed = TRUE;
22770 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22771 fail = TRUE;
22773 /* Fail if a conditional return is undesirable (e.g. on a
22774 StrongARM), but still allow this if optimizing for size. */
22775 else if (GET_CODE (scanbody) == return_code
22776 && !use_return_insn (TRUE, NULL)
22777 && !optimize_size)
22778 fail = TRUE;
22779 else if (GET_CODE (scanbody) == return_code)
22781 arm_ccfsm_state = 2;
22782 succeed = TRUE;
22784 else if (GET_CODE (scanbody) == PARALLEL)
22786 switch (get_attr_conds (this_insn))
22788 case CONDS_NOCOND:
22789 break;
22790 default:
22791 fail = TRUE;
22792 break;
22795 else
22796 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22798 break;
22800 case INSN:
22801 /* Instructions using or affecting the condition codes make it
22802 fail. */
22803 scanbody = PATTERN (this_insn);
22804 if (!(GET_CODE (scanbody) == SET
22805 || GET_CODE (scanbody) == PARALLEL)
22806 || get_attr_conds (this_insn) != CONDS_NOCOND)
22807 fail = TRUE;
22808 break;
22810 default:
22811 break;
22814 if (succeed)
22816 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22817 arm_target_label = CODE_LABEL_NUMBER (label);
22818 else
22820 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22822 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22824 this_insn = next_nonnote_insn (this_insn);
22825 gcc_assert (!this_insn
22826 || (!BARRIER_P (this_insn)
22827 && !LABEL_P (this_insn)));
22829 if (!this_insn)
22831 /* Oh, dear! We ran off the end; give up. */
22832 extract_constrain_insn_cached (insn);
22833 arm_ccfsm_state = 0;
22834 arm_target_insn = NULL;
22835 return;
22837 arm_target_insn = this_insn;
22840 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22841 what it was. */
22842 if (!reverse)
22843 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22845 if (reverse || then_not_else)
22846 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22849 /* Restore recog_data (getting the attributes of other insns can
22850 destroy this array, but final.c assumes that it remains intact
22851 across this call. */
22852 extract_constrain_insn_cached (insn);
22856 /* Output IT instructions. */
22857 void
22858 thumb2_asm_output_opcode (FILE * stream)
22860 char buff[5];
22861 int n;
22863 if (arm_condexec_mask)
22865 for (n = 0; n < arm_condexec_masklen; n++)
22866 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22867 buff[n] = 0;
22868 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22869 arm_condition_codes[arm_current_cc]);
22870 arm_condexec_mask = 0;
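/* Illustrative example: with arm_current_cc == ARM_EQ, a mask of 0x3 and
   a masklen of 3, the loop above builds "tte" and prints "itte\teq", so
   the next two instructions execute when EQ holds and the third when it
   does not.  */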
22874 /* Returns true if REGNO is a valid register
22875 for holding a quantity of type MODE. */
22877 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22879 if (GET_MODE_CLASS (mode) == MODE_CC)
22880 return (regno == CC_REGNUM
22881 || (TARGET_HARD_FLOAT && TARGET_VFP
22882 && regno == VFPCC_REGNUM));
22884 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22885 return false;
22887 if (TARGET_THUMB1)
22888 /* For the Thumb we only allow values bigger than SImode in
22889 registers 0 - 6, so that there is always a second low
22890 register available to hold the upper part of the value.
22891 We probably ought to ensure that the register is the
22892 start of an even numbered register pair. */
22893 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22895 if (TARGET_HARD_FLOAT && TARGET_VFP
22896 && IS_VFP_REGNUM (regno))
22898 if (mode == SFmode || mode == SImode)
22899 return VFP_REGNO_OK_FOR_SINGLE (regno);
22901 if (mode == DFmode)
22902 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22904 /* VFP registers can hold HFmode values, but there is no point in
22905 putting them there unless we have hardware conversion insns. */
22906 if (mode == HFmode)
22907 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22909 if (TARGET_NEON)
22910 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22911 || (VALID_NEON_QREG_MODE (mode)
22912 && NEON_REGNO_OK_FOR_QUAD (regno))
22913 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22914 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22915 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22916 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22917 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22919 return FALSE;
22922 if (TARGET_REALLY_IWMMXT)
22924 if (IS_IWMMXT_GR_REGNUM (regno))
22925 return mode == SImode;
22927 if (IS_IWMMXT_REGNUM (regno))
22928 return VALID_IWMMXT_REG_MODE (mode);
22931 /* We allow almost any value to be stored in the general registers.
22932 Restrict doubleword quantities to even register pairs in ARM state
22933 so that we can use ldrd. Do not allow very large Neon structure
22934 opaque modes in general registers; they would use too many. */
22935 if (regno <= LAST_ARM_REGNUM)
22937 if (ARM_NUM_REGS (mode) > 4)
22938 return FALSE;
22940 if (TARGET_THUMB2)
22941 return TRUE;
22943 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22946 if (regno == FRAME_POINTER_REGNUM
22947 || regno == ARG_POINTER_REGNUM)
22948 /* We only allow integers in the fake hard registers. */
22949 return GET_MODE_CLASS (mode) == MODE_INT;
22951 return FALSE;
22954 /* Implement MODES_TIEABLE_P. */
22956 bool
22957 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
22959 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22960 return true;
22962 /* We specifically want to allow elements of "structure" modes to
22963 be tieable to the structure. This more general condition allows
22964 other rarer situations too. */
22965 if (TARGET_NEON
22966 && (VALID_NEON_DREG_MODE (mode1)
22967 || VALID_NEON_QREG_MODE (mode1)
22968 || VALID_NEON_STRUCT_MODE (mode1))
22969 && (VALID_NEON_DREG_MODE (mode2)
22970 || VALID_NEON_QREG_MODE (mode2)
22971 || VALID_NEON_STRUCT_MODE (mode2)))
22972 return true;
22974 return false;
22977 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22978 not used in arm mode. */
22980 enum reg_class
22981 arm_regno_class (int regno)
22983 if (regno == PC_REGNUM)
22984 return NO_REGS;
22986 if (TARGET_THUMB1)
22988 if (regno == STACK_POINTER_REGNUM)
22989 return STACK_REG;
22990 if (regno == CC_REGNUM)
22991 return CC_REG;
22992 if (regno < 8)
22993 return LO_REGS;
22994 return HI_REGS;
22997 if (TARGET_THUMB2 && regno < 8)
22998 return LO_REGS;
23000 if ( regno <= LAST_ARM_REGNUM
23001 || regno == FRAME_POINTER_REGNUM
23002 || regno == ARG_POINTER_REGNUM)
23003 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23005 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23006 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23008 if (IS_VFP_REGNUM (regno))
23010 if (regno <= D7_VFP_REGNUM)
23011 return VFP_D0_D7_REGS;
23012 else if (regno <= LAST_LO_VFP_REGNUM)
23013 return VFP_LO_REGS;
23014 else
23015 return VFP_HI_REGS;
23018 if (IS_IWMMXT_REGNUM (regno))
23019 return IWMMXT_REGS;
23021 if (IS_IWMMXT_GR_REGNUM (regno))
23022 return IWMMXT_GR_REGS;
23024 return NO_REGS;
23027 /* Handle a special case when computing the offset
23028 of an argument from the frame pointer. */
23030 arm_debugger_arg_offset (int value, rtx addr)
23032 rtx_insn *insn;
23034 /* We are only interested if dbxout_parms() failed to compute the offset. */
23035 if (value != 0)
23036 return 0;
23038 /* We can only cope with the case where the address is held in a register. */
23039 if (!REG_P (addr))
23040 return 0;
23042 /* If we are using the frame pointer to point at the argument, then
23043 an offset of 0 is correct. */
23044 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23045 return 0;
23047 /* If we are using the stack pointer to point at the
23048 argument, then an offset of 0 is correct. */
23049 /* ??? Check this is consistent with thumb2 frame layout. */
23050 if ((TARGET_THUMB || !frame_pointer_needed)
23051 && REGNO (addr) == SP_REGNUM)
23052 return 0;
23054 /* Oh dear. The argument is pointed to by a register rather
23055 than being held in a register, or being stored at a known
23056 offset from the frame pointer. Since GDB only understands
23057 those two kinds of argument we must translate the address
23058 held in the register into an offset from the frame pointer.
23059 We do this by searching through the insns for the function
23060 looking to see where this register gets its value. If the
23061 register is initialized from the frame pointer plus an offset
23062 then we are in luck and we can continue, otherwise we give up.
23064 This code is exercised by producing debugging information
23065 for a function with arguments like this:
23067 double func (double a, double b, int c, double d) {return d;}
23069 Without this code the stab for parameter 'd' will be set to
23070 an offset of 0 from the frame pointer, rather than 8. */
23072 /* The if() statement says:
23074 If the insn is a normal instruction
23075 and if the insn is setting the value in a register
23076 and if the register being set is the register holding the address of the argument
23077 and if the address is computed by an addition
23078 that involves adding to a register
23079 which is the frame pointer
23080 a constant integer
23082 then... */
23084 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23086 if ( NONJUMP_INSN_P (insn)
23087 && GET_CODE (PATTERN (insn)) == SET
23088 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23089 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23090 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23091 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23092 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23095 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23097 break;
23101 if (value == 0)
23103 debug_rtx (addr);
23104 warning (0, "unable to compute real location of stacked parameter");
23105 value = 8; /* XXX magic hack */
23108 return value;
23111 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23113 static const char *
23114 arm_invalid_parameter_type (const_tree t)
23116 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23117 return N_("function parameters cannot have __fp16 type");
23118 return NULL;
23121 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23123 static const char *
23124 arm_invalid_return_type (const_tree t)
23126 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23127 return N_("functions cannot return __fp16 type");
23128 return NULL;
23131 /* Implement TARGET_PROMOTED_TYPE. */
23133 static tree
23134 arm_promoted_type (const_tree t)
23136 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23137 return float_type_node;
23138 return NULL_TREE;
23141 /* Implement TARGET_CONVERT_TO_TYPE.
23142 Specifically, this hook implements the peculiarity of the ARM
23143 half-precision floating-point C semantics that requires conversions between
23144 __fp16 to or from double to do an intermediate conversion to float. */
23146 static tree
23147 arm_convert_to_type (tree type, tree expr)
23149 tree fromtype = TREE_TYPE (expr);
23150 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23151 return NULL_TREE;
23152 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23153 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23154 return convert (type, convert (float_type_node, expr));
23155 return NULL_TREE;
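/* Illustrative consequence of the hook above: a conversion such as
   (__fp16) d, where d has type double, behaves as (__fp16)(float) d --
   the value is rounded to single precision first and then to half
   precision, which in rare double-rounding cases can differ from a
   hypothetical direct double-to-half conversion.  */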
23158 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23159 This simply adds HFmode as a supported mode; even though we don't
23160 implement arithmetic on this type directly, it's supported by
23161 optabs conversions, much the way the double-word arithmetic is
23162 special-cased in the default hook. */
23164 static bool
23165 arm_scalar_mode_supported_p (machine_mode mode)
23167 if (mode == HFmode)
23168 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23169 else if (ALL_FIXED_POINT_MODE_P (mode))
23170 return true;
23171 else
23172 return default_scalar_mode_supported_p (mode);
23175 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23176 void
23177 neon_reinterpret (rtx dest, rtx src)
23179 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23182 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23183 not to early-clobber SRC registers in the process.
23185 We assume that the operands described by SRC and DEST represent a
23186 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23187 number of components into which the copy has been decomposed. */
23188 void
23189 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23191 unsigned int i;
23193 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23194 || REGNO (operands[0]) < REGNO (operands[1]))
23196 for (i = 0; i < count; i++)
23198 operands[2 * i] = dest[i];
23199 operands[2 * i + 1] = src[i];
23202 else
23204 for (i = 0; i < count; i++)
23206 operands[2 * i] = dest[count - i - 1];
23207 operands[2 * i + 1] = src[count - i - 1];
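/* Example (illustrative): copying a two-register value from {d1,d2} to
   {d2,d3} overlaps and the destination starts at a higher register than
   the source, so the second loop above reverses the component order:
   d3 is written from d2 before d2 is overwritten from d1.  */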
23212 /* Split operands into moves from op[1] + op[2] into op[0]. */
23214 void
23215 neon_split_vcombine (rtx operands[3])
23217 unsigned int dest = REGNO (operands[0]);
23218 unsigned int src1 = REGNO (operands[1]);
23219 unsigned int src2 = REGNO (operands[2]);
23220 machine_mode halfmode = GET_MODE (operands[1]);
23221 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23222 rtx destlo, desthi;
23224 if (src1 == dest && src2 == dest + halfregs)
23226 /* No-op move. Can't split to nothing; emit something. */
23227 emit_note (NOTE_INSN_DELETED);
23228 return;
23231 /* Preserve register attributes for variable tracking. */
23232 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23233 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23234 GET_MODE_SIZE (halfmode));
23236 /* Special case of reversed high/low parts. Use VSWP. */
23237 if (src2 == dest && src1 == dest + halfregs)
23239 rtx x = gen_rtx_SET (destlo, operands[1]);
23240 rtx y = gen_rtx_SET (desthi, operands[2]);
23241 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23242 return;
23245 if (!reg_overlap_mentioned_p (operands[2], destlo))
23247 /* Try to avoid unnecessary moves if part of the result
23248 is in the right place already. */
23249 if (src1 != dest)
23250 emit_move_insn (destlo, operands[1]);
23251 if (src2 != dest + halfregs)
23252 emit_move_insn (desthi, operands[2]);
23254 else
23256 if (src2 != dest + halfregs)
23257 emit_move_insn (desthi, operands[2]);
23258 if (src1 != dest)
23259 emit_move_insn (destlo, operands[1]);
23263 /* Return the number (counting from 0) of
23264 the least significant set bit in MASK. */
23266 inline static int
23267 number_of_first_bit_set (unsigned mask)
23269 return ctz_hwi (mask);
23272 /* Like emit_multi_reg_push, but allowing for a different set of
23273 registers to be described as saved. MASK is the set of registers
23274 to be saved; REAL_REGS is the set of registers to be described as
23275 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23277 static rtx_insn *
23278 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23280 unsigned long regno;
23281 rtx par[10], tmp, reg;
23282 rtx_insn *insn;
23283 int i, j;
23285 /* Build the parallel of the registers actually being stored. */
23286 for (i = 0; mask; ++i, mask &= mask - 1)
23288 regno = ctz_hwi (mask);
23289 reg = gen_rtx_REG (SImode, regno);
23291 if (i == 0)
23292 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23293 else
23294 tmp = gen_rtx_USE (VOIDmode, reg);
23296 par[i] = tmp;
23299 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23300 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23301 tmp = gen_frame_mem (BLKmode, tmp);
23302 tmp = gen_rtx_SET (tmp, par[0]);
23303 par[0] = tmp;
23305 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23306 insn = emit_insn (tmp);
23308 /* Always build the stack adjustment note for unwind info. */
23309 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23310 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23311 par[0] = tmp;
23313 /* Build the parallel of the registers recorded as saved for unwind. */
23314 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23316 regno = ctz_hwi (real_regs);
23317 reg = gen_rtx_REG (SImode, regno);
23319 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23320 tmp = gen_frame_mem (SImode, tmp);
23321 tmp = gen_rtx_SET (tmp, reg);
23322 RTX_FRAME_RELATED_P (tmp) = 1;
23323 par[j + 1] = tmp;
23326 if (j == 0)
23327 tmp = par[0];
23328 else
23330 RTX_FRAME_RELATED_P (par[0]) = 1;
23331 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23334 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23336 return insn;
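/* Illustrative use: thumb1_emit_multi_reg_push (0x4010, 0x4010) emits a
   push of r4 and lr (printed later as "push\t{r4, lr}") and attaches a
   REG_FRAME_RELATED_EXPR note describing an 8-byte stack adjustment with
   r4 saved at [sp] and lr at [sp, #4].  */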
23339 /* Emit code to push or pop registers to or from the stack. F is the
23340 assembly file. MASK is the registers to pop. */
23341 static void
23342 thumb_pop (FILE *f, unsigned long mask)
23344 int regno;
23345 int lo_mask = mask & 0xFF;
23346 int pushed_words = 0;
23348 gcc_assert (mask);
23350 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23352 /* Special case. Do not generate a POP PC statement here, do it in
23353 thumb_exit() */
23354 thumb_exit (f, -1);
23355 return;
23358 fprintf (f, "\tpop\t{");
23360 /* Look at the low registers first. */
23361 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23363 if (lo_mask & 1)
23365 asm_fprintf (f, "%r", regno);
23367 if ((lo_mask & ~1) != 0)
23368 fprintf (f, ", ");
23370 pushed_words++;
23374 if (mask & (1 << PC_REGNUM))
23376 /* Catch popping the PC. */
23377 if (TARGET_INTERWORK || TARGET_BACKTRACE
23378 || crtl->calls_eh_return)
23380 /* The PC is never popped directly; instead
23381 it is popped into r3 and then BX is used.  */
23382 fprintf (f, "}\n");
23384 thumb_exit (f, -1);
23386 return;
23388 else
23390 if (mask & 0xFF)
23391 fprintf (f, ", ");
23393 asm_fprintf (f, "%r", PC_REGNUM);
23397 fprintf (f, "}\n");
23400 /* Generate code to return from a thumb function.
23401 If 'reg_containing_return_addr' is -1, then the return address is
23402 actually on the stack, at the stack pointer. */
23403 static void
23404 thumb_exit (FILE *f, int reg_containing_return_addr)
23406 unsigned regs_available_for_popping;
23407 unsigned regs_to_pop;
23408 int pops_needed;
23409 unsigned available;
23410 unsigned required;
23411 machine_mode mode;
23412 int size;
23413 int restore_a4 = FALSE;
23415 /* Compute the registers we need to pop. */
23416 regs_to_pop = 0;
23417 pops_needed = 0;
23419 if (reg_containing_return_addr == -1)
23421 regs_to_pop |= 1 << LR_REGNUM;
23422 ++pops_needed;
23425 if (TARGET_BACKTRACE)
23427 /* Restore the (ARM) frame pointer and stack pointer. */
23428 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23429 pops_needed += 2;
23432 /* If there is nothing to pop then just emit the BX instruction and
23433 return. */
23434 if (pops_needed == 0)
23436 if (crtl->calls_eh_return)
23437 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23439 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23440 return;
23442 /* Otherwise if we are not supporting interworking and we have not created
23443 a backtrace structure and the function was not entered in ARM mode then
23444 just pop the return address straight into the PC. */
23445 else if (!TARGET_INTERWORK
23446 && !TARGET_BACKTRACE
23447 && !is_called_in_ARM_mode (current_function_decl)
23448 && !crtl->calls_eh_return)
23450 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23451 return;
23454 /* Find out how many of the (return) argument registers we can corrupt. */
23455 regs_available_for_popping = 0;
23457 /* If returning via __builtin_eh_return, the bottom three registers
23458 all contain information needed for the return. */
23459 if (crtl->calls_eh_return)
23460 size = 12;
23461 else
23463 /* We can deduce the registers used from the function's
23464 return value. This is more reliable than examining
23465 df_regs_ever_live_p () because that will be set if the register is
23466 ever used in the function, not just if the register is used
23467 to hold a return value. */
23469 if (crtl->return_rtx != 0)
23470 mode = GET_MODE (crtl->return_rtx);
23471 else
23472 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23474 size = GET_MODE_SIZE (mode);
23476 if (size == 0)
23478 /* In a void function we can use any argument register.
23479 In a function that returns a structure on the stack
23480 we can use the second and third argument registers. */
23481 if (mode == VOIDmode)
23482 regs_available_for_popping =
23483 (1 << ARG_REGISTER (1))
23484 | (1 << ARG_REGISTER (2))
23485 | (1 << ARG_REGISTER (3));
23486 else
23487 regs_available_for_popping =
23488 (1 << ARG_REGISTER (2))
23489 | (1 << ARG_REGISTER (3));
23491 else if (size <= 4)
23492 regs_available_for_popping =
23493 (1 << ARG_REGISTER (2))
23494 | (1 << ARG_REGISTER (3));
23495 else if (size <= 8)
23496 regs_available_for_popping =
23497 (1 << ARG_REGISTER (3));
23500 /* Match registers to be popped with registers into which we pop them. */
23501 for (available = regs_available_for_popping,
23502 required = regs_to_pop;
23503 required != 0 && available != 0;
23504 available &= ~(available & - available),
23505 required &= ~(required & - required))
23506 -- pops_needed;
23508 /* If we have any popping registers left over, remove them. */
23509 if (available > 0)
23510 regs_available_for_popping &= ~available;
23512 /* Otherwise if we need another popping register we can use
23513 the fourth argument register. */
23514 else if (pops_needed)
23516 /* If we have not found any free argument registers and
23517 reg a4 contains the return address, we must move it. */
23518 if (regs_available_for_popping == 0
23519 && reg_containing_return_addr == LAST_ARG_REGNUM)
23521 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23522 reg_containing_return_addr = LR_REGNUM;
23524 else if (size > 12)
23526 /* Register a4 is being used to hold part of the return value,
23527 but we have dire need of a free, low register. */
23528 restore_a4 = TRUE;
23530 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23533 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23535 /* The fourth argument register is available. */
23536 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23538 --pops_needed;
23542 /* Pop as many registers as we can. */
23543 thumb_pop (f, regs_available_for_popping);
23545 /* Process the registers we popped. */
23546 if (reg_containing_return_addr == -1)
23548 /* The return address was popped into the lowest numbered register. */
23549 regs_to_pop &= ~(1 << LR_REGNUM);
23551 reg_containing_return_addr =
23552 number_of_first_bit_set (regs_available_for_popping);
23554 /* Remove this register from the mask of available registers, so that
23555 the return address will not be corrupted by further pops. */
23556 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23559 /* If we popped other registers then handle them here. */
23560 if (regs_available_for_popping)
23562 int frame_pointer;
23564 /* Work out which register currently contains the frame pointer. */
23565 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23567 /* Move it into the correct place. */
23568 asm_fprintf (f, "\tmov\t%r, %r\n",
23569 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23571 /* (Temporarily) remove it from the mask of popped registers. */
23572 regs_available_for_popping &= ~(1 << frame_pointer);
23573 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23575 if (regs_available_for_popping)
23577 int stack_pointer;
23579 /* We popped the stack pointer as well;
23580 find the register that contains it. */
23581 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23583 /* Move it into the stack register. */
23584 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23586 /* At this point we have popped all necessary registers, so
23587 do not worry about restoring regs_available_for_popping
23588 to its correct value:
23590 assert (pops_needed == 0)
23591 assert (regs_available_for_popping == (1 << frame_pointer))
23592 assert (regs_to_pop == (1 << STACK_POINTER)) */
23594 else
23596 /* Since we have just moved the popped value into the frame
23597 pointer, the popping register is available for reuse, and
23598 we know that we still have the stack pointer left to pop. */
23599 regs_available_for_popping |= (1 << frame_pointer);
23603 /* If we still have registers left on the stack, but we no longer have
23604 any registers into which we can pop them, then we must move the return
23605 address into the link register and make available the register that
23606 contained it. */
23607 if (regs_available_for_popping == 0 && pops_needed > 0)
23609 regs_available_for_popping |= 1 << reg_containing_return_addr;
23611 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23612 reg_containing_return_addr);
23614 reg_containing_return_addr = LR_REGNUM;
23617 /* If we have registers left on the stack then pop some more.
23618 We know that at most we will want to pop FP and SP. */
23619 if (pops_needed > 0)
23621 int popped_into;
23622 int move_to;
23624 thumb_pop (f, regs_available_for_popping);
23626 /* We have popped either FP or SP.
23627 Move whichever one it is into the correct register. */
23628 popped_into = number_of_first_bit_set (regs_available_for_popping);
23629 move_to = number_of_first_bit_set (regs_to_pop);
23631 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23633 regs_to_pop &= ~(1 << move_to);
23635 --pops_needed;
23638 /* If we still have not popped everything then we must have only
23639 had one register available to us and we are now popping the SP. */
23640 if (pops_needed > 0)
23642 int popped_into;
23644 thumb_pop (f, regs_available_for_popping);
23646 popped_into = number_of_first_bit_set (regs_available_for_popping);
23648 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23650 assert (regs_to_pop == (1 << STACK_POINTER))
23651 assert (pops_needed == 1)
23655 /* If necessary restore the a4 register. */
23656 if (restore_a4)
23658 if (reg_containing_return_addr != LR_REGNUM)
23660 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23661 reg_containing_return_addr = LR_REGNUM;
23664 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23667 if (crtl->calls_eh_return)
23668 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23670 /* Return to caller. */
23671 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23674 /* Scan INSN just before assembler is output for it.
23675 For Thumb-1, we track the status of the condition codes; this
23676 information is used in the cbranchsi4_insn pattern. */
23677 void
23678 thumb1_final_prescan_insn (rtx_insn *insn)
23680 if (flag_print_asm_name)
23681 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23682 INSN_ADDRESSES (INSN_UID (insn)));
23683 /* Don't overwrite the previous setter when we get to a cbranch. */
23684 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23686 enum attr_conds conds;
23688 if (cfun->machine->thumb1_cc_insn)
23690 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23691 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23692 CC_STATUS_INIT;
23694 conds = get_attr_conds (insn);
23695 if (conds == CONDS_SET)
23697 rtx set = single_set (insn);
23698 cfun->machine->thumb1_cc_insn = insn;
23699 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23700 cfun->machine->thumb1_cc_op1 = const0_rtx;
23701 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23702 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23704 rtx src1 = XEXP (SET_SRC (set), 1);
23705 if (src1 == const0_rtx)
23706 cfun->machine->thumb1_cc_mode = CCmode;
23708 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23710 /* Record the src register operand instead of dest because
23711 cprop_hardreg pass propagates src. */
23712 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23715 else if (conds != CONDS_NOCOND)
23716 cfun->machine->thumb1_cc_insn = NULL_RTX;
23719 /* Check if unexpected far jump is used. */
23720 if (cfun->machine->lr_save_eliminated
23721 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23722 internal_error("Unexpected thumb1 far jump");
23726 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23728 unsigned HOST_WIDE_INT mask = 0xff;
23729 int i;
23731 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23732 if (val == 0) /* XXX */
23733 return 0;
23735 for (i = 0; i < 25; i++)
23736 if ((val & (mask << i)) == val)
23737 return 1;
23739 return 0;
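/* Illustrative values (chosen for exposition): 0x00ff0000 is accepted because
   the 0xff mask shifted left by 16 covers all of its set bits, while
   0x00000101 is rejected because its set bits span more than eight contiguous
   bit positions.  */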
23742 /* Returns nonzero if the current function contains,
23743 or might contain a far jump. */
23744 static int
23745 thumb_far_jump_used_p (void)
23747 rtx_insn *insn;
23748 bool far_jump = false;
23749 unsigned int func_size = 0;
23751 /* This test is only important for leaf functions. */
23752 /* assert (!leaf_function_p ()); */
23754 /* If we have already decided that far jumps may be used,
23755 do not bother checking again, and always return true even if
23756 it turns out that they are not being used. Once we have made
23757 the decision that far jumps are present (and that hence the link
23758 register will be pushed onto the stack) we cannot go back on it. */
23759 if (cfun->machine->far_jump_used)
23760 return 1;
23762 /* If this function is not being called from the prologue/epilogue
23763 generation code then it must be being called from the
23764 INITIAL_ELIMINATION_OFFSET macro. */
23765 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23767 /* In this case we know that we are being asked about the elimination
23768 of the arg pointer register. If that register is not being used,
23769 then there are no arguments on the stack, and we do not have to
23770 worry that a far jump might force the prologue to push the link
23771 register, changing the stack offsets. In this case we can just
23772 return false, since the presence of far jumps in the function will
23773 not affect stack offsets.
23775 If the arg pointer is live (or if it was live, but has now been
23776 eliminated and so set to dead) then we do have to test to see if
23777 the function might contain a far jump. This test can lead to some
23778 false negatives, since before reload is completed, the length of
23779 branch instructions is not known, so gcc defaults to returning their
23780 longest length, which in turn sets the far jump attribute to true.
23782 A false negative will not result in bad code being generated, but it
23783 will result in a needless push and pop of the link register. We
23784 hope that this does not occur too often.
23786 If we need doubleword stack alignment this could affect the other
23787 elimination offsets so we can't risk getting it wrong. */
23788 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23789 cfun->machine->arg_pointer_live = 1;
23790 else if (!cfun->machine->arg_pointer_live)
23791 return 0;
23794 /* We should not change far_jump_used during or after reload, as there is
23795 no chance to change stack frame layout. */
23796 if (reload_in_progress || reload_completed)
23797 return 0;
23799 /* Check to see if the function contains a branch
23800 insn with the far jump attribute set. */
23801 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23803 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23805 far_jump = true;
23807 func_size += get_attr_length (insn);
23810 /* Attribute far_jump will always be true for thumb1 before
23811 shorten_branch pass. So checking the far_jump attribute before
23812 shorten_branch isn't very useful.
23814 The following heuristic tries to estimate more accurately whether a far
23815 jump may actually be needed. The heuristic is very conservative, as there
23816 is no chance to roll back the decision not to use a far jump.
23818 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
23819 that each 2-byte insn is associated with a 4-byte constant pool entry.
23820 Using function size 2048/3 as the threshold is conservative enough. */
23821 if (far_jump)
23823 if ((func_size * 3) >= 2048)
23825 /* Record the fact that we have decided that
23826 the function does use far jumps. */
23827 cfun->machine->far_jump_used = 1;
23828 return 1;
23832 return 0;
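/* A worked example of the heuristic above (sizes assumed): a 700-byte body
   containing a branch whose far_jump attribute is set gives 700 * 3 = 2100,
   which is >= 2048, so far_jump_used is recorded and the link register will
   be saved; a 600-byte body gives 1800 < 2048, so the branch is presumed to
   shrink to a short one once shorten_branches has run.  */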
23835 /* Return nonzero if FUNC must be entered in ARM mode. */
23837 is_called_in_ARM_mode (tree func)
23839 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23841 /* Ignore the problem about functions whose address is taken. */
23842 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23843 return TRUE;
23845 #ifdef ARM_PE
23846 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23847 #else
23848 return FALSE;
23849 #endif
23852 /* Given the stack offsets and register mask in OFFSETS, decide how
23853 many additional registers to push instead of subtracting a constant
23854 from SP. For epilogues the principle is the same except we use pop.
23855 FOR_PROLOGUE indicates which we're generating. */
23856 static int
23857 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23859 HOST_WIDE_INT amount;
23860 unsigned long live_regs_mask = offsets->saved_regs_mask;
23861 /* Extract a mask of the ones we can give to the Thumb's push/pop
23862 instruction. */
23863 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23864 /* Then count how many other high registers will need to be pushed. */
23865 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23866 int n_free, reg_base, size;
23868 if (!for_prologue && frame_pointer_needed)
23869 amount = offsets->locals_base - offsets->saved_regs;
23870 else
23871 amount = offsets->outgoing_args - offsets->saved_regs;
23873 /* If the stack frame size is 512 exactly, we can save one load
23874 instruction, which should make this a win even when optimizing
23875 for speed. */
23876 if (!optimize_size && amount != 512)
23877 return 0;
23879 /* Can't do this if there are high registers to push. */
23880 if (high_regs_pushed != 0)
23881 return 0;
23883 /* Shouldn't do it in the prologue if no registers would normally
23884 be pushed at all. In the epilogue, also allow it if we'll have
23885 a pop insn for the PC. */
23886 if (l_mask == 0
23887 && (for_prologue
23888 || TARGET_BACKTRACE
23889 || (live_regs_mask & 1 << LR_REGNUM) == 0
23890 || TARGET_INTERWORK
23891 || crtl->args.pretend_args_size != 0))
23892 return 0;
23894 /* Don't do this if thumb_expand_prologue wants to emit instructions
23895 between the push and the stack frame allocation. */
23896 if (for_prologue
23897 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23898 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23899 return 0;
23901 reg_base = 0;
23902 n_free = 0;
23903 if (!for_prologue)
23905 size = arm_size_return_regs ();
23906 reg_base = ARM_NUM_INTS (size);
23907 live_regs_mask >>= reg_base;
23910 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23911 && (for_prologue || call_used_regs[reg_base + n_free]))
23913 live_regs_mask >>= 1;
23914 n_free++;
23917 if (n_free == 0)
23918 return 0;
23919 gcc_assert (amount / 4 * 4 == amount);
23921 if (amount >= 512 && (amount - n_free * 4) < 512)
23922 return (amount - 508) / 4;
23923 if (amount <= n_free * 4)
23924 return amount / 4;
23925 return 0;
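/* A worked example (figures assumed): with a stack adjustment of exactly 512
   bytes, no high registers to save and two free low registers (n_free == 2),
   the test 512 - 2 * 4 < 512 succeeds and (512 - 508) / 4 == 1 extra register
   is pushed. The remaining adjustment is then 508 bytes, the largest
   immediate a single Thumb-1 SP subtract can encode.  */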
23928 /* The bits which aren't usefully expanded as rtl. */
23929 const char *
23930 thumb1_unexpanded_epilogue (void)
23932 arm_stack_offsets *offsets;
23933 int regno;
23934 unsigned long live_regs_mask = 0;
23935 int high_regs_pushed = 0;
23936 int extra_pop;
23937 int had_to_push_lr;
23938 int size;
23940 if (cfun->machine->return_used_this_function != 0)
23941 return "";
23943 if (IS_NAKED (arm_current_func_type ()))
23944 return "";
23946 offsets = arm_get_frame_offsets ();
23947 live_regs_mask = offsets->saved_regs_mask;
23948 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23950 /* We can deduce the registers used from the function's return value.
23951 This is more reliable than examining df_regs_ever_live_p () because that
23952 will be set if the register is ever used in the function, not just if
23953 the register is used to hold a return value. */
23954 size = arm_size_return_regs ();
23956 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23957 if (extra_pop > 0)
23959 unsigned long extra_mask = (1 << extra_pop) - 1;
23960 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23963 /* The prolog may have pushed some high registers to use as
23964 work registers. e.g. the testsuite file:
23965 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23966 compiles to produce:
23967 push {r4, r5, r6, r7, lr}
23968 mov r7, r9
23969 mov r6, r8
23970 push {r6, r7}
23971 as part of the prolog. We have to undo that pushing here. */
23973 if (high_regs_pushed)
23975 unsigned long mask = live_regs_mask & 0xff;
23976 int next_hi_reg;
23978 /* The available low registers depend on the size of the value we are
23979 returning. */
23980 if (size <= 12)
23981 mask |= 1 << 3;
23982 if (size <= 8)
23983 mask |= 1 << 2;
23985 if (mask == 0)
23986 /* Oh dear! We have no low registers into which we can pop
23987 high registers! */
23988 internal_error
23989 ("no low registers available for popping high registers");
23991 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23992 if (live_regs_mask & (1 << next_hi_reg))
23993 break;
23995 while (high_regs_pushed)
23997 /* Find lo register(s) into which the high register(s) can
23998 be popped. */
23999 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24001 if (mask & (1 << regno))
24002 high_regs_pushed--;
24003 if (high_regs_pushed == 0)
24004 break;
24007 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24009 /* Pop the values into the low register(s). */
24010 thumb_pop (asm_out_file, mask);
24012 /* Move the value(s) into the high registers. */
24013 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24015 if (mask & (1 << regno))
24017 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24018 regno);
24020 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24021 if (live_regs_mask & (1 << next_hi_reg))
24022 break;
24026 live_regs_mask &= ~0x0f00;
24029 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24030 live_regs_mask &= 0xff;
24032 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24034 /* Pop the return address into the PC. */
24035 if (had_to_push_lr)
24036 live_regs_mask |= 1 << PC_REGNUM;
24038 /* Either no argument registers were pushed or a backtrace
24039 structure was created which includes an adjusted stack
24040 pointer, so just pop everything. */
24041 if (live_regs_mask)
24042 thumb_pop (asm_out_file, live_regs_mask);
24044 /* We have either just popped the return address into the
24045 PC or it was kept in LR for the entire function.
24046 Note that thumb_pop has already called thumb_exit if the
24047 PC was in the list. */
24048 if (!had_to_push_lr)
24049 thumb_exit (asm_out_file, LR_REGNUM);
24051 else
24053 /* Pop everything but the return address. */
24054 if (live_regs_mask)
24055 thumb_pop (asm_out_file, live_regs_mask);
24057 if (had_to_push_lr)
24059 if (size > 12)
24061 /* We have no free low regs, so save one. */
24062 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24063 LAST_ARG_REGNUM);
24066 /* Get the return address into a temporary register. */
24067 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24069 if (size > 12)
24071 /* Move the return address to lr. */
24072 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24073 LAST_ARG_REGNUM);
24074 /* Restore the low register. */
24075 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24076 IP_REGNUM);
24077 regno = LR_REGNUM;
24079 else
24080 regno = LAST_ARG_REGNUM;
24082 else
24083 regno = LR_REGNUM;
24085 /* Remove the argument registers that were pushed onto the stack. */
24086 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24087 SP_REGNUM, SP_REGNUM,
24088 crtl->args.pretend_args_size);
24090 thumb_exit (asm_out_file, regno);
24093 return "";
24096 /* Functions to save and restore machine-specific function data. */
24097 static struct machine_function *
24098 arm_init_machine_status (void)
24100 struct machine_function *machine;
24101 machine = ggc_cleared_alloc<machine_function> ();
24103 #if ARM_FT_UNKNOWN != 0
24104 machine->func_type = ARM_FT_UNKNOWN;
24105 #endif
24106 return machine;
24109 /* Return an RTX indicating where the return address to the
24110 calling function can be found. */
24112 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24114 if (count != 0)
24115 return NULL_RTX;
24117 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24120 /* Do anything needed before RTL is emitted for each function. */
24121 void
24122 arm_init_expanders (void)
24124 /* Arrange to initialize and mark the machine per-function status. */
24125 init_machine_status = arm_init_machine_status;
24127 /* This is to stop the combine pass optimizing away the alignment
24128 adjustment of va_arg. */
24129 /* ??? It is claimed that this should not be necessary. */
24130 if (cfun)
24131 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24135 /* Like arm_compute_initial_elimination_offset. Simpler because there
24136 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24137 to point at the base of the local variables after static stack
24138 space for a function has been allocated. */
24140 HOST_WIDE_INT
24141 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24143 arm_stack_offsets *offsets;
24145 offsets = arm_get_frame_offsets ();
24147 switch (from)
24149 case ARG_POINTER_REGNUM:
24150 switch (to)
24152 case STACK_POINTER_REGNUM:
24153 return offsets->outgoing_args - offsets->saved_args;
24155 case FRAME_POINTER_REGNUM:
24156 return offsets->soft_frame - offsets->saved_args;
24158 case ARM_HARD_FRAME_POINTER_REGNUM:
24159 return offsets->saved_regs - offsets->saved_args;
24161 case THUMB_HARD_FRAME_POINTER_REGNUM:
24162 return offsets->locals_base - offsets->saved_args;
24164 default:
24165 gcc_unreachable ();
24167 break;
24169 case FRAME_POINTER_REGNUM:
24170 switch (to)
24172 case STACK_POINTER_REGNUM:
24173 return offsets->outgoing_args - offsets->soft_frame;
24175 case ARM_HARD_FRAME_POINTER_REGNUM:
24176 return offsets->saved_regs - offsets->soft_frame;
24178 case THUMB_HARD_FRAME_POINTER_REGNUM:
24179 return offsets->locals_base - offsets->soft_frame;
24181 default:
24182 gcc_unreachable ();
24184 break;
24186 default:
24187 gcc_unreachable ();
24191 /* Generate the function's prologue. */
24193 void
24194 thumb1_expand_prologue (void)
24196 rtx_insn *insn;
24198 HOST_WIDE_INT amount;
24199 arm_stack_offsets *offsets;
24200 unsigned long func_type;
24201 int regno;
24202 unsigned long live_regs_mask;
24203 unsigned long l_mask;
24204 unsigned high_regs_pushed = 0;
24206 func_type = arm_current_func_type ();
24208 /* Naked functions don't have prologues. */
24209 if (IS_NAKED (func_type))
24210 return;
24212 if (IS_INTERRUPT (func_type))
24214 error ("interrupt Service Routines cannot be coded in Thumb mode");
24215 return;
24218 if (is_called_in_ARM_mode (current_function_decl))
24219 emit_insn (gen_prologue_thumb1_interwork ());
24221 offsets = arm_get_frame_offsets ();
24222 live_regs_mask = offsets->saved_regs_mask;
24224 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24225 l_mask = live_regs_mask & 0x40ff;
24226 /* Then count how many other high registers will need to be pushed. */
24227 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24229 if (crtl->args.pretend_args_size)
24231 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24233 if (cfun->machine->uses_anonymous_args)
24235 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24236 unsigned long mask;
24238 mask = 1ul << (LAST_ARG_REGNUM + 1);
24239 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24241 insn = thumb1_emit_multi_reg_push (mask, 0);
24243 else
24245 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24246 stack_pointer_rtx, x));
24248 RTX_FRAME_RELATED_P (insn) = 1;
24251 if (TARGET_BACKTRACE)
24253 HOST_WIDE_INT offset = 0;
24254 unsigned work_register;
24255 rtx work_reg, x, arm_hfp_rtx;
24257 /* We have been asked to create a stack backtrace structure.
24258 The code looks like this:
24260 0 .align 2
24261 0 func:
24262 0 sub SP, #16 Reserve space for 4 registers.
24263 2 push {R7} Push low registers.
24264 4 add R7, SP, #20 Get the stack pointer before the push.
24265 6 str R7, [SP, #8] Store the stack pointer
24266 (before reserving the space).
24267 8 mov R7, PC Get hold of the start of this code + 12.
24268 10 str R7, [SP, #16] Store it.
24269 12 mov R7, FP Get hold of the current frame pointer.
24270 14 str R7, [SP, #4] Store it.
24271 16 mov R7, LR Get hold of the current return address.
24272 18 str R7, [SP, #12] Store it.
24273 20 add R7, SP, #16 Point at the start of the
24274 backtrace structure.
24275 22 mov FP, R7 Put this value into the frame pointer. */
24277 work_register = thumb_find_work_register (live_regs_mask);
24278 work_reg = gen_rtx_REG (SImode, work_register);
24279 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24281 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24282 stack_pointer_rtx, GEN_INT (-16)));
24283 RTX_FRAME_RELATED_P (insn) = 1;
24285 if (l_mask)
24287 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24288 RTX_FRAME_RELATED_P (insn) = 1;
24290 offset = bit_count (l_mask) * UNITS_PER_WORD;
24293 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24294 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24296 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24297 x = gen_frame_mem (SImode, x);
24298 emit_move_insn (x, work_reg);
24300 /* Make sure that the instruction fetching the PC is in the right place
24301 to calculate "start of backtrace creation code + 12". */
24302 /* ??? The stores using the common WORK_REG ought to be enough to
24303 prevent the scheduler from doing anything weird. Failing that
24304 we could always move all of the following into an UNSPEC_VOLATILE. */
24305 if (l_mask)
24307 x = gen_rtx_REG (SImode, PC_REGNUM);
24308 emit_move_insn (work_reg, x);
24310 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24311 x = gen_frame_mem (SImode, x);
24312 emit_move_insn (x, work_reg);
24314 emit_move_insn (work_reg, arm_hfp_rtx);
24316 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24317 x = gen_frame_mem (SImode, x);
24318 emit_move_insn (x, work_reg);
24320 else
24322 emit_move_insn (work_reg, arm_hfp_rtx);
24324 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24325 x = gen_frame_mem (SImode, x);
24326 emit_move_insn (x, work_reg);
24328 x = gen_rtx_REG (SImode, PC_REGNUM);
24329 emit_move_insn (work_reg, x);
24331 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24332 x = gen_frame_mem (SImode, x);
24333 emit_move_insn (x, work_reg);
24336 x = gen_rtx_REG (SImode, LR_REGNUM);
24337 emit_move_insn (work_reg, x);
24339 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24340 x = gen_frame_mem (SImode, x);
24341 emit_move_insn (x, work_reg);
24343 x = GEN_INT (offset + 12);
24344 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24346 emit_move_insn (arm_hfp_rtx, work_reg);
24348 /* Optimization: If we are not pushing any low registers but we are going
24349 to push some high registers then delay our first push. This will just
24350 be a push of LR and we can combine it with the push of the first high
24351 register. */
24352 else if ((l_mask & 0xff) != 0
24353 || (high_regs_pushed == 0 && l_mask))
24355 unsigned long mask = l_mask;
24356 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24357 insn = thumb1_emit_multi_reg_push (mask, mask);
24358 RTX_FRAME_RELATED_P (insn) = 1;
24361 if (high_regs_pushed)
24363 unsigned pushable_regs;
24364 unsigned next_hi_reg;
24365 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24366 : crtl->args.info.nregs;
24367 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24369 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24370 if (live_regs_mask & (1 << next_hi_reg))
24371 break;
24373 /* Here we need to mask out registers used for passing arguments
24374 even if they can be pushed. This is to avoid using them to stash the
24375 high registers; such stashing may clobber argument values. */
24376 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24378 if (pushable_regs == 0)
24379 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24381 while (high_regs_pushed > 0)
24383 unsigned long real_regs_mask = 0;
24385 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24387 if (pushable_regs & (1 << regno))
24389 emit_move_insn (gen_rtx_REG (SImode, regno),
24390 gen_rtx_REG (SImode, next_hi_reg));
24392 high_regs_pushed --;
24393 real_regs_mask |= (1 << next_hi_reg);
24395 if (high_regs_pushed)
24397 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24398 next_hi_reg --)
24399 if (live_regs_mask & (1 << next_hi_reg))
24400 break;
24402 else
24404 pushable_regs &= ~((1 << regno) - 1);
24405 break;
24410 /* If we had to find a work register and we have not yet
24411 saved the LR then add it to the list of regs to push. */
24412 if (l_mask == (1 << LR_REGNUM))
24414 pushable_regs |= l_mask;
24415 real_regs_mask |= l_mask;
24416 l_mask = 0;
24419 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24420 RTX_FRAME_RELATED_P (insn) = 1;
24424 /* Load the pic register before setting the frame pointer,
24425 so we can use r7 as a temporary work register. */
24426 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24427 arm_load_pic_register (live_regs_mask);
24429 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24430 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24431 stack_pointer_rtx);
24433 if (flag_stack_usage_info)
24434 current_function_static_stack_size
24435 = offsets->outgoing_args - offsets->saved_args;
24437 amount = offsets->outgoing_args - offsets->saved_regs;
24438 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24439 if (amount)
24441 if (amount < 512)
24443 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24444 GEN_INT (- amount)));
24445 RTX_FRAME_RELATED_P (insn) = 1;
24447 else
24449 rtx reg, dwarf;
24451 /* The stack decrement is too big for an immediate value in a single
24452 insn. In theory we could issue multiple subtracts, but after
24453 three of them it becomes more space efficient to place the full
24454 value in the constant pool and load into a register. (Also the
24455 ARM debugger really likes to see only one stack decrement per
24456 function). So instead we look for a scratch register into which
24457 we can load the decrement, and then we subtract this from the
24458 stack pointer. Unfortunately on the thumb the only available
24459 scratch registers are the argument registers, and we cannot use
24460 these as they may hold arguments to the function. Instead we
24461 attempt to locate a call preserved register which is used by this
24462 function. If we can find one, then we know that it will have
24463 been pushed at the start of the prologue and so we can corrupt
24464 it now. */
24465 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24466 if (live_regs_mask & (1 << regno))
24467 break;
24469 gcc_assert(regno <= LAST_LO_REGNUM);
24471 reg = gen_rtx_REG (SImode, regno);
24473 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24475 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24476 stack_pointer_rtx, reg));
24478 dwarf = gen_rtx_SET (stack_pointer_rtx,
24479 plus_constant (Pmode, stack_pointer_rtx,
24480 -amount));
24481 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24482 RTX_FRAME_RELATED_P (insn) = 1;
24486 if (frame_pointer_needed)
24487 thumb_set_frame_pointer (offsets);
24489 /* If we are profiling, make sure no instructions are scheduled before
24490 the call to mcount. Similarly if the user has requested no
24491 scheduling in the prolog. Similarly if we want non-call exceptions
24492 using the EABI unwinder, to prevent faulting instructions from being
24493 swapped with a stack adjustment. */
24494 if (crtl->profile || !TARGET_SCHED_PROLOG
24495 || (arm_except_unwind_info (&global_options) == UI_TARGET
24496 && cfun->can_throw_non_call_exceptions))
24497 emit_insn (gen_blockage ());
24499 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24500 if (live_regs_mask & 0xff)
24501 cfun->machine->lr_save_eliminated = 0;
24504 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24505 single POP instruction can be generated. LR should be replaced by PC. All
24506 the checks required are already done by USE_RETURN_INSN (). Hence,
24507 all we really need to check here is whether a single register or
24508 multiple registers are to be popped. */
24509 void
24510 thumb2_expand_return (bool simple_return)
24512 int i, num_regs;
24513 unsigned long saved_regs_mask;
24514 arm_stack_offsets *offsets;
24516 offsets = arm_get_frame_offsets ();
24517 saved_regs_mask = offsets->saved_regs_mask;
24519 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24520 if (saved_regs_mask & (1 << i))
24521 num_regs++;
24523 if (!simple_return && saved_regs_mask)
24525 if (num_regs == 1)
24527 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24528 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24529 rtx addr = gen_rtx_MEM (SImode,
24530 gen_rtx_POST_INC (SImode,
24531 stack_pointer_rtx));
24532 set_mem_alias_set (addr, get_frame_alias_set ());
24533 XVECEXP (par, 0, 0) = ret_rtx;
24534 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24535 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24536 emit_jump_insn (par);
24538 else
24540 saved_regs_mask &= ~ (1 << LR_REGNUM);
24541 saved_regs_mask |= (1 << PC_REGNUM);
24542 arm_emit_multi_reg_pop (saved_regs_mask);
24545 else
24547 emit_jump_insn (simple_return_rtx);
24551 void
24552 thumb1_expand_epilogue (void)
24554 HOST_WIDE_INT amount;
24555 arm_stack_offsets *offsets;
24556 int regno;
24558 /* Naked functions don't have epilogues. */
24559 if (IS_NAKED (arm_current_func_type ()))
24560 return;
24562 offsets = arm_get_frame_offsets ();
24563 amount = offsets->outgoing_args - offsets->saved_regs;
24565 if (frame_pointer_needed)
24567 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24568 amount = offsets->locals_base - offsets->saved_regs;
24570 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24572 gcc_assert (amount >= 0);
24573 if (amount)
24575 emit_insn (gen_blockage ());
24577 if (amount < 512)
24578 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24579 GEN_INT (amount)));
24580 else
24582 /* r3 is always free in the epilogue. */
24583 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24585 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24586 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24590 /* Emit a USE (stack_pointer_rtx), so that
24591 the stack adjustment will not be deleted. */
24592 emit_insn (gen_force_register_use (stack_pointer_rtx));
24594 if (crtl->profile || !TARGET_SCHED_PROLOG)
24595 emit_insn (gen_blockage ());
24597 /* Emit a clobber for each register that will be restored in the epilogue,
24598 so that flow2 will get register lifetimes correct. */
24599 for (regno = 0; regno < 13; regno++)
24600 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24601 emit_clobber (gen_rtx_REG (SImode, regno));
24603 if (! df_regs_ever_live_p (LR_REGNUM))
24604 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24607 /* Epilogue code for APCS frame. */
24608 static void
24609 arm_expand_epilogue_apcs_frame (bool really_return)
24611 unsigned long func_type;
24612 unsigned long saved_regs_mask;
24613 int num_regs = 0;
24614 int i;
24615 int floats_from_frame = 0;
24616 arm_stack_offsets *offsets;
24618 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24619 func_type = arm_current_func_type ();
24621 /* Get frame offsets for ARM. */
24622 offsets = arm_get_frame_offsets ();
24623 saved_regs_mask = offsets->saved_regs_mask;
24625 /* Find the offset of the floating-point save area in the frame. */
24626 floats_from_frame
24627 = (offsets->saved_args
24628 + arm_compute_static_chain_stack_bytes ()
24629 - offsets->frame);
24631 /* Compute how many core registers are saved and how far away the floats are. */
24632 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24633 if (saved_regs_mask & (1 << i))
24635 num_regs++;
24636 floats_from_frame += 4;
24639 if (TARGET_HARD_FLOAT && TARGET_VFP)
24641 int start_reg;
24642 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24644 /* The offset is from IP_REGNUM. */
24645 int saved_size = arm_get_vfp_saved_size ();
24646 if (saved_size > 0)
24648 rtx_insn *insn;
24649 floats_from_frame += saved_size;
24650 insn = emit_insn (gen_addsi3 (ip_rtx,
24651 hard_frame_pointer_rtx,
24652 GEN_INT (-floats_from_frame)));
24653 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24654 ip_rtx, hard_frame_pointer_rtx);
24657 /* Generate VFP register multi-pop. */
24658 start_reg = FIRST_VFP_REGNUM;
24660 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24661 /* Look for a case where a reg does not need restoring. */
24662 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24663 && (!df_regs_ever_live_p (i + 1)
24664 || call_used_regs[i + 1]))
24666 if (start_reg != i)
24667 arm_emit_vfp_multi_reg_pop (start_reg,
24668 (i - start_reg) / 2,
24669 gen_rtx_REG (SImode,
24670 IP_REGNUM));
24671 start_reg = i + 2;
24674 /* Restore the remaining regs that we have discovered (or possibly
24675 even all of them, if the conditional in the for loop never
24676 fired). */
24677 if (start_reg != i)
24678 arm_emit_vfp_multi_reg_pop (start_reg,
24679 (i - start_reg) / 2,
24680 gen_rtx_REG (SImode, IP_REGNUM));
24683 if (TARGET_IWMMXT)
24685 /* The frame pointer is guaranteed to be non-double-word aligned, as
24686 it is set to double-word-aligned old_stack_pointer - 4. */
24687 rtx_insn *insn;
24688 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24690 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24691 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24693 rtx addr = gen_frame_mem (V2SImode,
24694 plus_constant (Pmode, hard_frame_pointer_rtx,
24695 - lrm_count * 4));
24696 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24697 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24698 gen_rtx_REG (V2SImode, i),
24699 NULL_RTX);
24700 lrm_count += 2;
24704 /* saved_regs_mask should contain IP, which holds the old stack pointer
24705 from the time the activation record was created. Since SP and IP are
24706 adjacent registers, we can restore the value directly into SP. */
24707 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24708 saved_regs_mask &= ~(1 << IP_REGNUM);
24709 saved_regs_mask |= (1 << SP_REGNUM);
24711 /* There are two registers left in saved_regs_mask - LR and PC. We
24712 only need to restore LR (the return address), but to
24713 save time we can load it directly into PC, unless we need a
24714 special function exit sequence, or we are not really returning. */
24715 if (really_return
24716 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24717 && !crtl->calls_eh_return)
24718 /* Delete LR from the register mask, so that LR on
24719 the stack is loaded into the PC in the register mask. */
24720 saved_regs_mask &= ~(1 << LR_REGNUM);
24721 else
24722 saved_regs_mask &= ~(1 << PC_REGNUM);
24724 num_regs = bit_count (saved_regs_mask);
24725 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24727 rtx_insn *insn;
24728 emit_insn (gen_blockage ());
24729 /* Unwind the stack to just below the saved registers. */
24730 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24731 hard_frame_pointer_rtx,
24732 GEN_INT (- 4 * num_regs)));
24734 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24735 stack_pointer_rtx, hard_frame_pointer_rtx);
24738 arm_emit_multi_reg_pop (saved_regs_mask);
24740 if (IS_INTERRUPT (func_type))
24742 /* Interrupt handlers will have pushed the
24743 IP onto the stack, so restore it now. */
24744 rtx_insn *insn;
24745 rtx addr = gen_rtx_MEM (SImode,
24746 gen_rtx_POST_INC (SImode,
24747 stack_pointer_rtx));
24748 set_mem_alias_set (addr, get_frame_alias_set ());
24749 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24750 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24751 gen_rtx_REG (SImode, IP_REGNUM),
24752 NULL_RTX);
24755 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24756 return;
24758 if (crtl->calls_eh_return)
24759 emit_insn (gen_addsi3 (stack_pointer_rtx,
24760 stack_pointer_rtx,
24761 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24763 if (IS_STACKALIGN (func_type))
24764 /* Restore the original stack pointer. Before prologue, the stack was
24765 realigned and the original stack pointer saved in r0. For details,
24766 see comment in arm_expand_prologue. */
24767 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24769 emit_jump_insn (simple_return_rtx);
24772 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24773 function is not a sibcall. */
24774 void
24775 arm_expand_epilogue (bool really_return)
24777 unsigned long func_type;
24778 unsigned long saved_regs_mask;
24779 int num_regs = 0;
24780 int i;
24781 int amount;
24782 arm_stack_offsets *offsets;
24784 func_type = arm_current_func_type ();
24786 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24787 let output_return_instruction take care of any instruction emission. */
24788 if (IS_NAKED (func_type)
24789 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24791 if (really_return)
24792 emit_jump_insn (simple_return_rtx);
24793 return;
24796 /* If we are throwing an exception, then we really must be doing a
24797 return, so we can't tail-call. */
24798 gcc_assert (!crtl->calls_eh_return || really_return);
24800 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24802 arm_expand_epilogue_apcs_frame (really_return);
24803 return;
24806 /* Get frame offsets for ARM. */
24807 offsets = arm_get_frame_offsets ();
24808 saved_regs_mask = offsets->saved_regs_mask;
24809 num_regs = bit_count (saved_regs_mask);
24811 if (frame_pointer_needed)
24813 rtx_insn *insn;
24814 /* Restore stack pointer if necessary. */
24815 if (TARGET_ARM)
24817 /* In ARM mode, frame pointer points to first saved register.
24818 Restore stack pointer to last saved register. */
24819 amount = offsets->frame - offsets->saved_regs;
24821 /* Force out any pending memory operations that reference stacked data
24822 before stack de-allocation occurs. */
24823 emit_insn (gen_blockage ());
24824 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24825 hard_frame_pointer_rtx,
24826 GEN_INT (amount)));
24827 arm_add_cfa_adjust_cfa_note (insn, amount,
24828 stack_pointer_rtx,
24829 hard_frame_pointer_rtx);
24831 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24832 deleted. */
24833 emit_insn (gen_force_register_use (stack_pointer_rtx));
24835 else
24837 /* In Thumb-2 mode, the frame pointer points to the last saved
24838 register. */
24839 amount = offsets->locals_base - offsets->saved_regs;
24840 if (amount)
24842 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24843 hard_frame_pointer_rtx,
24844 GEN_INT (amount)));
24845 arm_add_cfa_adjust_cfa_note (insn, amount,
24846 hard_frame_pointer_rtx,
24847 hard_frame_pointer_rtx);
24850 /* Force out any pending memory operations that reference stacked data
24851 before stack de-allocation occurs. */
24852 emit_insn (gen_blockage ());
24853 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24854 hard_frame_pointer_rtx));
24855 arm_add_cfa_adjust_cfa_note (insn, 0,
24856 stack_pointer_rtx,
24857 hard_frame_pointer_rtx);
24858 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24859 deleted. */
24860 emit_insn (gen_force_register_use (stack_pointer_rtx));
24863 else
24865 /* Pop off outgoing args and local frame to adjust stack pointer to
24866 last saved register. */
24867 amount = offsets->outgoing_args - offsets->saved_regs;
24868 if (amount)
24870 rtx_insn *tmp;
24871 /* Force out any pending memory operations that reference stacked data
24872 before stack de-allocation occurs. */
24873 emit_insn (gen_blockage ());
24874 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24875 stack_pointer_rtx,
24876 GEN_INT (amount)));
24877 arm_add_cfa_adjust_cfa_note (tmp, amount,
24878 stack_pointer_rtx, stack_pointer_rtx);
24879 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24880 not deleted. */
24881 emit_insn (gen_force_register_use (stack_pointer_rtx));
24885 if (TARGET_HARD_FLOAT && TARGET_VFP)
24887 /* Generate VFP register multi-pop. */
24888 int end_reg = LAST_VFP_REGNUM + 1;
24890 /* Scan the registers in reverse order. We need to match
24891 any groupings made in the prologue and generate matching
24892 vldm operations. The need to match groups is because,
24893 unlike pop, vldm can only do consecutive regs. */
24894 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24895 /* Look for a case where a reg does not need restoring. */
24896 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24897 && (!df_regs_ever_live_p (i + 1)
24898 || call_used_regs[i + 1]))
24900 /* Restore the regs discovered so far (from reg+2 to
24901 end_reg). */
24902 if (end_reg > i + 2)
24903 arm_emit_vfp_multi_reg_pop (i + 2,
24904 (end_reg - (i + 2)) / 2,
24905 stack_pointer_rtx);
24906 end_reg = i;
24909 /* Restore the remaining regs that we have discovered (or possibly
24910 even all of them, if the conditional in the for loop never
24911 fired). */
24912 if (end_reg > i + 2)
24913 arm_emit_vfp_multi_reg_pop (i + 2,
24914 (end_reg - (i + 2)) / 2,
24915 stack_pointer_rtx);
24918 if (TARGET_IWMMXT)
24919 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24920 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24922 rtx_insn *insn;
24923 rtx addr = gen_rtx_MEM (V2SImode,
24924 gen_rtx_POST_INC (SImode,
24925 stack_pointer_rtx));
24926 set_mem_alias_set (addr, get_frame_alias_set ());
24927 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24928 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24929 gen_rtx_REG (V2SImode, i),
24930 NULL_RTX);
24931 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24932 stack_pointer_rtx, stack_pointer_rtx);
24935 if (saved_regs_mask)
24937 rtx insn;
24938 bool return_in_pc = false;
24940 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24941 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24942 && !IS_STACKALIGN (func_type)
24943 && really_return
24944 && crtl->args.pretend_args_size == 0
24945 && saved_regs_mask & (1 << LR_REGNUM)
24946 && !crtl->calls_eh_return)
24948 saved_regs_mask &= ~(1 << LR_REGNUM);
24949 saved_regs_mask |= (1 << PC_REGNUM);
24950 return_in_pc = true;
24953 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24955 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24956 if (saved_regs_mask & (1 << i))
24958 rtx addr = gen_rtx_MEM (SImode,
24959 gen_rtx_POST_INC (SImode,
24960 stack_pointer_rtx));
24961 set_mem_alias_set (addr, get_frame_alias_set ());
24963 if (i == PC_REGNUM)
24965 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24966 XVECEXP (insn, 0, 0) = ret_rtx;
24967 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
24968 addr);
24969 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24970 insn = emit_jump_insn (insn);
24972 else
24974 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24975 addr));
24976 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24977 gen_rtx_REG (SImode, i),
24978 NULL_RTX);
24979 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24980 stack_pointer_rtx,
24981 stack_pointer_rtx);
24985 else
24987 if (TARGET_LDRD
24988 && current_tune->prefer_ldrd_strd
24989 && !optimize_function_for_size_p (cfun))
24991 if (TARGET_THUMB2)
24992 thumb2_emit_ldrd_pop (saved_regs_mask);
24993 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24994 arm_emit_ldrd_pop (saved_regs_mask);
24995 else
24996 arm_emit_multi_reg_pop (saved_regs_mask);
24998 else
24999 arm_emit_multi_reg_pop (saved_regs_mask);
25002 if (return_in_pc)
25003 return;
25006 if (crtl->args.pretend_args_size)
25008 int i, j;
25009 rtx dwarf = NULL_RTX;
25010 rtx_insn *tmp =
25011 emit_insn (gen_addsi3 (stack_pointer_rtx,
25012 stack_pointer_rtx,
25013 GEN_INT (crtl->args.pretend_args_size)));
25015 RTX_FRAME_RELATED_P (tmp) = 1;
25017 if (cfun->machine->uses_anonymous_args)
25019 /* Restore pretend args. Refer to arm_expand_prologue for how the
25020 pretend args are saved on the stack. */
25021 int num_regs = crtl->args.pretend_args_size / 4;
25022 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25023 for (j = 0, i = 0; j < num_regs; i++)
25024 if (saved_regs_mask & (1 << i))
25026 rtx reg = gen_rtx_REG (SImode, i);
25027 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25028 j++;
25030 REG_NOTES (tmp) = dwarf;
25032 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25033 stack_pointer_rtx, stack_pointer_rtx);
25036 if (!really_return)
25037 return;
25039 if (crtl->calls_eh_return)
25040 emit_insn (gen_addsi3 (stack_pointer_rtx,
25041 stack_pointer_rtx,
25042 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25044 if (IS_STACKALIGN (func_type))
25045 /* Restore the original stack pointer. Before prologue, the stack was
25046 realigned and the original stack pointer saved in r0. For details,
25047 see comment in arm_expand_prologue. */
25048 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25050 emit_jump_insn (simple_return_rtx);
25053 /* Implementation of insn prologue_thumb1_interwork. This is the first
25054 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25056 const char *
25057 thumb1_output_interwork (void)
25059 const char * name;
25060 FILE *f = asm_out_file;
25062 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25063 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25064 == SYMBOL_REF);
25065 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25067 /* Generate code sequence to switch us into Thumb mode. */
25068 /* The .code 32 directive has already been emitted by
25069 ASM_DECLARE_FUNCTION_NAME. */
25070 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25071 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25073 /* Generate a label, so that the debugger will notice the
25074 change in instruction sets. This label is also used by
25075 the assembler to bypass the ARM code when this function
25076 is called from a Thumb encoded function elsewhere in the
25077 same file. Hence the definition of STUB_NAME here must
25078 agree with the definition in gas/config/tc-arm.c. */
25080 #define STUB_NAME ".real_start_of"
25082 fprintf (f, "\t.code\t16\n");
25083 #ifdef ARM_PE
25084 if (arm_dllexport_name_p (name))
25085 name = arm_strip_name_encoding (name);
25086 #endif
25087 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25088 fprintf (f, "\t.thumb_func\n");
25089 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25091 return "";
25094 /* Handle the case of a double word load into a low register from
25095 a computed memory address. The computed address may involve a
25096 register which is overwritten by the load. */
25097 const char *
25098 thumb_load_double_from_address (rtx *operands)
25100 rtx addr;
25101 rtx base;
25102 rtx offset;
25103 rtx arg1;
25104 rtx arg2;
25106 gcc_assert (REG_P (operands[0]));
25107 gcc_assert (MEM_P (operands[1]));
25109 /* Get the memory address. */
25110 addr = XEXP (operands[1], 0);
25112 /* Work out how the memory address is computed. */
25113 switch (GET_CODE (addr))
25115 case REG:
25116 operands[2] = adjust_address (operands[1], SImode, 4);
25118 if (REGNO (operands[0]) == REGNO (addr))
25120 output_asm_insn ("ldr\t%H0, %2", operands);
25121 output_asm_insn ("ldr\t%0, %1", operands);
25123 else
25125 output_asm_insn ("ldr\t%0, %1", operands);
25126 output_asm_insn ("ldr\t%H0, %2", operands);
25128 break;
25130 case CONST:
25131 /* Compute <address> + 4 for the high order load. */
25132 operands[2] = adjust_address (operands[1], SImode, 4);
25134 output_asm_insn ("ldr\t%0, %1", operands);
25135 output_asm_insn ("ldr\t%H0, %2", operands);
25136 break;
25138 case PLUS:
25139 arg1 = XEXP (addr, 0);
25140 arg2 = XEXP (addr, 1);
25142 if (CONSTANT_P (arg1))
25143 base = arg2, offset = arg1;
25144 else
25145 base = arg1, offset = arg2;
25147 gcc_assert (REG_P (base));
25149 /* Catch the case of <address> = <reg> + <reg> */
25150 if (REG_P (offset))
25152 int reg_offset = REGNO (offset);
25153 int reg_base = REGNO (base);
25154 int reg_dest = REGNO (operands[0]);
25156 /* Add the base and offset registers together into the
25157 higher destination register. */
25158 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25159 reg_dest + 1, reg_base, reg_offset);
25161 /* Load the lower destination register from the address in
25162 the higher destination register. */
25163 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25164 reg_dest, reg_dest + 1);
25166 /* Load the higher destination register from its own address
25167 plus 4. */
25168 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25169 reg_dest + 1, reg_dest + 1);
25171 else
25173 /* Compute <address> + 4 for the high order load. */
25174 operands[2] = adjust_address (operands[1], SImode, 4);
25176 /* If the computed address is held in the low order register
25177 then load the high order register first, otherwise always
25178 load the low order register first. */
25179 if (REGNO (operands[0]) == REGNO (base))
25181 output_asm_insn ("ldr\t%H0, %2", operands);
25182 output_asm_insn ("ldr\t%0, %1", operands);
25184 else
25186 output_asm_insn ("ldr\t%0, %1", operands);
25187 output_asm_insn ("ldr\t%H0, %2", operands);
25190 break;
25192 case LABEL_REF:
25193 /* With no registers to worry about we can just load the value
25194 directly. */
25195 operands[2] = adjust_address (operands[1], SImode, 4);
25197 output_asm_insn ("ldr\t%H0, %2", operands);
25198 output_asm_insn ("ldr\t%0, %1", operands);
25199 break;
25201 default:
25202 gcc_unreachable ();
25205 return "";
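/* Example of the register-plus-register case (register numbers assumed): for
   a load of the pair r4/r5 from [r1 + r2], the code above emits
       add r5, r1, r2
       ldr r4, [r5, #0]
       ldr r5, [r5, #4]
   so the computed address survives in the high half of the destination pair
   until both words have been loaded.  */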
25208 const char *
25209 thumb_output_move_mem_multiple (int n, rtx *operands)
25211 rtx tmp;
25213 switch (n)
25215 case 2:
25216 if (REGNO (operands[4]) > REGNO (operands[5]))
25218 tmp = operands[4];
25219 operands[4] = operands[5];
25220 operands[5] = tmp;
25222 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25223 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25224 break;
25226 case 3:
25227 if (REGNO (operands[4]) > REGNO (operands[5]))
25228 std::swap (operands[4], operands[5]);
25229 if (REGNO (operands[5]) > REGNO (operands[6]))
25230 std::swap (operands[5], operands[6]);
25231 if (REGNO (operands[4]) > REGNO (operands[5]))
25232 std::swap (operands[4], operands[5]);
25234 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25235 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25236 break;
25238 default:
25239 gcc_unreachable ();
25242 return "";
25245 /* Output a call-via instruction for thumb state. */
25246 const char *
25247 thumb_call_via_reg (rtx reg)
25249 int regno = REGNO (reg);
25250 rtx *labelp;
25252 gcc_assert (regno < LR_REGNUM);
25254 /* If we are in the normal text section we can use a single instance
25255 per compilation unit. If we are doing function sections, then we need
25256 an entry per section, since we can't rely on reachability. */
25257 if (in_section == text_section)
25259 thumb_call_reg_needed = 1;
25261 if (thumb_call_via_label[regno] == NULL)
25262 thumb_call_via_label[regno] = gen_label_rtx ();
25263 labelp = thumb_call_via_label + regno;
25265 else
25267 if (cfun->machine->call_via[regno] == NULL)
25268 cfun->machine->call_via[regno] = gen_label_rtx ();
25269 labelp = cfun->machine->call_via + regno;
25272 output_asm_insn ("bl\t%a0", labelp);
25273 return "";
25276 /* Routines for generating rtl. */
25277 void
25278 thumb_expand_movmemqi (rtx *operands)
25280 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25281 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25282 HOST_WIDE_INT len = INTVAL (operands[2]);
25283 HOST_WIDE_INT offset = 0;
25285 while (len >= 12)
25287 emit_insn (gen_movmem12b (out, in, out, in));
25288 len -= 12;
25291 if (len >= 8)
25293 emit_insn (gen_movmem8b (out, in, out, in));
25294 len -= 8;
25297 if (len >= 4)
25299 rtx reg = gen_reg_rtx (SImode);
25300 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25301 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25302 len -= 4;
25303 offset += 4;
25306 if (len >= 2)
25308 rtx reg = gen_reg_rtx (HImode);
25309 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25310 plus_constant (Pmode, in,
25311 offset))));
25312 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25313 offset)),
25314 reg));
25315 len -= 2;
25316 offset += 2;
25319 if (len)
25321 rtx reg = gen_reg_rtx (QImode);
25322 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25323 plus_constant (Pmode, in,
25324 offset))));
25325 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25326 offset)),
25327 reg));
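/* Worked example (illustrative only): a 15-byte copy is decomposed by the
   code above into one 12-byte ldmia/stmia block move, then a halfword move
   at offset 0 and a byte move at offset 2 relative to the updated pointers
   (12 + 2 + 1 = 15).  */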
25331 void
25332 thumb_reload_out_hi (rtx *operands)
25334 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25337 /* Handle reading a half-word from memory during reload. */
25338 void
25339 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25341 gcc_unreachable ();
25344 /* Return the length of a function name prefix
25345 that starts with the character 'c'. */
25346 static int
25347 arm_get_strip_length (int c)
25349 switch (c)
25351 ARM_NAME_ENCODING_LENGTHS
25352 default: return 0;
25356 /* Return a pointer to a function's name with any
25357 and all prefix encodings stripped from it. */
25358 const char *
25359 arm_strip_name_encoding (const char *name)
25361 int skip;
25363 while ((skip = arm_get_strip_length (* name)))
25364 name += skip;
25366 return name;
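/* Example (assuming the usual ARM_NAME_ENCODING_LENGTHS definition, which
   maps a leading '*' to a skip of one character): a name such as "*foo"
   is returned as "foo", while a name with no recognised prefix character
   is returned unchanged.  */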
25369 /* If there is a '*' anywhere in the name's prefix, then
25370 emit the stripped name verbatim, otherwise prepend an
25371 underscore if leading underscores are being used. */
25372 void
25373 arm_asm_output_labelref (FILE *stream, const char *name)
25375 int skip;
25376 int verbatim = 0;
25378 while ((skip = arm_get_strip_length (* name)))
25380 verbatim |= (*name == '*');
25381 name += skip;
25384 if (verbatim)
25385 fputs (name, stream);
25386 else
25387 asm_fprintf (stream, "%U%s", name);
25390 /* This function is used to emit an EABI tag and its associated value.
25391 We emit the numerical value of the tag in case the assembler does not
25392 support textual tags. (E.g. gas prior to 2.20). If requested we include
25393 the tag name in a comment so that anyone reading the assembler output
25394 will know which tag is being set.
25396 This function is not static because arm-c.c needs it too. */
25398 void
25399 arm_emit_eabi_attribute (const char *name, int num, int val)
25401 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25402 if (flag_verbose_asm || flag_debug_asm)
25403 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25404 asm_fprintf (asm_out_file, "\n");
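/* Illustrative output (assuming ASM_COMMENT_START is "@"): a call such as
   arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2) emits

       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals

   where the trailing comment appears only under -fverbose-asm or -dA.  */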
25407 /* This function is used to print CPU tuning information as comment
25408 in assembler file. Pointers are not printed for now. */
25410 void
25411 arm_print_tune_info (void)
25413 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25414 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25415 current_tune->constant_limit);
25416 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25417 current_tune->max_insns_skipped);
25418 asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
25419 current_tune->num_prefetch_slots);
25420 asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
25421 current_tune->l1_cache_size);
25422 asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
25423 current_tune->l1_cache_line_size);
25424 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25425 (int) current_tune->prefer_constant_pool);
25426 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25427 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25428 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25429 current_tune->branch_cost (false, false));
25430 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25431 current_tune->branch_cost (false, true));
25432 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25433 current_tune->branch_cost (true, false));
25434 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25435 current_tune->branch_cost (true, true));
25436 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25437 (int) current_tune->prefer_ldrd_strd);
25438 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25439 (int) current_tune->logical_op_non_short_circuit[0],
25440 (int) current_tune->logical_op_non_short_circuit[1]);
25441 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25442 (int) current_tune->prefer_neon_for_64bits);
25443 asm_fprintf (asm_out_file,
25444 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25445 (int) current_tune->disparage_flag_setting_t16_encodings);
25446 asm_fprintf (asm_out_file,
25447 "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
25448 (int) current_tune
25449 ->disparage_partial_flag_setting_t16_encodings);
25450 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25451 (int) current_tune->string_ops_prefer_neon);
25452 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25453 current_tune->max_insns_inline_memset);
25454 asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25455 current_tune->fuseable_ops);
25456 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25457 (int) current_tune->sched_autopref);
25460 static void
25461 arm_file_start (void)
25463 int val;
25465 if (TARGET_UNIFIED_ASM)
25466 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25468 if (TARGET_BPABI)
25470 const char *fpu_name;
25471 if (arm_selected_arch)
25473 /* armv7ve doesn't support any extensions. */
25474 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25476 /* Keep backward compatibility for assemblers
25477 which don't support armv7ve. */
25478 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25479 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25480 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25481 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25482 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25484 else
25486 const char* pos = strchr (arm_selected_arch->name, '+');
25487 if (pos)
25489 char buf[15];
25490 gcc_assert (strlen (arm_selected_arch->name)
25491 <= sizeof (buf) / sizeof (*pos));
25492 strncpy (buf, arm_selected_arch->name,
25493 (pos - arm_selected_arch->name) * sizeof (*pos));
25494 buf[pos - arm_selected_arch->name] = '\0';
25495 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25496 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25498 else
25499 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25502 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25503 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25504 else
25506 const char* truncated_name
25507 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25508 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25511 if (print_tune_info)
25512 arm_print_tune_info ();
25514 if (TARGET_SOFT_FLOAT)
25516 fpu_name = "softvfp";
25518 else
25520 fpu_name = arm_fpu_desc->name;
25521 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25523 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25524 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25526 if (TARGET_HARD_FLOAT_ABI)
25527 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25530 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25532 /* Some of these attributes only apply when the corresponding features
25533 are used. However we don't have any easy way of figuring this out.
25534 Conservatively record the setting that would have been used. */
25536 if (flag_rounding_math)
25537 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25539 if (!flag_unsafe_math_optimizations)
25541 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25542 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25544 if (flag_signaling_nans)
25545 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25547 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25548 flag_finite_math_only ? 1 : 3);
25550 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25551 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25552 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25553 flag_short_enums ? 1 : 2);
25555 /* Tag_ABI_optimization_goals. */
25556 if (optimize_size)
25557 val = 4;
25558 else if (optimize >= 2)
25559 val = 2;
25560 else if (optimize)
25561 val = 1;
25562 else
25563 val = 6;
25564 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25566 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25567 unaligned_access);
25569 if (arm_fp16_format)
25570 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25571 (int) arm_fp16_format);
25573 if (arm_lang_output_object_attributes_hook)
25574 arm_lang_output_object_attributes_hook();
25577 default_file_start ();
25580 static void
25581 arm_file_end (void)
25583 int regno;
25585 if (NEED_INDICATE_EXEC_STACK)
25586 /* Add .note.GNU-stack. */
25587 file_end_indicate_exec_stack ();
25589 if (! thumb_call_reg_needed)
25590 return;
25592 switch_to_section (text_section);
25593 asm_fprintf (asm_out_file, "\t.code 16\n");
25594 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25596 for (regno = 0; regno < LR_REGNUM; regno++)
25598 rtx label = thumb_call_via_label[regno];
25600 if (label != 0)
25602 targetm.asm_out.internal_label (asm_out_file, "L",
25603 CODE_LABEL_NUMBER (label));
25604 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25609 #ifndef ARM_PE
25610 /* Symbols in the text segment can be accessed without indirecting via the
25611 constant pool; it may take an extra binary operation, but this is still
25612 faster than indirecting via memory. Don't do this when not optimizing,
25613 since we won't be calculating all of the offsets necessary to do this
25614 simplification. */
25616 static void
25617 arm_encode_section_info (tree decl, rtx rtl, int first)
25619 if (optimize > 0 && TREE_CONSTANT (decl))
25620 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25622 default_encode_section_info (decl, rtl, first);
25624 #endif /* !ARM_PE */
25626 static void
25627 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25629 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25630 && !strcmp (prefix, "L"))
25632 arm_ccfsm_state = 0;
25633 arm_target_insn = NULL;
25635 default_internal_label (stream, prefix, labelno);
25638 /* Output code to add DELTA to the first argument, and then jump
25639 to FUNCTION. Used for C++ multiple inheritance. */
25640 static void
25641 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25642 HOST_WIDE_INT delta,
25643 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25644 tree function)
25646 static int thunk_label = 0;
25647 char label[256];
25648 char labelpc[256];
25649 int mi_delta = delta;
25650 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25651 int shift = 0;
25652 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25653 ? 1 : 0);
25654 if (mi_delta < 0)
25655 mi_delta = - mi_delta;
25657 final_start_function (emit_barrier (), file, 1);
25659 if (TARGET_THUMB1)
25661 int labelno = thunk_label++;
25662 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25663 /* Thunks are entered in ARM mode when available. */
25664 if (TARGET_THUMB1_ONLY)
25666 /* push r3 so we can use it as a temporary. */
25667 /* TODO: Omit this save if r3 is not used. */
25668 fputs ("\tpush {r3}\n", file);
25669 fputs ("\tldr\tr3, ", file);
25671 else
25673 fputs ("\tldr\tr12, ", file);
25675 assemble_name (file, label);
25676 fputc ('\n', file);
25677 if (flag_pic)
25679 /* If we are generating PIC, the ldr instruction below loads
25680 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25681 the address of the add + 8, so we have:
25683 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25684 = target + 1.
25686 Note that we have "+ 1" because some versions of GNU ld
25687 don't set the low bit of the result for R_ARM_REL32
25688 relocations against thumb function symbols.
25689 On ARMv6M this is +4, not +8. */
25690 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25691 assemble_name (file, labelpc);
25692 fputs (":\n", file);
25693 if (TARGET_THUMB1_ONLY)
25695 /* This is 2 insns after the start of the thunk, so we know it
25696 is 4-byte aligned. */
25697 fputs ("\tadd\tr3, pc, r3\n", file);
25698 fputs ("\tmov r12, r3\n", file);
25700 else
25701 fputs ("\tadd\tr12, pc, r12\n", file);
25703 else if (TARGET_THUMB1_ONLY)
25704 fputs ("\tmov r12, r3\n", file);
25706 if (TARGET_THUMB1_ONLY)
25708 if (mi_delta > 255)
25710 fputs ("\tldr\tr3, ", file);
25711 assemble_name (file, label);
25712 fputs ("+4\n", file);
25713 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25714 mi_op, this_regno, this_regno);
25716 else if (mi_delta != 0)
25718 /* Thumb1 unified syntax requires s suffix in instruction name when
25719 one of the operands is immediate. */
25720 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25721 mi_op, this_regno, this_regno,
25722 mi_delta);
25725 else
25727 /* TODO: Use movw/movt for large constants when available. */
25728 while (mi_delta != 0)
25730 if ((mi_delta & (3 << shift)) == 0)
25731 shift += 2;
25732 else
25734 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25735 mi_op, this_regno, this_regno,
25736 mi_delta & (0xff << shift));
25737 mi_delta &= ~(0xff << shift);
25738 shift += 8;
25742 if (TARGET_THUMB1)
25744 if (TARGET_THUMB1_ONLY)
25745 fputs ("\tpop\t{r3}\n", file);
25747 fprintf (file, "\tbx\tr12\n");
25748 ASM_OUTPUT_ALIGN (file, 2);
25749 assemble_name (file, label);
25750 fputs (":\n", file);
25751 if (flag_pic)
25753 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25754 rtx tem = XEXP (DECL_RTL (function), 0);
25755 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25756 pipeline offset is four rather than eight. Adjust the offset
25757 accordingly. */
25758 tem = plus_constant (GET_MODE (tem), tem,
25759 TARGET_THUMB1_ONLY ? -3 : -7);
25760 tem = gen_rtx_MINUS (GET_MODE (tem),
25761 tem,
25762 gen_rtx_SYMBOL_REF (Pmode,
25763 ggc_strdup (labelpc)));
25764 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25766 else
25767 /* Output ".word .LTHUNKn". */
25768 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25770 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25771 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25773 else
25775 fputs ("\tb\t", file);
25776 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25777 if (NEED_PLT_RELOC)
25778 fputs ("(PLT)", file);
25779 fputc ('\n', file);
25782 final_end_function ();
25786 arm_emit_vector_const (FILE *file, rtx x)
25788 int i;
25789 const char * pattern;
25791 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25793 switch (GET_MODE (x))
25795 case V2SImode: pattern = "%08x"; break;
25796 case V4HImode: pattern = "%04x"; break;
25797 case V8QImode: pattern = "%02x"; break;
25798 default: gcc_unreachable ();
25801 fprintf (file, "0x");
25802 for (i = CONST_VECTOR_NUNITS (x); i--;)
25804 rtx element;
25806 element = CONST_VECTOR_ELT (x, i);
25807 fprintf (file, pattern, INTVAL (element));
25810 return 1;
25813 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25814 HFmode constant pool entries are actually loaded with ldr. */
25815 void
25816 arm_emit_fp16_const (rtx c)
25818 REAL_VALUE_TYPE r;
25819 long bits;
25821 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25822 bits = real_to_target (NULL, &r, HFmode);
25823 if (WORDS_BIG_ENDIAN)
25824 assemble_zeros (2);
25825 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25826 if (!WORDS_BIG_ENDIAN)
25827 assemble_zeros (2);
25830 const char *
25831 arm_output_load_gr (rtx *operands)
25833 rtx reg;
25834 rtx offset;
25835 rtx wcgr;
25836 rtx sum;
25838 if (!MEM_P (operands [1])
25839 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25840 || !REG_P (reg = XEXP (sum, 0))
25841 || !CONST_INT_P (offset = XEXP (sum, 1))
25842 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25843 return "wldrw%?\t%0, %1";
25845 /* Fix up an out-of-range load of a GR register. */
25846 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25847 wcgr = operands[0];
25848 operands[0] = reg;
25849 output_asm_insn ("ldr%?\t%0, %1", operands);
25851 operands[0] = wcgr;
25852 operands[1] = reg;
25853 output_asm_insn ("tmcr%?\t%0, %1", operands);
25854 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25856 return "";
25859 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25861 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25862 named arg and all anonymous args onto the stack.
25863 XXX I know the prologue shouldn't be pushing registers, but it is faster
25864 that way. */
25866 static void
25867 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25868 machine_mode mode,
25869 tree type,
25870 int *pretend_size,
25871 int second_time ATTRIBUTE_UNUSED)
25873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25874 int nregs;
25876 cfun->machine->uses_anonymous_args = 1;
25877 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25879 nregs = pcum->aapcs_ncrn;
25880 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25881 nregs++;
25883 else
25884 nregs = pcum->nregs;
25886 if (nregs < NUM_ARG_REGS)
25887 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25890 /* We can't rely on the caller doing the proper promotion when
25891 using APCS or ATPCS. */
25893 static bool
25894 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25896 return !TARGET_AAPCS_BASED;
25899 static machine_mode
25900 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25901 machine_mode mode,
25902 int *punsignedp ATTRIBUTE_UNUSED,
25903 const_tree fntype ATTRIBUTE_UNUSED,
25904 int for_return ATTRIBUTE_UNUSED)
25906 if (GET_MODE_CLASS (mode) == MODE_INT
25907 && GET_MODE_SIZE (mode) < 4)
25908 return SImode;
25910 return mode;
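/* Effect in practice (illustrative): a char or short integer argument or
   return value (QImode/HImode, size < 4 bytes) is promoted to SImode, so it
   is passed and returned in a full 32-bit register; wider and non-integer
   modes are left untouched.  */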
25913 /* AAPCS based ABIs use short enums by default. */
25915 static bool
25916 arm_default_short_enums (void)
25918 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25922 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25924 static bool
25925 arm_align_anon_bitfield (void)
25927 return TARGET_AAPCS_BASED;
25931 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25933 static tree
25934 arm_cxx_guard_type (void)
25936 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25940 /* The EABI says test the least significant bit of a guard variable. */
25942 static bool
25943 arm_cxx_guard_mask_bit (void)
25945 return TARGET_AAPCS_BASED;
25949 /* The EABI specifies that all array cookies are 8 bytes long. */
25951 static tree
25952 arm_get_cookie_size (tree type)
25954 tree size;
25956 if (!TARGET_AAPCS_BASED)
25957 return default_cxx_get_cookie_size (type);
25959 size = build_int_cst (sizetype, 8);
25960 return size;
25964 /* The EABI says that array cookies should also contain the element size. */
25966 static bool
25967 arm_cookie_has_size (void)
25969 return TARGET_AAPCS_BASED;
25973 /* The EABI says constructors and destructors should return a pointer to
25974 the object constructed/destroyed. */
25976 static bool
25977 arm_cxx_cdtor_returns_this (void)
25979 return TARGET_AAPCS_BASED;
25982 /* The EABI says that an inline function may never be the key
25983 method. */
25985 static bool
25986 arm_cxx_key_method_may_be_inline (void)
25988 return !TARGET_AAPCS_BASED;
25991 static void
25992 arm_cxx_determine_class_data_visibility (tree decl)
25994 if (!TARGET_AAPCS_BASED
25995 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25996 return;
25998 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25999 is exported. However, on systems without dynamic vague linkage,
26000 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26001 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26002 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26003 else
26004 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26005 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26008 static bool
26009 arm_cxx_class_data_always_comdat (void)
26011 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26012 vague linkage if the class has no key function. */
26013 return !TARGET_AAPCS_BASED;
26017 /* The EABI says __aeabi_atexit should be used to register static
26018 destructors. */
26020 static bool
26021 arm_cxx_use_aeabi_atexit (void)
26023 return TARGET_AAPCS_BASED;
26027 void
26028 arm_set_return_address (rtx source, rtx scratch)
26030 arm_stack_offsets *offsets;
26031 HOST_WIDE_INT delta;
26032 rtx addr;
26033 unsigned long saved_regs;
26035 offsets = arm_get_frame_offsets ();
26036 saved_regs = offsets->saved_regs_mask;
26038 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26039 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26040 else
26042 if (frame_pointer_needed)
26043 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26044 else
26046 /* LR will be the first saved register. */
26047 delta = offsets->outgoing_args - (offsets->frame + 4);
26050 if (delta >= 4096)
26052 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26053 GEN_INT (delta & ~4095)));
26054 addr = scratch;
26055 delta &= 4095;
26057 else
26058 addr = stack_pointer_rtx;
26060 addr = plus_constant (Pmode, addr, delta);
26062 /* The store needs to be marked as frame related in order to prevent
26063 DSE from deleting it as dead if it is based on fp. */
26064 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26065 RTX_FRAME_RELATED_P (insn) = 1;
26066 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26071 void
26072 thumb_set_return_address (rtx source, rtx scratch)
26074 arm_stack_offsets *offsets;
26075 HOST_WIDE_INT delta;
26076 HOST_WIDE_INT limit;
26077 int reg;
26078 rtx addr;
26079 unsigned long mask;
26081 emit_use (source);
26083 offsets = arm_get_frame_offsets ();
26084 mask = offsets->saved_regs_mask;
26085 if (mask & (1 << LR_REGNUM))
26087 limit = 1024;
26088 /* Find the saved regs. */
26089 if (frame_pointer_needed)
26091 delta = offsets->soft_frame - offsets->saved_args;
26092 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26093 if (TARGET_THUMB1)
26094 limit = 128;
26096 else
26098 delta = offsets->outgoing_args - offsets->saved_args;
26099 reg = SP_REGNUM;
26101 /* Allow for the stack frame. */
26102 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26103 delta -= 16;
26104 /* The link register is always the first saved register. */
26105 delta -= 4;
26107 /* Construct the address. */
26108 addr = gen_rtx_REG (SImode, reg);
26109 if (delta > limit)
26111 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26112 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26113 addr = scratch;
26115 else
26116 addr = plus_constant (Pmode, addr, delta);
26118 /* The store needs to be marked as frame related in order to prevent
26119 DSE from deleting it as dead if it is based on fp. */
26120 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26121 RTX_FRAME_RELATED_P (insn) = 1;
26122 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26124 else
26125 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26128 /* Implements target hook vector_mode_supported_p. */
26129 bool
26130 arm_vector_mode_supported_p (machine_mode mode)
26132 /* Neon also supports V2SImode, etc. listed in the clause below. */
26133 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26134 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26135 return true;
26137 if ((TARGET_NEON || TARGET_IWMMXT)
26138 && ((mode == V2SImode)
26139 || (mode == V4HImode)
26140 || (mode == V8QImode)))
26141 return true;
26143 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26144 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26145 || mode == V2HAmode))
26146 return true;
26148 return false;
26151 /* Implements target hook array_mode_supported_p. */
26153 static bool
26154 arm_array_mode_supported_p (machine_mode mode,
26155 unsigned HOST_WIDE_INT nelems)
26157 if (TARGET_NEON
26158 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26159 && (nelems >= 2 && nelems <= 4))
26160 return true;
26162 return false;
26165 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26166 registers when autovectorizing for Neon, at least until multiple vector
26167 widths are supported properly by the middle-end. */
26169 static machine_mode
26170 arm_preferred_simd_mode (machine_mode mode)
26172 if (TARGET_NEON)
26173 switch (mode)
26175 case SFmode:
26176 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26177 case SImode:
26178 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26179 case HImode:
26180 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26181 case QImode:
26182 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26183 case DImode:
26184 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26185 return V2DImode;
26186 break;
26188 default:;
26191 if (TARGET_REALLY_IWMMXT)
26192 switch (mode)
26194 case SImode:
26195 return V2SImode;
26196 case HImode:
26197 return V4HImode;
26198 case QImode:
26199 return V8QImode;
26201 default:;
26204 return word_mode;
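/* Illustrative mapping under the default (quadword) Neon setting:
   SFmode -> V4SFmode, SImode -> V4SImode, HImode -> V8HImode,
   QImode -> V16QImode; with -mvectorize-with-neon-double the doubleword
   variants V2SF/V2SI/V4HI/V8QI are chosen instead.  */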
26207 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26209 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26210 using r0-r4 for function arguments, r7 for the stack frame and don't have
26211 enough left over to do doubleword arithmetic. For Thumb-2 all the
26212 potentially problematic instructions accept high registers so this is not
26213 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26214 that require many low registers. */
26215 static bool
26216 arm_class_likely_spilled_p (reg_class_t rclass)
26218 if ((TARGET_THUMB1 && rclass == LO_REGS)
26219 || rclass == CC_REG)
26220 return true;
26222 return false;
26225 /* Implements target hook small_register_classes_for_mode_p. */
26226 bool
26227 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26229 return TARGET_THUMB1;
26232 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26233 ARM insns and therefore guarantee that the shift count is modulo 256.
26234 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26235 guarantee no particular behavior for out-of-range counts. */
26237 static unsigned HOST_WIDE_INT
26238 arm_shift_truncation_mask (machine_mode mode)
26240 return mode == SImode ? 255 : 0;
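/* Example: returning 255 for SImode tells the middle end that a variable
   shift count is interpreted modulo 256, so a count of 260 behaves like a
   count of 4; for other modes we return 0 and promise nothing about
   out-of-range counts.  */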
26244 /* Map internal gcc register numbers to DWARF2 register numbers. */
26246 unsigned int
26247 arm_dbx_register_number (unsigned int regno)
26249 if (regno < 16)
26250 return regno;
26252 if (IS_VFP_REGNUM (regno))
26254 /* See comment in arm_dwarf_register_span. */
26255 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26256 return 64 + regno - FIRST_VFP_REGNUM;
26257 else
26258 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26261 if (IS_IWMMXT_GR_REGNUM (regno))
26262 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26264 if (IS_IWMMXT_REGNUM (regno))
26265 return 112 + regno - FIRST_IWMMXT_REGNUM;
26267 gcc_unreachable ();
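/* Illustrative mappings (derived from the code above): core registers r0-r15
   map to DWARF numbers 0-15; VFP registers in the single-precision-capable
   range map into the legacy 64-based block; higher D registers (d16 and
   above on VFPv3) map into the 256-based D-register block.  */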
26270 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26271 GCC models them as 64 32-bit registers, so we need to describe this to
26272 the DWARF generation code. Other registers can use the default. */
26273 static rtx
26274 arm_dwarf_register_span (rtx rtl)
26276 machine_mode mode;
26277 unsigned regno;
26278 rtx parts[16];
26279 int nregs;
26280 int i;
26282 regno = REGNO (rtl);
26283 if (!IS_VFP_REGNUM (regno))
26284 return NULL_RTX;
26286 /* XXX FIXME: The EABI defines two VFP register ranges:
26287 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26288 256-287: D0-D31
26289 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26290 corresponding D register. Until GDB supports this, we shall use the
26291 legacy encodings. We also use these encodings for D0-D15 for
26292 compatibility with older debuggers. */
26293 mode = GET_MODE (rtl);
26294 if (GET_MODE_SIZE (mode) < 8)
26295 return NULL_RTX;
26297 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26299 nregs = GET_MODE_SIZE (mode) / 4;
26300 for (i = 0; i < nregs; i += 2)
26301 if (TARGET_BIG_END)
26303 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26304 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26306 else
26308 parts[i] = gen_rtx_REG (SImode, regno + i);
26309 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26312 else
26314 nregs = GET_MODE_SIZE (mode) / 8;
26315 for (i = 0; i < nregs; i++)
26316 parts[i] = gen_rtx_REG (DImode, regno + i);
26319 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26322 #if ARM_UNWIND_INFO
26323 /* Emit unwind directives for a store-multiple instruction or stack pointer
26324 push during alignment.
26325 These should only ever be generated by the function prologue code, so
26326 expect them to have a particular form.
26327 The store-multiple instruction sometimes pushes pc as the last register,
26328 although it should not be tracked into unwind information, or for -Os
26329 sometimes pushes some dummy registers before the first register that needs
26330 to be tracked in unwind information; such dummy registers are there just
26331 to avoid separate stack adjustment, and will not be restored in the
26332 epilogue. */
26334 static void
26335 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26337 int i;
26338 HOST_WIDE_INT offset;
26339 HOST_WIDE_INT nregs;
26340 int reg_size;
26341 unsigned reg;
26342 unsigned lastreg;
26343 unsigned padfirst = 0, padlast = 0;
26344 rtx e;
26346 e = XVECEXP (p, 0, 0);
26347 gcc_assert (GET_CODE (e) == SET);
26349 /* First insn will adjust the stack pointer. */
26350 gcc_assert (GET_CODE (e) == SET
26351 && REG_P (SET_DEST (e))
26352 && REGNO (SET_DEST (e)) == SP_REGNUM
26353 && GET_CODE (SET_SRC (e)) == PLUS);
26355 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26356 nregs = XVECLEN (p, 0) - 1;
26357 gcc_assert (nregs);
26359 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26360 if (reg < 16)
26362 /* For -Os dummy registers can be pushed at the beginning to
26363 avoid separate stack pointer adjustment. */
26364 e = XVECEXP (p, 0, 1);
26365 e = XEXP (SET_DEST (e), 0);
26366 if (GET_CODE (e) == PLUS)
26367 padfirst = INTVAL (XEXP (e, 1));
26368 gcc_assert (padfirst == 0 || optimize_size);
26369 /* The function prologue may also push pc, but not annotate it as it is
26370 never restored. We turn this into a stack pointer adjustment. */
26371 e = XVECEXP (p, 0, nregs);
26372 e = XEXP (SET_DEST (e), 0);
26373 if (GET_CODE (e) == PLUS)
26374 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26375 else
26376 padlast = offset - 4;
26377 gcc_assert (padlast == 0 || padlast == 4);
26378 if (padlast == 4)
26379 fprintf (asm_out_file, "\t.pad #4\n");
26380 reg_size = 4;
26381 fprintf (asm_out_file, "\t.save {");
26383 else if (IS_VFP_REGNUM (reg))
26385 reg_size = 8;
26386 fprintf (asm_out_file, "\t.vsave {");
26388 else
26389 /* Unknown register type. */
26390 gcc_unreachable ();
26392 /* If the stack increment doesn't match the size of the saved registers,
26393 something has gone horribly wrong. */
26394 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26396 offset = padfirst;
26397 lastreg = 0;
26398 /* The remaining insns will describe the stores. */
26399 for (i = 1; i <= nregs; i++)
26401 /* Expect (set (mem <addr>) (reg)).
26402 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26403 e = XVECEXP (p, 0, i);
26404 gcc_assert (GET_CODE (e) == SET
26405 && MEM_P (SET_DEST (e))
26406 && REG_P (SET_SRC (e)));
26408 reg = REGNO (SET_SRC (e));
26409 gcc_assert (reg >= lastreg);
26411 if (i != 1)
26412 fprintf (asm_out_file, ", ");
26413 /* We can't use %r for vfp because we need to use the
26414 double precision register names. */
26415 if (IS_VFP_REGNUM (reg))
26416 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26417 else
26418 asm_fprintf (asm_out_file, "%r", reg);
26420 #ifdef ENABLE_CHECKING
26421 /* Check that the addresses are consecutive. */
26422 e = XEXP (SET_DEST (e), 0);
26423 if (GET_CODE (e) == PLUS)
26424 gcc_assert (REG_P (XEXP (e, 0))
26425 && REGNO (XEXP (e, 0)) == SP_REGNUM
26426 && CONST_INT_P (XEXP (e, 1))
26427 && offset == INTVAL (XEXP (e, 1)));
26428 else
26429 gcc_assert (i == 1
26430 && REG_P (e)
26431 && REGNO (e) == SP_REGNUM);
26432 offset += reg_size;
26433 #endif
26435 fprintf (asm_out_file, "}\n");
26436 if (padfirst)
26437 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26440 /* Emit unwind directives for a SET. */
26442 static void
26443 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26445 rtx e0;
26446 rtx e1;
26447 unsigned reg;
26449 e0 = XEXP (p, 0);
26450 e1 = XEXP (p, 1);
26451 switch (GET_CODE (e0))
26453 case MEM:
26454 /* Pushing a single register. */
26455 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26456 || !REG_P (XEXP (XEXP (e0, 0), 0))
26457 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26458 abort ();
26460 asm_fprintf (asm_out_file, "\t.save ");
26461 if (IS_VFP_REGNUM (REGNO (e1)))
26462 asm_fprintf(asm_out_file, "{d%d}\n",
26463 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26464 else
26465 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26466 break;
26468 case REG:
26469 if (REGNO (e0) == SP_REGNUM)
26471 /* A stack increment. */
26472 if (GET_CODE (e1) != PLUS
26473 || !REG_P (XEXP (e1, 0))
26474 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26475 || !CONST_INT_P (XEXP (e1, 1)))
26476 abort ();
26478 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26479 -INTVAL (XEXP (e1, 1)));
26481 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26483 HOST_WIDE_INT offset;
26485 if (GET_CODE (e1) == PLUS)
26487 if (!REG_P (XEXP (e1, 0))
26488 || !CONST_INT_P (XEXP (e1, 1)))
26489 abort ();
26490 reg = REGNO (XEXP (e1, 0));
26491 offset = INTVAL (XEXP (e1, 1));
26492 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26493 HARD_FRAME_POINTER_REGNUM, reg,
26494 offset);
26496 else if (REG_P (e1))
26498 reg = REGNO (e1);
26499 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26500 HARD_FRAME_POINTER_REGNUM, reg);
26502 else
26503 abort ();
26505 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26507 /* Move from sp to reg. */
26508 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26510 else if (GET_CODE (e1) == PLUS
26511 && REG_P (XEXP (e1, 0))
26512 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26513 && CONST_INT_P (XEXP (e1, 1)))
26515 /* Set reg to offset from sp. */
26516 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26517 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26519 else
26520 abort ();
26521 break;
26523 default:
26524 abort ();
26529 /* Emit unwind directives for the given insn. */
26531 static void
26532 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26534 rtx note, pat;
26535 bool handled_one = false;
26537 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26538 return;
26540 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26541 && (TREE_NOTHROW (current_function_decl)
26542 || crtl->all_throwers_are_sibcalls))
26543 return;
26545 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26546 return;
26548 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26550 switch (REG_NOTE_KIND (note))
26552 case REG_FRAME_RELATED_EXPR:
26553 pat = XEXP (note, 0);
26554 goto found;
26556 case REG_CFA_REGISTER:
26557 pat = XEXP (note, 0);
26558 if (pat == NULL)
26560 pat = PATTERN (insn);
26561 if (GET_CODE (pat) == PARALLEL)
26562 pat = XVECEXP (pat, 0, 0);
26565 /* Only emitted for IS_STACKALIGN re-alignment. */
26567 rtx dest, src;
26568 unsigned reg;
26570 src = SET_SRC (pat);
26571 dest = SET_DEST (pat);
26573 gcc_assert (src == stack_pointer_rtx);
26574 reg = REGNO (dest);
26575 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26576 reg + 0x90, reg);
26578 handled_one = true;
26579 break;
26581 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26582 to get correct dwarf information for shrink-wrap. We should not
26583 emit unwind information for it because these are used either for
26584 pretend arguments or notes to adjust sp and restore registers from
26585 stack. */
26586 case REG_CFA_DEF_CFA:
26587 case REG_CFA_ADJUST_CFA:
26588 case REG_CFA_RESTORE:
26589 return;
26591 case REG_CFA_EXPRESSION:
26592 case REG_CFA_OFFSET:
26593 /* ??? Only handling here what we actually emit. */
26594 gcc_unreachable ();
26596 default:
26597 break;
26600 if (handled_one)
26601 return;
26602 pat = PATTERN (insn);
26603 found:
26605 switch (GET_CODE (pat))
26607 case SET:
26608 arm_unwind_emit_set (asm_out_file, pat);
26609 break;
26611 case SEQUENCE:
26612 /* Store multiple. */
26613 arm_unwind_emit_sequence (asm_out_file, pat);
26614 break;
26616 default:
26617 abort();
26622 /* Output a reference from a function exception table to the type_info
26623 object X. The EABI specifies that the symbol should be relocated by
26624 an R_ARM_TARGET2 relocation. */
26626 static bool
26627 arm_output_ttype (rtx x)
26629 fputs ("\t.word\t", asm_out_file);
26630 output_addr_const (asm_out_file, x);
26631 /* Use special relocations for symbol references. */
26632 if (!CONST_INT_P (x))
26633 fputs ("(TARGET2)", asm_out_file);
26634 fputc ('\n', asm_out_file);
26636 return TRUE;
26639 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26641 static void
26642 arm_asm_emit_except_personality (rtx personality)
26644 fputs ("\t.personality\t", asm_out_file);
26645 output_addr_const (asm_out_file, personality);
26646 fputc ('\n', asm_out_file);
26649 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26651 static void
26652 arm_asm_init_sections (void)
26654 exception_section = get_unnamed_section (0, output_section_asm_op,
26655 "\t.handlerdata");
26657 #endif /* ARM_UNWIND_INFO */
26659 /* Output unwind directives for the start/end of a function. */
26661 void
26662 arm_output_fn_unwind (FILE * f, bool prologue)
26664 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26665 return;
26667 if (prologue)
26668 fputs ("\t.fnstart\n", f);
26669 else
26671 /* If this function will never be unwound, then mark it as such.
26672 The same condition is used in arm_unwind_emit to suppress
26673 the frame annotations. */
26674 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26675 && (TREE_NOTHROW (current_function_decl)
26676 || crtl->all_throwers_are_sibcalls))
26677 fputs("\t.cantunwind\n", f);
26679 fputs ("\t.fnend\n", f);
26683 static bool
26684 arm_emit_tls_decoration (FILE *fp, rtx x)
26686 enum tls_reloc reloc;
26687 rtx val;
26689 val = XVECEXP (x, 0, 0);
26690 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26692 output_addr_const (fp, val);
26694 switch (reloc)
26696 case TLS_GD32:
26697 fputs ("(tlsgd)", fp);
26698 break;
26699 case TLS_LDM32:
26700 fputs ("(tlsldm)", fp);
26701 break;
26702 case TLS_LDO32:
26703 fputs ("(tlsldo)", fp);
26704 break;
26705 case TLS_IE32:
26706 fputs ("(gottpoff)", fp);
26707 break;
26708 case TLS_LE32:
26709 fputs ("(tpoff)", fp);
26710 break;
26711 case TLS_DESCSEQ:
26712 fputs ("(tlsdesc)", fp);
26713 break;
26714 default:
26715 gcc_unreachable ();
26718 switch (reloc)
26720 case TLS_GD32:
26721 case TLS_LDM32:
26722 case TLS_IE32:
26723 case TLS_DESCSEQ:
26724 fputs (" + (. - ", fp);
26725 output_addr_const (fp, XVECEXP (x, 0, 2));
26726 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26727 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26728 output_addr_const (fp, XVECEXP (x, 0, 3));
26729 fputc (')', fp);
26730 break;
26731 default:
26732 break;
26735 return TRUE;
26738 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26740 static void
26741 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26743 gcc_assert (size == 4);
26744 fputs ("\t.word\t", file);
26745 output_addr_const (file, x);
26746 fputs ("(tlsldo)", file);
26749 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26751 static bool
26752 arm_output_addr_const_extra (FILE *fp, rtx x)
26754 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26755 return arm_emit_tls_decoration (fp, x);
26756 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26758 char label[256];
26759 int labelno = INTVAL (XVECEXP (x, 0, 0));
26761 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26762 assemble_name_raw (fp, label);
26764 return TRUE;
26766 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26768 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26769 if (GOT_PCREL)
26770 fputs ("+.", fp);
26771 fputs ("-(", fp);
26772 output_addr_const (fp, XVECEXP (x, 0, 0));
26773 fputc (')', fp);
26774 return TRUE;
26776 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26778 output_addr_const (fp, XVECEXP (x, 0, 0));
26779 if (GOT_PCREL)
26780 fputs ("+.", fp);
26781 fputs ("-(", fp);
26782 output_addr_const (fp, XVECEXP (x, 0, 1));
26783 fputc (')', fp);
26784 return TRUE;
26786 else if (GET_CODE (x) == CONST_VECTOR)
26787 return arm_emit_vector_const (fp, x);
26789 return FALSE;
26792 /* Output assembly for a shift instruction.
26793 SET_FLAGS determines how the instruction modifies the condition codes.
26794 0 - Do not set condition codes.
26795 1 - Set condition codes.
26796 2 - Use smallest instruction. */
26797 const char *
26798 arm_output_shift(rtx * operands, int set_flags)
26800 char pattern[100];
26801 static const char flag_chars[3] = {'?', '.', '!'};
26802 const char *shift;
26803 HOST_WIDE_INT val;
26804 char c;
26806 c = flag_chars[set_flags];
26807 if (TARGET_UNIFIED_ASM)
26809 shift = shift_op(operands[3], &val);
26810 if (shift)
26812 if (val != -1)
26813 operands[2] = GEN_INT(val);
26814 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26816 else
26817 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26819 else
26820 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26821 output_asm_insn (pattern, operands);
26822 return "";
26825 /* Output assembly for a WMMX immediate shift instruction. */
26826 const char *
26827 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26829 int shift = INTVAL (operands[2]);
26830 char templ[50];
26831 machine_mode opmode = GET_MODE (operands[0]);
26833 gcc_assert (shift >= 0);
26835 /* Handle shift values that are out of range for the register versions:
26836 greater than 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
26837 if (((opmode == V4HImode) && (shift > 15))
26838 || ((opmode == V2SImode) && (shift > 31))
26839 || ((opmode == DImode) && (shift > 63)))
26841 if (wror_or_wsra)
26843 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26844 output_asm_insn (templ, operands);
26845 if (opmode == DImode)
26847 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26848 output_asm_insn (templ, operands);
26851 else
26853 /* The destination register will contain all zeros. */
26854 sprintf (templ, "wzero\t%%0");
26855 output_asm_insn (templ, operands);
26857 return "";
26860 if ((opmode == DImode) && (shift > 32))
26862 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26863 output_asm_insn (templ, operands);
26864 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26865 output_asm_insn (templ, operands);
26867 else
26869 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26870 output_asm_insn (templ, operands);
26872 return "";
26875 /* Output assembly for a WMMX tinsr instruction. */
26876 const char *
26877 arm_output_iwmmxt_tinsr (rtx *operands)
26879 int mask = INTVAL (operands[3]);
26880 int i;
26881 char templ[50];
26882 int units = mode_nunits[GET_MODE (operands[0])];
26883 gcc_assert ((mask & (mask - 1)) == 0);
26884 for (i = 0; i < units; ++i)
26886 if ((mask & 0x01) == 1)
26888 break;
26890 mask >>= 1;
26892 gcc_assert (i < units);
26894 switch (GET_MODE (operands[0]))
26896 case V8QImode:
26897 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26898 break;
26899 case V4HImode:
26900 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26901 break;
26902 case V2SImode:
26903 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26904 break;
26905 default:
26906 gcc_unreachable ();
26907 break;
26909 output_asm_insn (templ, operands);
26911 return "";
26914 /* Output a Thumb-1 casesi dispatch sequence. */
26915 const char *
26916 thumb1_output_casesi (rtx *operands)
26918 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26920 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26922 switch (GET_MODE(diff_vec))
26924 case QImode:
26925 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26926 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26927 case HImode:
26928 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26929 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26930 case SImode:
26931 return "bl\t%___gnu_thumb1_case_si";
26932 default:
26933 gcc_unreachable ();
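/* Example (illustrative): a dispatch over a QImode difference vector with
   unsigned offsets is compiled to a call to the libgcc helper
   __gnu_thumb1_case_uqi, as selected by the strings above.  */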
26937 /* Output a Thumb-2 casesi instruction. */
26938 const char *
26939 thumb2_output_casesi (rtx *operands)
26941 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26943 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26945 output_asm_insn ("cmp\t%0, %1", operands);
26946 output_asm_insn ("bhi\t%l3", operands);
26947 switch (GET_MODE(diff_vec))
26949 case QImode:
26950 return "tbb\t[%|pc, %0]";
26951 case HImode:
26952 return "tbh\t[%|pc, %0, lsl #1]";
26953 case SImode:
26954 if (flag_pic)
26956 output_asm_insn ("adr\t%4, %l2", operands);
26957 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26958 output_asm_insn ("add\t%4, %4, %5", operands);
26959 return "bx\t%4";
26961 else
26963 output_asm_insn ("adr\t%4, %l2", operands);
26964 return "ldr\t%|pc, [%4, %0, lsl #2]";
26966 default:
26967 gcc_unreachable ();
26971 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
26972 per-core tuning structs. */
26973 static int
26974 arm_issue_rate (void)
26976 return current_tune->issue_rate;
26979 /* Return how many instructions the scheduler should look ahead to choose the
26980 best one. */
26981 static int
26982 arm_first_cycle_multipass_dfa_lookahead (void)
26984 int issue_rate = arm_issue_rate ();
26986 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
26989 /* Enable modeling of L2 auto-prefetcher. */
26990 static int
26991 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
26993 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
26996 const char *
26997 arm_mangle_type (const_tree type)
26999 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27000 has to be mangled as if it is in the "std" namespace. */
27001 if (TARGET_AAPCS_BASED
27002 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27003 return "St9__va_list";
27005 /* Half-precision float. */
27006 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27007 return "Dh";
27009 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27010 builtin type. */
27011 if (TYPE_NAME (type) != NULL)
27012 return arm_mangle_builtin_type (type);
27014 /* Use the default mangling. */
27015 return NULL;
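/* Examples (from the cases above): on an AAPCS-based target __va_list
   mangles as "St9__va_list" and the half-precision __fp16 type mangles as
   "Dh"; anything else falls back to the Neon builtin mangling or to the
   default mangling.  */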
27018 /* Order of allocation of core registers for Thumb: this allocation is
27019 written over the corresponding initial entries of the array
27020 initialized with REG_ALLOC_ORDER. We allocate all low registers
27021 first. Saving and restoring a low register is usually cheaper than
27022 using a call-clobbered high register. */
27024 static const int thumb_core_reg_alloc_order[] =
27026 3, 2, 1, 0, 4, 5, 6, 7,
27027 14, 12, 8, 9, 10, 11
27030 /* Adjust register allocation order when compiling for Thumb. */
27032 void
27033 arm_order_regs_for_local_alloc (void)
27035 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27036 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27037 if (TARGET_THUMB)
27038 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27039 sizeof (thumb_core_reg_alloc_order));
27042 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27044 bool
27045 arm_frame_pointer_required (void)
27047 return (cfun->has_nonlocal_label
27048 || SUBTARGET_FRAME_POINTER_REQUIRED
27049 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27052 /* Only Thumb-1 lacks conditional execution, so return true if
27053 the target is not Thumb-1. */
27054 static bool
27055 arm_have_conditional_execution (void)
27057 return !TARGET_THUMB1;
27060 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27061 static HOST_WIDE_INT
27062 arm_vector_alignment (const_tree type)
27064 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27066 if (TARGET_AAPCS_BASED)
27067 align = MIN (align, 64);
27069 return align;
27072 static unsigned int
27073 arm_autovectorize_vector_sizes (void)
27075 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27078 static bool
27079 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27081 /* Vectors which aren't in packed structures will not be less aligned than
27082 the natural alignment of their element type, so this is safe. */
27083 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27084 return !is_packed;
27086 return default_builtin_vector_alignment_reachable (type, is_packed);
27089 static bool
27090 arm_builtin_support_vector_misalignment (machine_mode mode,
27091 const_tree type, int misalignment,
27092 bool is_packed)
27094 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27096 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27098 if (is_packed)
27099 return align == 1;
27101 /* If the misalignment is unknown, we should be able to handle the access
27102 so long as it is not to a member of a packed data structure. */
27103 if (misalignment == -1)
27104 return true;
27106 /* Return true if the misalignment is a multiple of the natural alignment
27107 of the vector's element type. This is probably always going to be
27108 true in practice, since we've already established that this isn't a
27109 packed access. */
27110 return ((misalignment % align) == 0);
27113 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27114 is_packed);
27117 static void
27118 arm_conditional_register_usage (void)
27120 int regno;
27122 if (TARGET_THUMB1 && optimize_size)
27124 /* When optimizing for size on Thumb-1, it's better not
27125 to use the HI regs, because of the overhead of
27126 stacking them. */
27127 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27128 fixed_regs[regno] = call_used_regs[regno] = 1;
27131 /* The link register can be clobbered by any branch insn,
27132 but we have no way to track that at present, so mark
27133 it as unavailable. */
27134 if (TARGET_THUMB1)
27135 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27137 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27139 /* VFPv3 registers are disabled when earlier VFP
27140 versions are selected due to the definition of
27141 LAST_VFP_REGNUM. */
27142 for (regno = FIRST_VFP_REGNUM;
27143 regno <= LAST_VFP_REGNUM; ++ regno)
27145 fixed_regs[regno] = 0;
27146 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27147 || regno >= FIRST_VFP_REGNUM + 32;
27151 if (TARGET_REALLY_IWMMXT)
27153 regno = FIRST_IWMMXT_GR_REGNUM;
27154 /* The 2002/10/09 revision of the XScale ABI has wCG0
27155 and wCG1 as call-preserved registers. The 2002/11/21
27156 revision changed this so that all wCG registers are
27157 scratch registers. */
27158 for (regno = FIRST_IWMMXT_GR_REGNUM;
27159 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27160 fixed_regs[regno] = 0;
27161 /* The XScale ABI has wR0 - wR9 as scratch registers,
27162 the rest as call-preserved registers. */
27163 for (regno = FIRST_IWMMXT_REGNUM;
27164 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27166 fixed_regs[regno] = 0;
27167 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27171 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27173 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27174 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27176 else if (TARGET_APCS_STACK)
27178 fixed_regs[10] = 1;
27179 call_used_regs[10] = 1;
27181 /* -mcaller-super-interworking reserves r11 for calls to
27182 _interwork_r11_call_via_rN(). Making the register global
27183 is an easy way of ensuring that it remains valid for all
27184 calls. */
27185 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27186 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27188 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27189 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27190 if (TARGET_CALLER_INTERWORKING)
27191 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27193 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27196 static reg_class_t
27197 arm_preferred_rename_class (reg_class_t rclass)
27199 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27200 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27201 and code size can be reduced. */
27202 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27203 return LO_REGS;
27204 else
27205 return NO_REGS;
27208 /* Compute the attribute "length" of insn "*push_multi".
27209 So this function MUST be kept in sync with that insn pattern. */
27211 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27213 int i, regno, hi_reg;
27214 int num_saves = XVECLEN (parallel_op, 0);
27216 /* ARM mode. */
27217 if (TARGET_ARM)
27218 return 4;
27219 /* Thumb1 mode. */
27220 if (TARGET_THUMB1)
27221 return 2;
27223 /* Thumb2 mode. */
27224 regno = REGNO (first_op);
27225 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27226 for (i = 1; i < num_saves && !hi_reg; i++)
27228 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27229 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27232 if (!hi_reg)
27233 return 2;
27234 return 4;
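/* Illustrative lengths (hypothetical register sets): in Thumb-2 a push of
   only low registers and/or LR, e.g. {r4, r5, lr}, is encodable in 2 bytes,
   whereas including a high register such as r8 forces the 4-byte encoding;
   ARM mode is always 4 bytes and Thumb-1 always 2.  */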
27237 /* Compute the number of instructions emitted by output_move_double. */
27239 arm_count_output_move_double_insns (rtx *operands)
27241 int count;
27242 rtx ops[2];
27243 /* output_move_double may modify the operands array, so call it
27244 here on a copy of the array. */
27245 ops[0] = operands[0];
27246 ops[1] = operands[1];
27247 output_move_double (ops, false, &count);
27248 return count;
27252 vfp3_const_double_for_fract_bits (rtx operand)
27254 REAL_VALUE_TYPE r0;
27256 if (!CONST_DOUBLE_P (operand))
27257 return 0;
27259 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27260 if (exact_real_inverse (DFmode, &r0))
27262 if (exact_real_truncate (DFmode, &r0))
27264 HOST_WIDE_INT value = real_to_integer (&r0);
27265 value = value & 0xffffffff;
27266 if ((value != 0) && ( (value & (value - 1)) == 0))
27267 return int_log2 (value);
27270 return 0;
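/* Worked example (illustrative): for the constant 0.25 the exact inverse is
   4.0, which converts exactly to the integer 4 = 2^2, so the function
   returns 2 (the number of fractional bits); constants without an exact
   power-of-two inverse yield 0.  */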
27274 vfp3_const_double_for_bits (rtx operand)
27276 REAL_VALUE_TYPE r0;
27278 if (!CONST_DOUBLE_P (operand))
27279 return 0;
27281 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27282 if (exact_real_truncate (DFmode, &r0))
27284 HOST_WIDE_INT value = real_to_integer (&r0);
27285 value = value & 0xffffffff;
27286 if ((value != 0) && ( (value & (value - 1)) == 0))
27287 return int_log2 (value);
27290 return 0;
27293 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27295 static void
27296 arm_pre_atomic_barrier (enum memmodel model)
27298 if (need_atomic_barrier_p (model, true))
27299 emit_insn (gen_memory_barrier ());
27302 static void
27303 arm_post_atomic_barrier (enum memmodel model)
27305 if (need_atomic_barrier_p (model, false))
27306 emit_insn (gen_memory_barrier ());
27309 /* Emit the load-exclusive and store-exclusive instructions.
27310 Use acquire and release versions if necessary. */
27312 static void
27313 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27315 rtx (*gen) (rtx, rtx);
27317 if (acq)
27319 switch (mode)
27321 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27322 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27323 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27324 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27325 default:
27326 gcc_unreachable ();
27329 else
27331 switch (mode)
27333 case QImode: gen = gen_arm_load_exclusiveqi; break;
27334 case HImode: gen = gen_arm_load_exclusivehi; break;
27335 case SImode: gen = gen_arm_load_exclusivesi; break;
27336 case DImode: gen = gen_arm_load_exclusivedi; break;
27337 default:
27338 gcc_unreachable ();
27342 emit_insn (gen (rval, mem));
27345 static void
27346 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27347 rtx mem, bool rel)
27349 rtx (*gen) (rtx, rtx, rtx);
27351 if (rel)
27353 switch (mode)
27355 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27356 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27357 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27358 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27359 default:
27360 gcc_unreachable ();
27363 else
27365 switch (mode)
27367 case QImode: gen = gen_arm_store_exclusiveqi; break;
27368 case HImode: gen = gen_arm_store_exclusivehi; break;
27369 case SImode: gen = gen_arm_store_exclusivesi; break;
27370 case DImode: gen = gen_arm_store_exclusivedi; break;
27371 default:
27372 gcc_unreachable ();
27376 emit_insn (gen (bval, rval, mem));
27379 /* Mark the previous jump instruction as unlikely. */
27381 static void
27382 emit_unlikely_jump (rtx insn)
27384 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27386 insn = emit_jump_insn (insn);
27387 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
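/* Worked example (illustrative): REG_BR_PROB_BASE is 10000, so the note
   value computed above is 10000 / 100 - 1 == 99, i.e. the jump is annotated
   as taken roughly 1% of the time.  */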
27390 /* Expand a compare and swap pattern. */
27392 void
27393 arm_expand_compare_and_swap (rtx operands[])
27395 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27396 machine_mode mode;
27397 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27399 bval = operands[0];
27400 rval = operands[1];
27401 mem = operands[2];
27402 oldval = operands[3];
27403 newval = operands[4];
27404 is_weak = operands[5];
27405 mod_s = operands[6];
27406 mod_f = operands[7];
27407 mode = GET_MODE (mem);
27409 /* Normally the succ memory model must be stronger than fail, but in the
27410 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27411 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27413 if (TARGET_HAVE_LDACQ
27414 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27415 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27416 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27418 switch (mode)
27420 case QImode:
27421 case HImode:
27422 /* For narrow modes, we're going to perform the comparison in SImode,
27423 so do the zero-extension now. */
27424 rval = gen_reg_rtx (SImode);
27425 oldval = convert_modes (SImode, mode, oldval, true);
27426 /* FALLTHRU */
27428 case SImode:
27429 /* Force the value into a register if needed. We waited until after
27430 the zero-extension above to do this properly. */
27431 if (!arm_add_operand (oldval, SImode))
27432 oldval = force_reg (SImode, oldval);
27433 break;
27435 case DImode:
27436 if (!cmpdi_operand (oldval, mode))
27437 oldval = force_reg (mode, oldval);
27438 break;
27440 default:
27441 gcc_unreachable ();
27444 switch (mode)
27446 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27447 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27448 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27449 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27450 default:
27451 gcc_unreachable ();
27454 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27456 if (mode == QImode || mode == HImode)
27457 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27459 /* In all cases, we arrange for success to be signaled by Z set.
27460 This arrangement allows for the boolean result to be used directly
27461 in a subsequent branch, post optimization. */
27462 x = gen_rtx_REG (CCmode, CC_REGNUM);
27463 x = gen_rtx_EQ (SImode, x, const0_rtx);
27464 emit_insn (gen_rtx_SET (bval, x));
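/* Illustrative sketch (not part of arm.c): source of the following shape is
   what reaches the expander above; operands[5] is the weak/strong flag and
   operands[6]/operands[7] carry the success and failure memory models.  */
#include <stdbool.h>

static bool
cas_int (int *p, int *expected, int desired)
{
  /* Strong compare-and-swap with seq_cst success/failure orderings.  */
  return __atomic_compare_exchange_n (p, expected, desired,
                                      false /* strong */,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}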
27467 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27468 another memory store between the load-exclusive and store-exclusive can
27469 reset the monitor from Exclusive to Open state. This means we must wait
27470 until after reload to split the pattern, lest we get a register spill in
27471 the middle of the atomic sequence. */
27473 void
27474 arm_split_compare_and_swap (rtx operands[])
27476 rtx rval, mem, oldval, newval, scratch;
27477 machine_mode mode;
27478 enum memmodel mod_s, mod_f;
27479 bool is_weak;
27480 rtx_code_label *label1, *label2;
27481 rtx x, cond;
27483 rval = operands[0];
27484 mem = operands[1];
27485 oldval = operands[2];
27486 newval = operands[3];
27487 is_weak = (operands[4] != const0_rtx);
27488 mod_s = (enum memmodel) INTVAL (operands[5]);
27489 mod_f = (enum memmodel) INTVAL (operands[6]);
27490 scratch = operands[7];
27491 mode = GET_MODE (mem);
27493 bool use_acquire = TARGET_HAVE_LDACQ
27494 && !(mod_s == MEMMODEL_RELAXED
27495 || mod_s == MEMMODEL_CONSUME
27496 || mod_s == MEMMODEL_RELEASE);
27498 bool use_release = TARGET_HAVE_LDACQ
27499 && !(mod_s == MEMMODEL_RELAXED
27500 || mod_s == MEMMODEL_CONSUME
27501 || mod_s == MEMMODEL_ACQUIRE);
27503 /* Checks whether a barrier is needed and emits one accordingly. */
27504 if (!(use_acquire || use_release))
27505 arm_pre_atomic_barrier (mod_s);
27507 label1 = NULL;
27508 if (!is_weak)
27510 label1 = gen_label_rtx ();
27511 emit_label (label1);
27513 label2 = gen_label_rtx ();
27515 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27517 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27518 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27519 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27520 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27521 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27523 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27525 /* Weak or strong, we want EQ to be true for success, so that we
27526 match the flags that we got from the compare above. */
27527 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27528 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27529 emit_insn (gen_rtx_SET (cond, x));
27531 if (!is_weak)
27533 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27534 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27535 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27536 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27539 if (mod_f != MEMMODEL_RELAXED)
27540 emit_label (label2);
27542 /* Checks whether a barrier is needed and emits one accordingly. */
27543 if (!(use_acquire || use_release))
27544 arm_post_atomic_barrier (mod_s);
27546 if (mod_f == MEMMODEL_RELAXED)
27547 emit_label (label2);
27550 void
27551 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27552 rtx value, rtx model_rtx, rtx cond)
27554 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27555 machine_mode mode = GET_MODE (mem);
27556 machine_mode wmode = (mode == DImode ? DImode : SImode);
27557 rtx_code_label *label;
27558 rtx x;
27560 bool use_acquire = TARGET_HAVE_LDACQ
27561 && !(model == MEMMODEL_RELAXED
27562 || model == MEMMODEL_CONSUME
27563 || model == MEMMODEL_RELEASE);
27565 bool use_release = TARGET_HAVE_LDACQ
27566 && !(model == MEMMODEL_RELAXED
27567 || model == MEMMODEL_CONSUME
27568 || model == MEMMODEL_ACQUIRE);
27570 /* Checks whether a barrier is needed and emits one accordingly. */
27571 if (!(use_acquire || use_release))
27572 arm_pre_atomic_barrier (model);
27574 label = gen_label_rtx ();
27575 emit_label (label);
27577 if (new_out)
27578 new_out = gen_lowpart (wmode, new_out);
27579 if (old_out)
27580 old_out = gen_lowpart (wmode, old_out);
27581 else
27582 old_out = new_out;
27583 value = simplify_gen_subreg (wmode, value, mode, 0);
27585 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27587 switch (code)
27589 case SET:
27590 new_out = value;
27591 break;
27593 case NOT:
27594 x = gen_rtx_AND (wmode, old_out, value);
27595 emit_insn (gen_rtx_SET (new_out, x));
27596 x = gen_rtx_NOT (wmode, new_out);
27597 emit_insn (gen_rtx_SET (new_out, x));
27598 break;
27600 case MINUS:
27601 if (CONST_INT_P (value))
27603 value = GEN_INT (-INTVAL (value));
27604 code = PLUS;
27606 /* FALLTHRU */
27608 case PLUS:
27609 if (mode == DImode)
27611 /* DImode plus/minus need to clobber flags. */
27612 /* The adddi3 and subdi3 patterns are incorrectly written so that
27613 they require matching operands, even when we could easily support
27614 three operands. Thankfully, this can be fixed up post-splitting,
27615 as the individual add+adc patterns do accept three operands and
27616 post-reload cprop can make these moves go away. */
27617 emit_move_insn (new_out, old_out);
27618 if (code == PLUS)
27619 x = gen_adddi3 (new_out, new_out, value);
27620 else
27621 x = gen_subdi3 (new_out, new_out, value);
27622 emit_insn (x);
27623 break;
27625 /* FALLTHRU */
27627 default:
27628 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27629 emit_insn (gen_rtx_SET (new_out, x));
27630 break;
27633 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27634 use_release);
27636 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27637 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27639 /* Checks whether a barrier is needed and emits one accordingly. */
27640 if (!(use_acquire || use_release))
27641 arm_post_atomic_barrier (model);
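/* Illustrative sketch (not part of arm.c): a read-modify-write builtin such
   as this is split by arm_split_atomic_op above (CODE == PLUS) into a
   load-exclusive / add / store-exclusive retry loop, with barriers or
   acquire/release forms chosen from the memory model.  */
static int
fetch_add_relaxed (int *p, int v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_RELAXED);
}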
27644 #define MAX_VECT_LEN 16
27646 struct expand_vec_perm_d
27648 rtx target, op0, op1;
27649 unsigned char perm[MAX_VECT_LEN];
27650 machine_mode vmode;
27651 unsigned char nelt;
27652 bool one_vector_p;
27653 bool testing_p;
27656 /* Generate a variable permutation. */
27658 static void
27659 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27661 machine_mode vmode = GET_MODE (target);
27662 bool one_vector_p = rtx_equal_p (op0, op1);
27664 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27665 gcc_checking_assert (GET_MODE (op0) == vmode);
27666 gcc_checking_assert (GET_MODE (op1) == vmode);
27667 gcc_checking_assert (GET_MODE (sel) == vmode);
27668 gcc_checking_assert (TARGET_NEON);
27670 if (one_vector_p)
27672 if (vmode == V8QImode)
27673 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27674 else
27675 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27677 else
27679 rtx pair;
27681 if (vmode == V8QImode)
27683 pair = gen_reg_rtx (V16QImode);
27684 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27685 pair = gen_lowpart (TImode, pair);
27686 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27688 else
27690 pair = gen_reg_rtx (OImode);
27691 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27692 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27697 void
27698 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27700 machine_mode vmode = GET_MODE (target);
27701 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27702 bool one_vector_p = rtx_equal_p (op0, op1);
27703 rtx rmask[MAX_VECT_LEN], mask;
27705 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27706 numbering of elements for big-endian, we must reverse the order. */
27707 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27709 /* The VTBL instruction does not use a modulo index, so we must take care
27710 of that ourselves. */
27711 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27712 for (i = 0; i < nelt; ++i)
27713 rmask[i] = mask;
27714 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27715 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27717 arm_expand_vec_perm_1 (target, op0, op1, sel);
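/* Illustrative sketch (not part of arm.c): on a NEON-enabled build, a
   shuffle whose selector is not a compile-time constant goes through
   arm_expand_vec_perm above -- the indices are ANDed with the mask built
   there and a VTBL1 (one input) or VTBL2 (two inputs) is emitted.  */
typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

static u8x8
permute_variable (u8x8 x, u8x8 sel)
{
  return __builtin_shuffle (x, sel);   /* selector only known at run time */
}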
27720 /* Generate or test for an insn that supports a constant permutation. */
27722 /* Recognize patterns for the VUZP insns. */
27724 static bool
27725 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27727 unsigned int i, odd, mask, nelt = d->nelt;
27728 rtx out0, out1, in0, in1, x;
27729 rtx (*gen)(rtx, rtx, rtx, rtx);
27731 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27732 return false;
27734 /* Note that these are little-endian tests. Adjust for big-endian later. */
27735 if (d->perm[0] == 0)
27736 odd = 0;
27737 else if (d->perm[0] == 1)
27738 odd = 1;
27739 else
27740 return false;
27741 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27743 for (i = 0; i < nelt; i++)
27745 unsigned elt = (i * 2 + odd) & mask;
27746 if (d->perm[i] != elt)
27747 return false;
27750 /* Success! */
27751 if (d->testing_p)
27752 return true;
27754 switch (d->vmode)
27756 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27757 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27758 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27759 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27760 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27761 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27762 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27763 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27764 default:
27765 gcc_unreachable ();
27768 in0 = d->op0;
27769 in1 = d->op1;
27770 if (BYTES_BIG_ENDIAN)
27772 x = in0, in0 = in1, in1 = x;
27773 odd = !odd;
27776 out0 = d->target;
27777 out1 = gen_reg_rtx (d->vmode);
27778 if (odd)
27779 x = out0, out0 = out1, out1 = x;
27781 emit_insn (gen (out0, in0, in1, out1));
27782 return true;
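/* Illustrative sketch (not part of arm.c): for V8QImode (nelt == 8) the
   constant selector below has perm[0] == 0 and perm[i] == 2*i, i.e. the
   even elements of the concatenation {a, b}, so the recognizer above
   succeeds with odd == 0 and a single VUZP is emitted.  The VZIP, VREV and
   VTRN recognizers that follow check for their own characteristic index
   patterns in the same way.  */
typedef unsigned char u8x8_t __attribute__ ((vector_size (8)));

static u8x8_t
even_lanes (u8x8_t a, u8x8_t b)
{
  const u8x8_t sel = { 0, 2, 4, 6, 8, 10, 12, 14 };
  return __builtin_shuffle (a, b, sel);   /* constant two-operand shuffle */
}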
27785 /* Recognize patterns for the VZIP insns. */
27787 static bool
27788 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27790 unsigned int i, high, mask, nelt = d->nelt;
27791 rtx out0, out1, in0, in1, x;
27792 rtx (*gen)(rtx, rtx, rtx, rtx);
27794 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27795 return false;
27797 /* Note that these are little-endian tests. Adjust for big-endian later. */
27798 high = nelt / 2;
27799 if (d->perm[0] == high)
27801 else if (d->perm[0] == 0)
27802 high = 0;
27803 else
27804 return false;
27805 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27807 for (i = 0; i < nelt / 2; i++)
27809 unsigned elt = (i + high) & mask;
27810 if (d->perm[i * 2] != elt)
27811 return false;
27812 elt = (elt + nelt) & mask;
27813 if (d->perm[i * 2 + 1] != elt)
27814 return false;
27817 /* Success! */
27818 if (d->testing_p)
27819 return true;
27821 switch (d->vmode)
27823 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27824 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27825 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27826 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27827 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27828 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27829 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27830 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27831 default:
27832 gcc_unreachable ();
27835 in0 = d->op0;
27836 in1 = d->op1;
27837 if (BYTES_BIG_ENDIAN)
27839 x = in0, in0 = in1, in1 = x;
27840 high = !high;
27843 out0 = d->target;
27844 out1 = gen_reg_rtx (d->vmode);
27845 if (high)
27846 x = out0, out0 = out1, out1 = x;
27848 emit_insn (gen (out0, in0, in1, out1));
27849 return true;
27852 /* Recognize patterns for the VREV insns. */
27854 static bool
27855 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27857 unsigned int i, j, diff, nelt = d->nelt;
27858 rtx (*gen)(rtx, rtx);
27860 if (!d->one_vector_p)
27861 return false;
27863 diff = d->perm[0];
27864 switch (diff)
27866 case 7:
27867 switch (d->vmode)
27869 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27870 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27871 default:
27872 return false;
27874 break;
27875 case 3:
27876 switch (d->vmode)
27878 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27879 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27880 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27881 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27882 default:
27883 return false;
27885 break;
27886 case 1:
27887 switch (d->vmode)
27889 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27890 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27891 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27892 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27893 case V4SImode: gen = gen_neon_vrev64v4si; break;
27894 case V2SImode: gen = gen_neon_vrev64v2si; break;
27895 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27896 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27897 default:
27898 return false;
27900 break;
27901 default:
27902 return false;
27905 for (i = 0; i < nelt ; i += diff + 1)
27906 for (j = 0; j <= diff; j += 1)
27908 /* This is guaranteed to be true, as the value of diff
27909 is 7, 3 or 1 and we should have enough elements in the
27910 queue to generate this. Getting a vector mask with a
27911 value of diff other than these values implies that
27912 something is wrong by the time we get here. */
27913 gcc_assert (i + j < nelt);
27914 if (d->perm[i + j] != i + diff - j)
27915 return false;
27918 /* Success! */
27919 if (d->testing_p)
27920 return true;
27922 emit_insn (gen (d->target, d->op0));
27923 return true;
27926 /* Recognize patterns for the VTRN insns. */
27928 static bool
27929 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27931 unsigned int i, odd, mask, nelt = d->nelt;
27932 rtx out0, out1, in0, in1, x;
27933 rtx (*gen)(rtx, rtx, rtx, rtx);
27935 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27936 return false;
27938 /* Note that these are little-endian tests. Adjust for big-endian later. */
27939 if (d->perm[0] == 0)
27940 odd = 0;
27941 else if (d->perm[0] == 1)
27942 odd = 1;
27943 else
27944 return false;
27945 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27947 for (i = 0; i < nelt; i += 2)
27949 if (d->perm[i] != i + odd)
27950 return false;
27951 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27952 return false;
27955 /* Success! */
27956 if (d->testing_p)
27957 return true;
27959 switch (d->vmode)
27961 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27962 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27963 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27964 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27965 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27966 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27967 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27968 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27969 default:
27970 gcc_unreachable ();
27973 in0 = d->op0;
27974 in1 = d->op1;
27975 if (BYTES_BIG_ENDIAN)
27977 x = in0, in0 = in1, in1 = x;
27978 odd = !odd;
27981 out0 = d->target;
27982 out1 = gen_reg_rtx (d->vmode);
27983 if (odd)
27984 x = out0, out0 = out1, out1 = x;
27986 emit_insn (gen (out0, in0, in1, out1));
27987 return true;
27990 /* Recognize patterns for the VEXT insns. */
27992 static bool
27993 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27995 unsigned int i, nelt = d->nelt;
27996 rtx (*gen) (rtx, rtx, rtx, rtx);
27997 rtx offset;
27999 unsigned int location;
28001 unsigned int next = d->perm[0] + 1;
28003 /* TODO: Handle GCC's numbering of elements for big-endian. */
28004 if (BYTES_BIG_ENDIAN)
28005 return false;
28007 /* Check if the extracted indexes are increasing by one. */
28008 for (i = 1; i < nelt; next++, i++)
28010 /* If we hit the most significant element of the 2nd vector in
28011 the previous iteration, no need to test further. */
28012 if (next == 2 * nelt)
28013 return false;
28015 /* If we are operating on only one vector: it could be a
28016 rotation. If there are only two elements of size < 64, let
28017 arm_evpc_neon_vrev catch it. */
28018 if (d->one_vector_p && (next == nelt))
28020 if ((nelt == 2) && (d->vmode != V2DImode))
28021 return false;
28022 else
28023 next = 0;
28026 if (d->perm[i] != next)
28027 return false;
28030 location = d->perm[0];
28032 switch (d->vmode)
28034 case V16QImode: gen = gen_neon_vextv16qi; break;
28035 case V8QImode: gen = gen_neon_vextv8qi; break;
28036 case V4HImode: gen = gen_neon_vextv4hi; break;
28037 case V8HImode: gen = gen_neon_vextv8hi; break;
28038 case V2SImode: gen = gen_neon_vextv2si; break;
28039 case V4SImode: gen = gen_neon_vextv4si; break;
28040 case V2SFmode: gen = gen_neon_vextv2sf; break;
28041 case V4SFmode: gen = gen_neon_vextv4sf; break;
28042 case V2DImode: gen = gen_neon_vextv2di; break;
28043 default:
28044 return false;
28047 /* Success! */
28048 if (d->testing_p)
28049 return true;
28051 offset = GEN_INT (location);
28052 emit_insn (gen (d->target, d->op0, d->op1, offset));
28053 return true;
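/* Illustrative sketch (not part of arm.c): a selector that takes a run of
   consecutive elements straddling the two inputs, here starting at index 3,
   passes the "increasing by one" check above and is emitted as a single
   VEXT with offset 3.  */
typedef unsigned char u8x8_v __attribute__ ((vector_size (8)));

static u8x8_v
extract_window (u8x8_v a, u8x8_v b)
{
  const u8x8_v sel = { 3, 4, 5, 6, 7, 8, 9, 10 };
  return __builtin_shuffle (a, b, sel);
}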
28056 /* The NEON VTBL instruction is a fully variable permutation that's even
28057 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28058 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28059 can do slightly better by expanding this as a constant where we don't
28060 have to apply a mask. */
28062 static bool
28063 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28065 rtx rperm[MAX_VECT_LEN], sel;
28066 machine_mode vmode = d->vmode;
28067 unsigned int i, nelt = d->nelt;
28069 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28070 numbering of elements for big-endian, we must reverse the order. */
28071 if (BYTES_BIG_ENDIAN)
28072 return false;
28074 if (d->testing_p)
28075 return true;
28077 /* Generic code will try constant permutation twice. Once with the
28078 original mode and again with the elements lowered to QImode.
28079 So wait and don't do the selector expansion ourselves. */
28080 if (vmode != V8QImode && vmode != V16QImode)
28081 return false;
28083 for (i = 0; i < nelt; ++i)
28084 rperm[i] = GEN_INT (d->perm[i]);
28085 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28086 sel = force_reg (vmode, sel);
28088 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28089 return true;
28092 static bool
28093 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28095 /* Check if the input mask matches vext before reordering the
28096 operands. */
28097 if (TARGET_NEON)
28098 if (arm_evpc_neon_vext (d))
28099 return true;
28101 /* The pattern matching functions above are written to look for a small
28102 number to begin the sequence (0, 1, N/2). If we begin with an index
28103 from the second operand, we can swap the operands. */
28104 if (d->perm[0] >= d->nelt)
28106 unsigned i, nelt = d->nelt;
28107 rtx x;
28109 for (i = 0; i < nelt; ++i)
28110 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28112 x = d->op0;
28113 d->op0 = d->op1;
28114 d->op1 = x;
28117 if (TARGET_NEON)
28119 if (arm_evpc_neon_vuzp (d))
28120 return true;
28121 if (arm_evpc_neon_vzip (d))
28122 return true;
28123 if (arm_evpc_neon_vrev (d))
28124 return true;
28125 if (arm_evpc_neon_vtrn (d))
28126 return true;
28127 return arm_evpc_neon_vtbl (d);
28129 return false;
28132 /* Expand a vec_perm_const pattern. */
28134 bool
28135 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28137 struct expand_vec_perm_d d;
28138 int i, nelt, which;
28140 d.target = target;
28141 d.op0 = op0;
28142 d.op1 = op1;
28144 d.vmode = GET_MODE (target);
28145 gcc_assert (VECTOR_MODE_P (d.vmode));
28146 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28147 d.testing_p = false;
28149 for (i = which = 0; i < nelt; ++i)
28151 rtx e = XVECEXP (sel, 0, i);
28152 int ei = INTVAL (e) & (2 * nelt - 1);
28153 which |= (ei < nelt ? 1 : 2);
28154 d.perm[i] = ei;
28157 switch (which)
28159 default:
28160 gcc_unreachable();
28162 case 3:
28163 d.one_vector_p = false;
28164 if (!rtx_equal_p (op0, op1))
28165 break;
28167 /* The elements of PERM do not suggest that only the first operand
28168 is used, but both operands are identical. Allow easier matching
28169 of the permutation by folding the permutation into the single
28170 input vector. */
28171 /* FALLTHRU */
28172 case 2:
28173 for (i = 0; i < nelt; ++i)
28174 d.perm[i] &= nelt - 1;
28175 d.op0 = op1;
28176 d.one_vector_p = true;
28177 break;
28179 case 1:
28180 d.op1 = op0;
28181 d.one_vector_p = true;
28182 break;
28185 return arm_expand_vec_perm_const_1 (&d);
28188 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28190 static bool
28191 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28192 const unsigned char *sel)
28194 struct expand_vec_perm_d d;
28195 unsigned int i, nelt, which;
28196 bool ret;
28198 d.vmode = vmode;
28199 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28200 d.testing_p = true;
28201 memcpy (d.perm, sel, nelt);
28203 /* Categorize the set of elements in the selector. */
28204 for (i = which = 0; i < nelt; ++i)
28206 unsigned char e = d.perm[i];
28207 gcc_assert (e < 2 * nelt);
28208 which |= (e < nelt ? 1 : 2);
28211 /* For all elements from second vector, fold the elements to first. */
28212 if (which == 2)
28213 for (i = 0; i < nelt; ++i)
28214 d.perm[i] -= nelt;
28216 /* Check whether the mask can be applied to the vector type. */
28217 d.one_vector_p = (which != 3);
28219 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28220 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28221 if (!d.one_vector_p)
28222 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28224 start_sequence ();
28225 ret = arm_expand_vec_perm_const_1 (&d);
28226 end_sequence ();
28228 return ret;
28231 bool
28232 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28234 /* If we are soft float then, provided we have ldrd or the mode
28235 fits in a word, all auto increment forms are ok. */
28236 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28237 return true;
28239 switch (code)
28241 /* Post increment and Pre Decrement are supported for all
28242 instruction forms except for vector forms. */
28243 case ARM_POST_INC:
28244 case ARM_PRE_DEC:
28245 if (VECTOR_MODE_P (mode))
28247 if (code != ARM_PRE_DEC)
28248 return true;
28249 else
28250 return false;
28253 return true;
28255 case ARM_POST_DEC:
28256 case ARM_PRE_INC:
28257 /* Without LDRD, when the mode size is greater than
28258 word size there is no point in auto-incrementing
28259 because ldm and stm will not have these forms. */
28260 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28261 return false;
28263 /* Vector and floating point modes do not support
28264 these auto increment forms. */
28265 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28266 return false;
28268 return true;
28270 default:
28271 return false;
28275 return false;
28278 /* The default expansion of general 64-bit shifts in core-regs is
28279 suboptimal on ARM, since we know that shifts by negative amounts are no-ops.
28280 Additionally, the default expansion code is not available or suitable
28281 for post-reload insn splits (this can occur when the register allocator
28282 chooses not to do a shift in NEON).
28284 This function is used in both initial expand and post-reload splits, and
28285 handles all kinds of 64-bit shifts.
28287 Input requirements:
28288 - It is safe for the input and output to be the same register, but
28289 early-clobber rules apply for the shift amount and scratch registers.
28290 - Shift by register requires both scratch registers. In all other cases
28291 the scratch registers may be NULL.
28292 - Ashiftrt by a register also clobbers the CC register. */
28293 void
28294 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28295 rtx amount, rtx scratch1, rtx scratch2)
28297 rtx out_high = gen_highpart (SImode, out);
28298 rtx out_low = gen_lowpart (SImode, out);
28299 rtx in_high = gen_highpart (SImode, in);
28300 rtx in_low = gen_lowpart (SImode, in);
28302 /* Terminology:
28303 in = the register pair containing the input value.
28304 out = the destination register pair.
28305 up = the high- or low-part of each pair.
28306 down = the opposite part to "up".
28307 In a shift, we can consider bits to shift from "up"-stream to
28308 "down"-stream, so in a left-shift "up" is the low-part and "down"
28309 is the high-part of each register pair. */
28311 rtx out_up = code == ASHIFT ? out_low : out_high;
28312 rtx out_down = code == ASHIFT ? out_high : out_low;
28313 rtx in_up = code == ASHIFT ? in_low : in_high;
28314 rtx in_down = code == ASHIFT ? in_high : in_low;
28316 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28317 gcc_assert (out
28318 && (REG_P (out) || GET_CODE (out) == SUBREG)
28319 && GET_MODE (out) == DImode);
28320 gcc_assert (in
28321 && (REG_P (in) || GET_CODE (in) == SUBREG)
28322 && GET_MODE (in) == DImode);
28323 gcc_assert (amount
28324 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28325 && GET_MODE (amount) == SImode)
28326 || CONST_INT_P (amount)));
28327 gcc_assert (scratch1 == NULL
28328 || (GET_CODE (scratch1) == SCRATCH)
28329 || (GET_MODE (scratch1) == SImode
28330 && REG_P (scratch1)));
28331 gcc_assert (scratch2 == NULL
28332 || (GET_CODE (scratch2) == SCRATCH)
28333 || (GET_MODE (scratch2) == SImode
28334 && REG_P (scratch2)));
28335 gcc_assert (!REG_P (out) || !REG_P (amount)
28336 || !HARD_REGISTER_P (out)
28337 || (REGNO (out) != REGNO (amount)
28338 && REGNO (out) + 1 != REGNO (amount)));
28340 /* Macros to make following code more readable. */
28341 #define SUB_32(DEST,SRC) \
28342 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28343 #define RSB_32(DEST,SRC) \
28344 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28345 #define SUB_S_32(DEST,SRC) \
28346 gen_addsi3_compare0 ((DEST), (SRC), \
28347 GEN_INT (-32))
28348 #define SET(DEST,SRC) \
28349 gen_rtx_SET ((DEST), (SRC))
28350 #define SHIFT(CODE,SRC,AMOUNT) \
28351 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28352 #define LSHIFT(CODE,SRC,AMOUNT) \
28353 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28354 SImode, (SRC), (AMOUNT))
28355 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28356 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28357 SImode, (SRC), (AMOUNT))
28358 #define ORR(A,B) \
28359 gen_rtx_IOR (SImode, (A), (B))
28360 #define BRANCH(COND,LABEL) \
28361 gen_arm_cond_branch ((LABEL), \
28362 gen_rtx_ ## COND (CCmode, cc_reg, \
28363 const0_rtx), \
28364 cc_reg)
28366 /* Shifts by register and shifts by constant are handled separately. */
28367 if (CONST_INT_P (amount))
28369 /* We have a shift-by-constant. */
28371 /* First, handle out-of-range shift amounts.
28372 In both cases we try to match the result an ARM instruction in a
28373 shift-by-register would give. This helps reduce execution
28374 differences between optimization levels, but it won't stop other
28375 parts of the compiler doing different things. This is "undefined
28376 behaviour", in any case. */
28377 if (INTVAL (amount) <= 0)
28378 emit_insn (gen_movdi (out, in));
28379 else if (INTVAL (amount) >= 64)
28381 if (code == ASHIFTRT)
28383 rtx const31_rtx = GEN_INT (31);
28384 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28385 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28387 else
28388 emit_insn (gen_movdi (out, const0_rtx));
28391 /* Now handle valid shifts. */
28392 else if (INTVAL (amount) < 32)
28394 /* Shifts by a constant less than 32. */
28395 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28397 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28398 emit_insn (SET (out_down,
28399 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28400 out_down)));
28401 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28403 else
28405 /* Shifts by a constant greater than 31. */
28406 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28408 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28409 if (code == ASHIFTRT)
28410 emit_insn (gen_ashrsi3 (out_up, in_up,
28411 GEN_INT (31)));
28412 else
28413 emit_insn (SET (out_up, const0_rtx));
28416 else
28418 /* We have a shift-by-register. */
28419 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28421 /* This alternative requires the scratch registers. */
28422 gcc_assert (scratch1 && REG_P (scratch1));
28423 gcc_assert (scratch2 && REG_P (scratch2));
28425 /* We will need the values "amount-32" and "32-amount" later.
28426 Swapping them around now allows the later code to be more general. */
28427 switch (code)
28429 case ASHIFT:
28430 emit_insn (SUB_32 (scratch1, amount));
28431 emit_insn (RSB_32 (scratch2, amount));
28432 break;
28433 case ASHIFTRT:
28434 emit_insn (RSB_32 (scratch1, amount));
28435 /* Also set CC = amount > 32. */
28436 emit_insn (SUB_S_32 (scratch2, amount));
28437 break;
28438 case LSHIFTRT:
28439 emit_insn (RSB_32 (scratch1, amount));
28440 emit_insn (SUB_32 (scratch2, amount));
28441 break;
28442 default:
28443 gcc_unreachable ();
28446 /* Emit code like this:
28448 arithmetic-left:
28449 out_down = in_down << amount;
28450 out_down = (in_up << (amount - 32)) | out_down;
28451 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28452 out_up = in_up << amount;
28454 arithmetic-right:
28455 out_down = in_down >> amount;
28456 out_down = (in_up << (32 - amount)) | out_down;
28457 if (amount < 32)
28458 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28459 out_up = in_up << amount;
28461 logical-right:
28462 out_down = in_down >> amount;
28463 out_down = (in_up << (32 - amount)) | out_down;
28464 if (amount < 32)
28465 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28466 out_up = in_up << amount;
28468 The ARM and Thumb2 variants are the same but implemented slightly
28469 differently. If this were only called during expand we could just
28470 use the Thumb2 case and let combine do the right thing, but this
28471 can also be called from post-reload splitters. */
28473 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28475 if (!TARGET_THUMB2)
28477 /* Emit code for ARM mode. */
28478 emit_insn (SET (out_down,
28479 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28480 if (code == ASHIFTRT)
28482 rtx_code_label *done_label = gen_label_rtx ();
28483 emit_jump_insn (BRANCH (LT, done_label));
28484 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28485 out_down)));
28486 emit_label (done_label);
28488 else
28489 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28490 out_down)));
28492 else
28494 /* Emit code for Thumb2 mode.
28495 Thumb2 can't do shift and or in one insn. */
28496 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28497 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28499 if (code == ASHIFTRT)
28501 rtx_code_label *done_label = gen_label_rtx ();
28502 emit_jump_insn (BRANCH (LT, done_label));
28503 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28504 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28505 emit_label (done_label);
28507 else
28509 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28510 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28514 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28517 #undef SUB_32
28518 #undef RSB_32
28519 #undef SUB_S_32
28520 #undef SET
28521 #undef SHIFT
28522 #undef LSHIFT
28523 #undef REV_LSHIFT
28524 #undef ORR
28525 #undef BRANCH
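/* Illustrative sketch (not part of arm.c): a 64-bit shift by a run-time
   amount, when the value lives in a core-register pair, is the case the
   helper above expands or splits; the constant and register paths emit the
   sequences described in the comments inside it.  */
static unsigned long long
shl64 (unsigned long long x, int n)
{
  return x << n;
}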
28529 /* Return true if this is a valid comparison operation, and force
28530 the operands into a form that is valid. */
28531 bool
28532 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28534 enum rtx_code code = GET_CODE (*comparison);
28535 int code_int;
28536 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28537 ? GET_MODE (*op2) : GET_MODE (*op1);
28539 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28541 if (code == UNEQ || code == LTGT)
28542 return false;
28544 code_int = (int)code;
28545 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28546 PUT_CODE (*comparison, (enum rtx_code)code_int);
28548 switch (mode)
28550 case SImode:
28551 if (!arm_add_operand (*op1, mode))
28552 *op1 = force_reg (mode, *op1);
28553 if (!arm_add_operand (*op2, mode))
28554 *op2 = force_reg (mode, *op2);
28555 return true;
28557 case DImode:
28558 if (!cmpdi_operand (*op1, mode))
28559 *op1 = force_reg (mode, *op1);
28560 if (!cmpdi_operand (*op2, mode))
28561 *op2 = force_reg (mode, *op2);
28562 return true;
28564 case SFmode:
28565 case DFmode:
28566 if (!arm_float_compare_operand (*op1, mode))
28567 *op1 = force_reg (mode, *op1);
28568 if (!arm_float_compare_operand (*op2, mode))
28569 *op2 = force_reg (mode, *op2);
28570 return true;
28571 default:
28572 break;
28575 return false;
28579 /* Maximum number of instructions to set block of memory. */
28580 static int
28581 arm_block_set_max_insns (void)
28583 if (optimize_function_for_size_p (cfun))
28584 return 4;
28585 else
28586 return current_tune->max_insns_inline_memset;
28589 /* Return TRUE if it's profitable to set a block of memory for the
28590 non-vectorized case. VAL is the value to set the memory
28591 with. LENGTH is the number of bytes to set. ALIGN is the
28592 alignment of the destination memory in bytes. UNALIGNED_P
28593 is TRUE if we can only set the memory with instructions
28594 meeting alignment requirements. USE_STRD_P is TRUE if we
28595 can use strd to set the memory. */
28596 static bool
28597 arm_block_set_non_vect_profit_p (rtx val,
28598 unsigned HOST_WIDE_INT length,
28599 unsigned HOST_WIDE_INT align,
28600 bool unaligned_p, bool use_strd_p)
28602 int num = 0;
28603 /* For leftovers of 0-7 bytes, we can set the memory block using
28604 strb/strh/str with the minimum number of instructions. */
28605 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28607 if (unaligned_p)
28609 num = arm_const_inline_cost (SET, val);
28610 num += length / align + length % align;
28612 else if (use_strd_p)
28614 num = arm_const_double_inline_cost (val);
28615 num += (length >> 3) + leftover[length & 7];
28617 else
28619 num = arm_const_inline_cost (SET, val);
28620 num += (length >> 2) + leftover[length & 3];
28623 /* We may be able to combine last pair STRH/STRB into a single STR
28624 by shifting one byte back. */
28625 if (unaligned_access && length > 3 && (length & 3) == 3)
28626 num--;
28628 return (num <= arm_block_set_max_insns ());
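/* Worked example (illustrative): for LENGTH == 15 with 4-byte alignment on
   the plain str path, num is the cost of loading the constant plus
   (15 >> 2) == 3 word stores plus leftover[15 & 3] == 2 trailing stores;
   when unaligned access is allowed the trailing STRH/STRB pair is merged
   into one STR, saving one more instruction before the comparison with
   arm_block_set_max_insns ().  */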
28631 /* Return TRUE if it's profitable to set a block of memory for the
28632 vectorized case. LENGTH is the number of bytes to set.
28633 ALIGN is the alignment of destination memory in bytes.
28634 MODE is the vector mode used to set the memory. */
28635 static bool
28636 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28637 unsigned HOST_WIDE_INT align,
28638 machine_mode mode)
28640 int num;
28641 bool unaligned_p = ((align & 3) != 0);
28642 unsigned int nelt = GET_MODE_NUNITS (mode);
28644 /* Instruction loading constant value. */
28645 num = 1;
28646 /* Instructions storing the memory. */
28647 num += (length + nelt - 1) / nelt;
28648 /* Instructions adjusting the address expression. We only need to
28649 adjust the address expression if the destination is 4-byte aligned
28650 and the leftover bytes can only be stored by a misaligned store instruction. */
28651 if (!unaligned_p && (length & 3) != 0)
28652 num++;
28654 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28655 if (!unaligned_p && mode == V16QImode)
28656 num--;
28658 return (num <= arm_block_set_max_insns ());
28661 /* Set a block of memory using vectorization instructions for the
28662 unaligned case. We fill the first LENGTH bytes of the memory
28663 area starting from DSTBASE with byte constant VALUE. ALIGN is
28664 the alignment requirement of memory. Return TRUE if succeeded. */
28665 static bool
28666 arm_block_set_unaligned_vect (rtx dstbase,
28667 unsigned HOST_WIDE_INT length,
28668 unsigned HOST_WIDE_INT value,
28669 unsigned HOST_WIDE_INT align)
28671 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28672 rtx dst, mem;
28673 rtx val_elt, val_vec, reg;
28674 rtx rval[MAX_VECT_LEN];
28675 rtx (*gen_func) (rtx, rtx);
28676 machine_mode mode;
28677 unsigned HOST_WIDE_INT v = value;
28679 gcc_assert ((align & 0x3) != 0);
28680 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28681 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28682 if (length >= nelt_v16)
28684 mode = V16QImode;
28685 gen_func = gen_movmisalignv16qi;
28687 else
28689 mode = V8QImode;
28690 gen_func = gen_movmisalignv8qi;
28692 nelt_mode = GET_MODE_NUNITS (mode);
28693 gcc_assert (length >= nelt_mode);
28694 /* Skip if it isn't profitable. */
28695 if (!arm_block_set_vect_profit_p (length, align, mode))
28696 return false;
28698 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28699 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28701 v = sext_hwi (v, BITS_PER_WORD);
28702 val_elt = GEN_INT (v);
28703 for (j = 0; j < nelt_mode; j++)
28704 rval[j] = val_elt;
28706 reg = gen_reg_rtx (mode);
28707 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28708 /* Emit instruction loading the constant value. */
28709 emit_move_insn (reg, val_vec);
28711 /* Handle nelt_mode bytes in a vector. */
28712 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28714 emit_insn ((*gen_func) (mem, reg));
28715 if (i + 2 * nelt_mode <= length)
28716 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28719 /* If there are at least nelt_v8 bytes leftover, we must be in
28720 V16QI mode. */
28721 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28723 /* Handle (8, 16) bytes leftover. */
28724 if (i + nelt_v8 < length)
28726 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28727 /* We are shifting bytes back, set the alignment accordingly. */
28728 if ((length & 1) != 0 && align >= 2)
28729 set_mem_align (mem, BITS_PER_UNIT);
28731 emit_insn (gen_movmisalignv16qi (mem, reg));
28733 /* Handle (0, 8] bytes leftover. */
28734 else if (i < length && i + nelt_v8 >= length)
28736 if (mode == V16QImode)
28738 reg = gen_lowpart (V8QImode, reg);
28739 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28741 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28742 + (nelt_mode - nelt_v8))));
28743 /* We are shifting bytes back, set the alignment accordingly. */
28744 if ((length & 1) != 0 && align >= 2)
28745 set_mem_align (mem, BITS_PER_UNIT);
28747 emit_insn (gen_movmisalignv8qi (mem, reg));
28750 return true;
28753 /* Set a block of memory using vectorization instructions for the
28754 aligned case. We fill the first LENGTH bytes of the memory area
28755 starting from DSTBASE with byte constant VALUE. ALIGN is the
28756 alignment requirement of memory. Return TRUE if succeeded. */
28757 static bool
28758 arm_block_set_aligned_vect (rtx dstbase,
28759 unsigned HOST_WIDE_INT length,
28760 unsigned HOST_WIDE_INT value,
28761 unsigned HOST_WIDE_INT align)
28763 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28764 rtx dst, addr, mem;
28765 rtx val_elt, val_vec, reg;
28766 rtx rval[MAX_VECT_LEN];
28767 machine_mode mode;
28768 unsigned HOST_WIDE_INT v = value;
28770 gcc_assert ((align & 0x3) == 0);
28771 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28772 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28773 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28774 mode = V16QImode;
28775 else
28776 mode = V8QImode;
28778 nelt_mode = GET_MODE_NUNITS (mode);
28779 gcc_assert (length >= nelt_mode);
28780 /* Skip if it isn't profitable. */
28781 if (!arm_block_set_vect_profit_p (length, align, mode))
28782 return false;
28784 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28786 v = sext_hwi (v, BITS_PER_WORD);
28787 val_elt = GEN_INT (v);
28788 for (j = 0; j < nelt_mode; j++)
28789 rval[j] = val_elt;
28791 reg = gen_reg_rtx (mode);
28792 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28793 /* Emit instruction loading the constant value. */
28794 emit_move_insn (reg, val_vec);
28796 i = 0;
28797 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28798 if (mode == V16QImode)
28800 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28801 emit_insn (gen_movmisalignv16qi (mem, reg));
28802 i += nelt_mode;
28803 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28804 if (i + nelt_v8 < length && i + nelt_v16 > length)
28806 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28807 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28808 /* We are shifting bytes back, set the alignment accordingly. */
28809 if ((length & 0x3) == 0)
28810 set_mem_align (mem, BITS_PER_UNIT * 4);
28811 else if ((length & 0x1) == 0)
28812 set_mem_align (mem, BITS_PER_UNIT * 2);
28813 else
28814 set_mem_align (mem, BITS_PER_UNIT);
28816 emit_insn (gen_movmisalignv16qi (mem, reg));
28817 return true;
28819 /* Fall through for bytes leftover. */
28820 mode = V8QImode;
28821 nelt_mode = GET_MODE_NUNITS (mode);
28822 reg = gen_lowpart (V8QImode, reg);
28825 /* Handle 8 bytes in a vector. */
28826 for (; (i + nelt_mode <= length); i += nelt_mode)
28828 addr = plus_constant (Pmode, dst, i);
28829 mem = adjust_automodify_address (dstbase, mode, addr, i);
28830 emit_move_insn (mem, reg);
28833 /* Handle single word leftover by shifting 4 bytes back. We can
28834 use aligned access for this case. */
28835 if (i + UNITS_PER_WORD == length)
28837 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28838 mem = adjust_automodify_address (dstbase, mode,
28839 addr, i - UNITS_PER_WORD);
28840 /* We are shifting 4 bytes back, set the alignment accordingly. */
28841 if (align > UNITS_PER_WORD)
28842 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28844 emit_move_insn (mem, reg);
28846 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28847 We have to use unaligned access for this case. */
28848 else if (i < length)
28850 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28851 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28852 /* We are shifting bytes back, set the alignment accordingly. */
28853 if ((length & 1) == 0)
28854 set_mem_align (mem, BITS_PER_UNIT * 2);
28855 else
28856 set_mem_align (mem, BITS_PER_UNIT);
28858 emit_insn (gen_movmisalignv8qi (mem, reg));
28861 return true;
28864 /* Set a block of memory using plain strh/strb instructions, only
28865 using instructions allowed by ALIGN on the processor. We fill the
28866 first LENGTH bytes of the memory area starting from DSTBASE
28867 with byte constant VALUE. ALIGN is the alignment requirement
28868 of memory. */
28869 static bool
28870 arm_block_set_unaligned_non_vect (rtx dstbase,
28871 unsigned HOST_WIDE_INT length,
28872 unsigned HOST_WIDE_INT value,
28873 unsigned HOST_WIDE_INT align)
28875 unsigned int i;
28876 rtx dst, addr, mem;
28877 rtx val_exp, val_reg, reg;
28878 machine_mode mode;
28879 HOST_WIDE_INT v = value;
28881 gcc_assert (align == 1 || align == 2);
28883 if (align == 2)
28884 v |= (value << BITS_PER_UNIT);
28886 v = sext_hwi (v, BITS_PER_WORD);
28887 val_exp = GEN_INT (v);
28888 /* Skip if it isn't profitable. */
28889 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28890 align, true, false))
28891 return false;
28893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28894 mode = (align == 2 ? HImode : QImode);
28895 val_reg = force_reg (SImode, val_exp);
28896 reg = gen_lowpart (mode, val_reg);
28898 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28900 addr = plus_constant (Pmode, dst, i);
28901 mem = adjust_automodify_address (dstbase, mode, addr, i);
28902 emit_move_insn (mem, reg);
28905 /* Handle single byte leftover. */
28906 if (i + 1 == length)
28908 reg = gen_lowpart (QImode, val_reg);
28909 addr = plus_constant (Pmode, dst, i);
28910 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28911 emit_move_insn (mem, reg);
28912 i++;
28915 gcc_assert (i == length);
28916 return true;
28919 /* Set a block of memory using plain strd/str/strh/strb instructions,
28920 to permit unaligned copies on processors which support unaligned
28921 semantics for those instructions. We fill the first LENGTH bytes
28922 of the memory area starting from DSTBASE with byte constant VALUE.
28923 ALIGN is the alignment requirement of memory. */
28924 static bool
28925 arm_block_set_aligned_non_vect (rtx dstbase,
28926 unsigned HOST_WIDE_INT length,
28927 unsigned HOST_WIDE_INT value,
28928 unsigned HOST_WIDE_INT align)
28930 unsigned int i;
28931 rtx dst, addr, mem;
28932 rtx val_exp, val_reg, reg;
28933 unsigned HOST_WIDE_INT v;
28934 bool use_strd_p;
28936 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28937 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
28939 v = (value | (value << 8) | (value << 16) | (value << 24));
28940 if (length < UNITS_PER_WORD)
28941 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
28943 if (use_strd_p)
28944 v |= (v << BITS_PER_WORD);
28945 else
28946 v = sext_hwi (v, BITS_PER_WORD);
28948 val_exp = GEN_INT (v);
28949 /* Skip if it isn't profitable. */
28950 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28951 align, false, use_strd_p))
28953 if (!use_strd_p)
28954 return false;
28956 /* Try without strd. */
28957 v = (v >> BITS_PER_WORD);
28958 v = sext_hwi (v, BITS_PER_WORD);
28959 val_exp = GEN_INT (v);
28960 use_strd_p = false;
28961 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28962 align, false, use_strd_p))
28963 return false;
28966 i = 0;
28967 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28968 /* Handle double words using strd if possible. */
28969 if (use_strd_p)
28971 val_reg = force_reg (DImode, val_exp);
28972 reg = val_reg;
28973 for (; (i + 8 <= length); i += 8)
28975 addr = plus_constant (Pmode, dst, i);
28976 mem = adjust_automodify_address (dstbase, DImode, addr, i);
28977 emit_move_insn (mem, reg);
28980 else
28981 val_reg = force_reg (SImode, val_exp);
28983 /* Handle words. */
28984 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
28985 for (; (i + 4 <= length); i += 4)
28987 addr = plus_constant (Pmode, dst, i);
28988 mem = adjust_automodify_address (dstbase, SImode, addr, i);
28989 if ((align & 3) == 0)
28990 emit_move_insn (mem, reg);
28991 else
28992 emit_insn (gen_unaligned_storesi (mem, reg));
28995 /* Merge last pair of STRH and STRB into a STR if possible. */
28996 if (unaligned_access && i > 0 && (i + 3) == length)
28998 addr = plus_constant (Pmode, dst, i - 1);
28999 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29000 /* We are shifting one byte back, set the alignment accordingly. */
29001 if ((align & 1) == 0)
29002 set_mem_align (mem, BITS_PER_UNIT);
29004 /* Most likely this is an unaligned access, and we can't tell at
29005 compilation time. */
29006 emit_insn (gen_unaligned_storesi (mem, reg));
29007 return true;
29010 /* Handle half word leftover. */
29011 if (i + 2 <= length)
29013 reg = gen_lowpart (HImode, val_reg);
29014 addr = plus_constant (Pmode, dst, i);
29015 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29016 if ((align & 1) == 0)
29017 emit_move_insn (mem, reg);
29018 else
29019 emit_insn (gen_unaligned_storehi (mem, reg));
29021 i += 2;
29024 /* Handle single byte leftover. */
29025 if (i + 1 == length)
29027 reg = gen_lowpart (QImode, val_reg);
29028 addr = plus_constant (Pmode, dst, i);
29029 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29030 emit_move_insn (mem, reg);
29033 return true;
29036 /* Set a block of memory using vectorization instructions for both
29037 aligned and unaligned cases. We fill the first LENGTH bytes of
29038 the memory area starting from DSTBASE with byte constant VALUE.
29039 ALIGN is the alignment requirement of memory. */
29040 static bool
29041 arm_block_set_vect (rtx dstbase,
29042 unsigned HOST_WIDE_INT length,
29043 unsigned HOST_WIDE_INT value,
29044 unsigned HOST_WIDE_INT align)
29046 /* Check whether we need to use unaligned store instruction. */
29047 if (((align & 3) != 0 || (length & 3) != 0)
29048 /* Check whether unaligned store instruction is available. */
29049 && (!unaligned_access || BYTES_BIG_ENDIAN))
29050 return false;
29052 if ((align & 3) == 0)
29053 return arm_block_set_aligned_vect (dstbase, length, value, align);
29054 else
29055 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29058 /* Expand a string store operation. First we try to do it using
29059 vectorization instructions, then try with ARM unaligned access and
29060 double-word store if profitable. OPERANDS[0] is the destination,
29061 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29062 initialize the memory, OPERANDS[3] is the known alignment of the
29063 destination. */
29064 bool
29065 arm_gen_setmem (rtx *operands)
29067 rtx dstbase = operands[0];
29068 unsigned HOST_WIDE_INT length;
29069 unsigned HOST_WIDE_INT value;
29070 unsigned HOST_WIDE_INT align;
29072 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29073 return false;
29075 length = UINTVAL (operands[1]);
29076 if (length > 64)
29077 return false;
29079 value = (UINTVAL (operands[2]) & 0xFF);
29080 align = UINTVAL (operands[3]);
29081 if (TARGET_NEON && length >= 8
29082 && current_tune->string_ops_prefer_neon
29083 && arm_block_set_vect (dstbase, length, value, align))
29084 return true;
29086 if (!unaligned_access && (align & 3) != 0)
29087 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29089 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
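/* Illustrative sketch (not part of arm.c): a memset with a constant byte
   value and a constant length of at most 64 bytes, such as the call below,
   is the kind of block set arm_gen_setmem above is asked to open-code.  */
#include <string.h>

static void
clear_header (unsigned char *buf)
{
  memset (buf, 0, 24);   /* constant value and length <= 64 bytes */
}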
29093 static bool
29094 arm_macro_fusion_p (void)
29096 return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
29100 static bool
29101 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29103 rtx set_dest;
29104 rtx prev_set = single_set (prev);
29105 rtx curr_set = single_set (curr);
29107 if (!prev_set
29108 || !curr_set)
29109 return false;
29111 if (any_condjump_p (curr))
29112 return false;
29114 if (!arm_macro_fusion_p ())
29115 return false;
29117 if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
29119 /* We are trying to fuse
29120 movw imm / movt imm
29121 instructions as a group that gets scheduled together. */
29123 set_dest = SET_DEST (curr_set);
29125 if (GET_MODE (set_dest) != SImode)
29126 return false;
29128 /* We are trying to match:
29129 prev (movw) == (set (reg r0) (const_int imm16))
29130 curr (movt) == (set (zero_extract (reg r0)
29131 (const_int 16)
29132 (const_int 16))
29133 (const_int imm16_1))
29135 prev (movw) == (set (reg r1)
29136 (high (symbol_ref ("SYM"))))
29137 curr (movt) == (set (reg r0)
29138 (lo_sum (reg r1)
29139 (symbol_ref ("SYM")))) */
29140 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29142 if (CONST_INT_P (SET_SRC (curr_set))
29143 && CONST_INT_P (SET_SRC (prev_set))
29144 && REG_P (XEXP (set_dest, 0))
29145 && REG_P (SET_DEST (prev_set))
29146 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29147 return true;
29149 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29150 && REG_P (SET_DEST (curr_set))
29151 && REG_P (SET_DEST (prev_set))
29152 && GET_CODE (SET_SRC (prev_set)) == HIGH
29153 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29154 return true;
29156 return false;
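/* Illustrative sketch (not part of arm.c; `counter' is a hypothetical
   symbol): on a core with MOVW/MOVT, taking the address of a global
   produces the high/lo_sum pair described above, which the fusion hook
   keeps adjacent for the scheduler when ARM_FUSE_MOVW_MOVT is enabled.  */
extern int counter;

static int *
address_of_counter (void)
{
  return &counter;   /* movw rN, #:lower16:counter ; movt rN, #:upper16:counter */
}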
29159 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29161 static unsigned HOST_WIDE_INT
29162 arm_asan_shadow_offset (void)
29164 return (unsigned HOST_WIDE_INT) 1 << 29;
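/* Illustrative sketch (assumes the generic ASan mapping with shadow scale 3;
   not part of arm.c): the instrumentation computes a shadow address as
   (addr >> 3) plus the offset returned above, i.e. 1 << 29 on ARM.  */
static unsigned int
asan_shadow_addr (unsigned int addr)
{
  return (addr >> 3) + (1u << 29);
}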
29168 /* This is a temporary fix for PR60655. Ideally we need
29169 to handle most of these cases in the generic part but
29170 currently we reject minus (..) (sym_ref). We try to
29171 ameliorate the case with minus (sym_ref1) (sym_ref2)
29172 where they are in the same section. */
29174 static bool
29175 arm_const_not_ok_for_debug_p (rtx p)
29177 tree decl_op0 = NULL;
29178 tree decl_op1 = NULL;
29180 if (GET_CODE (p) == MINUS)
29182 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29184 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29185 if (decl_op1
29186 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29187 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29189 if ((TREE_CODE (decl_op1) == VAR_DECL
29190 || TREE_CODE (decl_op1) == CONST_DECL)
29191 && (TREE_CODE (decl_op0) == VAR_DECL
29192 || TREE_CODE (decl_op0) == CONST_DECL))
29193 return (get_variable_section (decl_op1, false)
29194 != get_variable_section (decl_op0, false));
29196 if (TREE_CODE (decl_op1) == LABEL_DECL
29197 && TREE_CODE (decl_op0) == LABEL_DECL)
29198 return (DECL_CONTEXT (decl_op1)
29199 != DECL_CONTEXT (decl_op0));
29202 return true;
29206 return false;
29209 /* Return TRUE if X is a reference to a value in a constant pool. */
29210 extern bool
29211 arm_is_constant_pool_ref (rtx x)
29213 return (MEM_P (x)
29214 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29215 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29218 /* If MEM is in the form of [base+offset], extract the two parts
29219 of the address into BASE and OFFSET; otherwise return false
29220 after clearing BASE and OFFSET. */
29222 static bool
29223 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29225 rtx addr;
29227 gcc_assert (MEM_P (mem));
29229 addr = XEXP (mem, 0);
29231 /* Strip off const from addresses like (const (addr)). */
29232 if (GET_CODE (addr) == CONST)
29233 addr = XEXP (addr, 0);
29235 if (GET_CODE (addr) == REG)
29237 *base = addr;
29238 *offset = const0_rtx;
29239 return true;
29242 if (GET_CODE (addr) == PLUS
29243 && GET_CODE (XEXP (addr, 0)) == REG
29244 && CONST_INT_P (XEXP (addr, 1)))
29246 *base = XEXP (addr, 0);
29247 *offset = XEXP (addr, 1);
29248 return true;
29251 *base = NULL_RTX;
29252 *offset = NULL_RTX;
29254 return false;
29257 /* If INSN is a load or store whose address is in the form [base+offset],
29258 extract the two parts into BASE and OFFSET. IS_LOAD is set
29259 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29260 otherwise return FALSE. */
29262 static bool
29263 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29265 rtx x, dest, src;
29267 gcc_assert (INSN_P (insn));
29268 x = PATTERN (insn);
29269 if (GET_CODE (x) != SET)
29270 return false;
29272 src = SET_SRC (x);
29273 dest = SET_DEST (x);
29274 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29276 *is_load = false;
29277 extract_base_offset_in_addr (dest, base, offset);
29279 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29281 *is_load = true;
29282 extract_base_offset_in_addr (src, base, offset);
29284 else
29285 return false;
29287 return (*base != NULL_RTX && *offset != NULL_RTX);
29290 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29292 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29293 and PRI are only calculated for these instructions. For other instructions,
29294 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29295 instruction fusion can be supported by returning different priorities.
29297 It's important that irrelevant instructions get the largest FUSION_PRI. */
29299 static void
29300 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29301 int *fusion_pri, int *pri)
29303 int tmp, off_val;
29304 bool is_load;
29305 rtx base, offset;
29307 gcc_assert (INSN_P (insn));
29309 tmp = max_pri - 1;
29310 if (!fusion_load_store (insn, &base, &offset, &is_load))
29312 *pri = tmp;
29313 *fusion_pri = tmp;
29314 return;
29317 /* Load goes first. */
29318 if (is_load)
29319 *fusion_pri = tmp - 1;
29320 else
29321 *fusion_pri = tmp - 2;
29323 tmp /= 2;
29325 /* INSN with smaller base register goes first. */
29326 tmp -= ((REGNO (base) & 0xff) << 20);
29328 /* INSN with smaller offset goes first. */
29329 off_val = (int)(INTVAL (offset));
29330 if (off_val >= 0)
29331 tmp -= (off_val & 0xfffff);
29332 else
29333 tmp += ((- off_val) & 0xfffff);
29335 *pri = tmp;
29336 return;
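/* Illustrative sketch (not part of arm.c): two loads from adjacent offsets
   off the same base register, as below, are given the same FUSION_PRI and
   offset-ordered PRI values by the hook above, so the scheduler keeps them
   next to each other and later passes can turn them into LDRD/LDM.  */
static int
sum_pair (const int *p)
{
  return p[0] + p[1];   /* loads from [p] and [p, #4] share the base */
}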
29338 #include "gt-arm.h"