[official-gcc.git] / gcc / config / arm / arm.c (blob f4c4ebd4a2901c1025eb48b730033c65233489cc)
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "optabs.h"
51 #include "diagnostic-core.h"
52 #include "recog.h"
53 #include "predict.h"
54 #include "dominance.h"
55 #include "cfg.h"
56 #include "cfgrtl.h"
57 #include "cfganal.h"
58 #include "lcm.h"
59 #include "cfgbuild.h"
60 #include "cfgcleanup.h"
61 #include "basic-block.h"
62 #include "hash-map.h"
63 #include "is-a.h"
64 #include "plugin-api.h"
65 #include "ipa-ref.h"
66 #include "cgraph.h"
67 #include "ggc.h"
68 #include "except.h"
69 #include "tm_p.h"
70 #include "target.h"
71 #include "sched-int.h"
72 #include "target-def.h"
73 #include "debug.h"
74 #include "langhooks.h"
75 #include "df.h"
76 #include "intl.h"
77 #include "libfuncs.h"
78 #include "params.h"
79 #include "opts.h"
80 #include "dumpfile.h"
81 #include "gimple-expr.h"
82 #include "builtins.h"
83 #include "tm-constrs.h"
85 /* Forward definitions of types. */
86 typedef struct minipool_node Mnode;
87 typedef struct minipool_fixup Mfix;
89 void (*arm_lang_output_object_attributes_hook)(void);
91 struct four_ints
93 int i[4];
96 /* Forward function declarations. */
97 static bool arm_const_not_ok_for_debug_p (rtx);
98 static bool arm_lra_p (void);
99 static bool arm_needs_doubleword_align (machine_mode, const_tree);
100 static int arm_compute_static_chain_stack_bytes (void);
101 static arm_stack_offsets *arm_get_frame_offsets (void);
102 static void arm_add_gc_roots (void);
103 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
104 HOST_WIDE_INT, rtx, rtx, int, int);
105 static unsigned bit_count (unsigned long);
106 static int arm_address_register_rtx_p (rtx, int);
107 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
108 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
109 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
110 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
111 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
112 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
113 inline static int thumb1_index_register_rtx_p (rtx, int);
114 static int thumb_far_jump_used_p (void);
115 static bool thumb_force_lr_save (void);
116 static unsigned arm_size_return_regs (void);
117 static bool arm_assemble_integer (rtx, unsigned int, int);
118 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
119 static void arm_print_operand (FILE *, rtx, int);
120 static void arm_print_operand_address (FILE *, rtx);
121 static bool arm_print_operand_punct_valid_p (unsigned char code);
122 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
123 static arm_cc get_arm_condition_code (rtx);
124 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
125 static const char *output_multi_immediate (rtx *, const char *, const char *,
126 int, HOST_WIDE_INT);
127 static const char *shift_op (rtx, HOST_WIDE_INT *);
128 static struct machine_function *arm_init_machine_status (void);
129 static void thumb_exit (FILE *, int);
130 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
131 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_forward_ref (Mfix *);
133 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_backward_ref (Mfix *);
135 static void assign_minipool_offsets (Mfix *);
136 static void arm_print_value (FILE *, rtx);
137 static void dump_minipool (rtx_insn *);
138 static int arm_barrier_cost (rtx);
139 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
140 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
141 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
142 machine_mode, rtx);
143 static void arm_reorg (void);
144 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
145 static unsigned long arm_compute_save_reg0_reg12_mask (void);
146 static unsigned long arm_compute_save_reg_mask (void);
147 static unsigned long arm_isr_value (tree);
148 static unsigned long arm_compute_func_type (void);
149 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
150 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
152 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
153 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
154 #endif
155 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
156 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
185 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
186 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
187 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
191 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
192 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
193 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
194 static void arm_init_builtins (void);
195 static void arm_init_iwmmxt_builtins (void);
196 static rtx safe_vector_operand (rtx, machine_mode);
197 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
198 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
199 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
200 static tree arm_builtin_decl (unsigned, bool);
201 static void emit_constant_insn (rtx cond, rtx pattern);
202 static rtx_insn *emit_set_insn (rtx, rtx);
203 static rtx emit_multi_reg_push (unsigned long, unsigned long);
204 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
205 tree, bool);
206 static rtx arm_function_arg (cumulative_args_t, machine_mode,
207 const_tree, bool);
208 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
211 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
212 const_tree);
213 static rtx aapcs_libcall_value (machine_mode);
214 static int aapcs_select_return_coproc (const_tree, const_tree);
216 #ifdef OBJECT_FORMAT_ELF
217 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
218 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
219 #endif
220 #ifndef ARM_PE
221 static void arm_encode_section_info (tree, rtx, int);
222 #endif
224 static void arm_file_end (void);
225 static void arm_file_start (void);
227 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
228 tree, int *, int);
229 static bool arm_pass_by_reference (cumulative_args_t,
230 machine_mode, const_tree, bool);
231 static bool arm_promote_prototypes (const_tree);
232 static bool arm_default_short_enums (void);
233 static bool arm_align_anon_bitfield (void);
234 static bool arm_return_in_msb (const_tree);
235 static bool arm_must_pass_in_stack (machine_mode, const_tree);
236 static bool arm_return_in_memory (const_tree, const_tree);
237 #if ARM_UNWIND_INFO
238 static void arm_unwind_emit (FILE *, rtx_insn *);
239 static bool arm_output_ttype (rtx);
240 static void arm_asm_emit_except_personality (rtx);
241 static void arm_asm_init_sections (void);
242 #endif
243 static rtx arm_dwarf_register_span (rtx);
245 static tree arm_cxx_guard_type (void);
246 static bool arm_cxx_guard_mask_bit (void);
247 static tree arm_get_cookie_size (tree);
248 static bool arm_cookie_has_size (void);
249 static bool arm_cxx_cdtor_returns_this (void);
250 static bool arm_cxx_key_method_may_be_inline (void);
251 static void arm_cxx_determine_class_data_visibility (tree);
252 static bool arm_cxx_class_data_always_comdat (void);
253 static bool arm_cxx_use_aeabi_atexit (void);
254 static void arm_init_libfuncs (void);
255 static tree arm_build_builtin_va_list (void);
256 static void arm_expand_builtin_va_start (tree, rtx);
257 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
258 static void arm_option_override (void);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
263 static bool arm_output_addr_const_extra (FILE *, rtx);
264 static bool arm_allocate_stack_slots_for_args (void);
265 static bool arm_warn_func_return (tree);
266 static const char *arm_invalid_parameter_type (const_tree t);
267 static const char *arm_invalid_return_type (const_tree t);
268 static tree arm_promoted_type (const_tree t);
269 static tree arm_convert_to_type (tree type, tree expr);
270 static bool arm_scalar_mode_supported_p (machine_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (machine_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
292 static unsigned int arm_autovectorize_vector_sizes (void);
293 static int arm_default_branch_cost (bool, bool);
294 static int arm_cortex_a5_branch_cost (bool, bool);
295 static int arm_cortex_m_branch_cost (bool, bool);
297 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
298 const unsigned char *sel);
300 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
301 tree vectype,
302 int misalign ATTRIBUTE_UNUSED);
303 static unsigned arm_add_stmt_cost (void *data, int count,
304 enum vect_cost_for_stmt kind,
305 struct _stmt_vec_info *stmt_info,
306 int misalign,
307 enum vect_cost_model_location where);
309 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
310 bool op0_preserve_value);
311 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
313 /* Table of machine attributes. */
314 static const struct attribute_spec arm_attribute_table[] =
316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
317 affects_type_identity } */
318 /* Function calls made to this symbol must be done indirectly, because
319 it may lie outside of the 26 bit addressing range of a normal function
320 call. */
321 { "long_call", 0, 0, false, true, true, NULL, false },
322 /* Whereas these functions are always known to reside within the 26 bit
323 addressing range. */
324 { "short_call", 0, 0, false, true, true, NULL, false },
325 /* Specify the procedure call conventions for a function. */
326 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
327 false },
328 /* Interrupt Service Routines have special prologue and epilogue requirements. */
329 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
330 false },
331 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
334 false },
335 #ifdef ARM_PE
336 /* ARM/PE has three new attributes:
337 interfacearm - ?
338 dllexport - for exporting a function/variable that will live in a dll
339 dllimport - for importing a function/variable from a dll
341 Microsoft allows multiple declspecs in one __declspec, separating
342 them with spaces. We do NOT support this. Instead, use __declspec
343 multiple times.
345 { "dllimport", 0, 0, true, false, false, NULL, false },
346 { "dllexport", 0, 0, true, false, false, NULL, false },
347 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
348 false },
349 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
352 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
353 false },
354 #endif
355 { NULL, 0, 0, false, false, false, NULL, false }
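/* For illustration: these attributes are attached to declarations in user
   code, and the handlers named in the table validate them where a handler
   is given.  A hypothetical translation unit targeting ARM might use them
   like this (the function names are made up; the attribute spellings and
   the "IRQ" / "aapcs-vfp" arguments are the documented forms):

     void far_handler (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     void uart_isr (void) __attribute__ ((isr ("IRQ")));
     void raw_stub (void) __attribute__ ((naked));
     double vsum (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));  */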
358 /* Initialize the GCC target structure. */
359 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
360 #undef TARGET_MERGE_DECL_ATTRIBUTES
361 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
362 #endif
364 #undef TARGET_LEGITIMIZE_ADDRESS
365 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
367 #undef TARGET_LRA_P
368 #define TARGET_LRA_P arm_lra_p
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_ASM_FILE_START
374 #define TARGET_ASM_FILE_START arm_file_start
375 #undef TARGET_ASM_FILE_END
376 #define TARGET_ASM_FILE_END arm_file_end
378 #undef TARGET_ASM_ALIGNED_SI_OP
379 #define TARGET_ASM_ALIGNED_SI_OP NULL
380 #undef TARGET_ASM_INTEGER
381 #define TARGET_ASM_INTEGER arm_assemble_integer
383 #undef TARGET_PRINT_OPERAND
384 #define TARGET_PRINT_OPERAND arm_print_operand
385 #undef TARGET_PRINT_OPERAND_ADDRESS
386 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
387 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
388 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
390 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
391 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
393 #undef TARGET_ASM_FUNCTION_PROLOGUE
394 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
396 #undef TARGET_ASM_FUNCTION_EPILOGUE
397 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_COMP_TYPE_ATTRIBUTES
403 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
405 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
406 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
408 #undef TARGET_SCHED_ADJUST_COST
409 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
411 #undef TARGET_SCHED_REORDER
412 #define TARGET_SCHED_REORDER arm_sched_reorder
414 #undef TARGET_REGISTER_MOVE_COST
415 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
417 #undef TARGET_MEMORY_MOVE_COST
418 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
420 #undef TARGET_ENCODE_SECTION_INFO
421 #ifdef ARM_PE
422 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
423 #else
424 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
425 #endif
427 #undef TARGET_STRIP_NAME_ENCODING
428 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
430 #undef TARGET_ASM_INTERNAL_LABEL
431 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
433 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
434 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
436 #undef TARGET_FUNCTION_VALUE
437 #define TARGET_FUNCTION_VALUE arm_function_value
439 #undef TARGET_LIBCALL_VALUE
440 #define TARGET_LIBCALL_VALUE arm_libcall_value
442 #undef TARGET_FUNCTION_VALUE_REGNO_P
443 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
445 #undef TARGET_ASM_OUTPUT_MI_THUNK
446 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
447 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
448 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
450 #undef TARGET_RTX_COSTS
451 #define TARGET_RTX_COSTS arm_rtx_costs
452 #undef TARGET_ADDRESS_COST
453 #define TARGET_ADDRESS_COST arm_address_cost
455 #undef TARGET_SHIFT_TRUNCATION_MASK
456 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
457 #undef TARGET_VECTOR_MODE_SUPPORTED_P
458 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
459 #undef TARGET_ARRAY_MODE_SUPPORTED_P
460 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
461 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
462 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
463 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
464 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
465 arm_autovectorize_vector_sizes
467 #undef TARGET_MACHINE_DEPENDENT_REORG
468 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
470 #undef TARGET_INIT_BUILTINS
471 #define TARGET_INIT_BUILTINS arm_init_builtins
472 #undef TARGET_EXPAND_BUILTIN
473 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
474 #undef TARGET_BUILTIN_DECL
475 #define TARGET_BUILTIN_DECL arm_builtin_decl
477 #undef TARGET_INIT_LIBFUNCS
478 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
480 #undef TARGET_PROMOTE_FUNCTION_MODE
481 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
484 #undef TARGET_PASS_BY_REFERENCE
485 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
486 #undef TARGET_ARG_PARTIAL_BYTES
487 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
488 #undef TARGET_FUNCTION_ARG
489 #define TARGET_FUNCTION_ARG arm_function_arg
490 #undef TARGET_FUNCTION_ARG_ADVANCE
491 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
492 #undef TARGET_FUNCTION_ARG_BOUNDARY
493 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
495 #undef TARGET_SETUP_INCOMING_VARARGS
496 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
498 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
499 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
501 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
502 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
503 #undef TARGET_TRAMPOLINE_INIT
504 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
505 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
506 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
508 #undef TARGET_WARN_FUNC_RETURN
509 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
511 #undef TARGET_DEFAULT_SHORT_ENUMS
512 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
514 #undef TARGET_ALIGN_ANON_BITFIELD
515 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
517 #undef TARGET_NARROW_VOLATILE_BITFIELD
518 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
520 #undef TARGET_CXX_GUARD_TYPE
521 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
523 #undef TARGET_CXX_GUARD_MASK_BIT
524 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
526 #undef TARGET_CXX_GET_COOKIE_SIZE
527 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
529 #undef TARGET_CXX_COOKIE_HAS_SIZE
530 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
532 #undef TARGET_CXX_CDTOR_RETURNS_THIS
533 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
535 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
536 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
538 #undef TARGET_CXX_USE_AEABI_ATEXIT
539 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
541 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
542 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
543 arm_cxx_determine_class_data_visibility
545 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
546 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
548 #undef TARGET_RETURN_IN_MSB
549 #define TARGET_RETURN_IN_MSB arm_return_in_msb
551 #undef TARGET_RETURN_IN_MEMORY
552 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
554 #undef TARGET_MUST_PASS_IN_STACK
555 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
557 #if ARM_UNWIND_INFO
558 #undef TARGET_ASM_UNWIND_EMIT
559 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
561 /* EABI unwinding tables use a different format for the typeinfo tables. */
562 #undef TARGET_ASM_TTYPE
563 #define TARGET_ASM_TTYPE arm_output_ttype
565 #undef TARGET_ARM_EABI_UNWINDER
566 #define TARGET_ARM_EABI_UNWINDER true
568 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
569 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
571 #undef TARGET_ASM_INIT_SECTIONS
572 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
573 #endif /* ARM_UNWIND_INFO */
575 #undef TARGET_DWARF_REGISTER_SPAN
576 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
578 #undef TARGET_CANNOT_COPY_INSN_P
579 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
581 #ifdef HAVE_AS_TLS
582 #undef TARGET_HAVE_TLS
583 #define TARGET_HAVE_TLS true
584 #endif
586 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
587 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
589 #undef TARGET_LEGITIMATE_CONSTANT_P
590 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
592 #undef TARGET_CANNOT_FORCE_CONST_MEM
593 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
595 #undef TARGET_MAX_ANCHOR_OFFSET
596 #define TARGET_MAX_ANCHOR_OFFSET 4095
598 /* The minimum is set such that the total size of the block
599 for a particular anchor is -4088 + 1 + 4095 bytes, which is
600 divisible by eight, ensuring natural spacing of anchors. */
601 #undef TARGET_MIN_ANCHOR_OFFSET
602 #define TARGET_MIN_ANCHOR_OFFSET -4088
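/* Illustrative arithmetic for the comment above: an anchor can reach
   offsets in [-4088, 4095], so one block covers
   4095 - (-4088) + 1 = 8184 bytes, and 8184 = 1023 * 8.  */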
604 #undef TARGET_SCHED_ISSUE_RATE
605 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
607 #undef TARGET_MANGLE_TYPE
608 #define TARGET_MANGLE_TYPE arm_mangle_type
610 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
611 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
613 #undef TARGET_BUILD_BUILTIN_VA_LIST
614 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
615 #undef TARGET_EXPAND_BUILTIN_VA_START
616 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
617 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
618 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
620 #ifdef HAVE_AS_TLS
621 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
622 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
623 #endif
625 #undef TARGET_LEGITIMATE_ADDRESS_P
626 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
628 #undef TARGET_PREFERRED_RELOAD_CLASS
629 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
631 #undef TARGET_INVALID_PARAMETER_TYPE
632 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
634 #undef TARGET_INVALID_RETURN_TYPE
635 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
637 #undef TARGET_PROMOTED_TYPE
638 #define TARGET_PROMOTED_TYPE arm_promoted_type
640 #undef TARGET_CONVERT_TO_TYPE
641 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
643 #undef TARGET_SCALAR_MODE_SUPPORTED_P
644 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
646 #undef TARGET_FRAME_POINTER_REQUIRED
647 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
649 #undef TARGET_CAN_ELIMINATE
650 #define TARGET_CAN_ELIMINATE arm_can_eliminate
652 #undef TARGET_CONDITIONAL_REGISTER_USAGE
653 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
655 #undef TARGET_CLASS_LIKELY_SPILLED_P
656 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
658 #undef TARGET_VECTORIZE_BUILTINS
659 #define TARGET_VECTORIZE_BUILTINS
661 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
662 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
663 arm_builtin_vectorized_function
665 #undef TARGET_VECTOR_ALIGNMENT
666 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
668 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
669 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
670 arm_vector_alignment_reachable
672 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
673 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
674 arm_builtin_support_vector_misalignment
676 #undef TARGET_PREFERRED_RENAME_CLASS
677 #define TARGET_PREFERRED_RENAME_CLASS \
678 arm_preferred_rename_class
680 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
681 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
682 arm_vectorize_vec_perm_const_ok
684 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
685 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
686 arm_builtin_vectorization_cost
687 #undef TARGET_VECTORIZE_ADD_STMT_COST
688 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
690 #undef TARGET_CANONICALIZE_COMPARISON
691 #define TARGET_CANONICALIZE_COMPARISON \
692 arm_canonicalize_comparison
694 #undef TARGET_ASAN_SHADOW_OFFSET
695 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
697 #undef MAX_INSN_PER_IT_BLOCK
698 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
700 #undef TARGET_CAN_USE_DOLOOP_P
701 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
703 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
704 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
706 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
707 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
709 struct gcc_target targetm = TARGET_INITIALIZER;
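/* Illustrative sketch of what the #undef/#define pairs above achieve:
   target-def.h supplies a default for every TARGET_* hook macro, and
   TARGET_INITIALIZER expands to an aggregate initializer built from
   whatever those macros name at this point.  Conceptually the result is

     struct gcc_target targetm = {
       ...,
       arm_rtx_costs,       <- from TARGET_RTX_COSTS
       arm_address_cost,    <- from TARGET_ADDRESS_COST
       ...,
     };

   so the middle end reaches ARM-specific behaviour through calls such as
   targetm.rtx_costs (...).  The field order shown is only a sketch; the
   real layout comes from target.def.  */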
711 /* Obstack for minipool constant handling. */
712 static struct obstack minipool_obstack;
713 static char * minipool_startobj;
715 /* The maximum number of insns that will be conditionalised
716 (rather than branched over) if possible. */
717 static int max_insns_skipped = 5;
719 extern FILE * asm_out_file;
721 /* True if we are currently building a constant table. */
722 int making_const_table;
724 /* The processor for which instructions should be scheduled. */
725 enum processor_type arm_tune = arm_none;
727 /* The current tuning set. */
728 const struct tune_params *current_tune;
730 /* Which floating point hardware to schedule for. */
731 int arm_fpu_attr;
733 /* Which floating point hardware to use. */
734 const struct arm_fpu_desc *arm_fpu_desc;
736 /* Used for Thumb call_via trampolines. */
737 rtx thumb_call_via_label[14];
738 static int thumb_call_reg_needed;
740 /* Bit values used to identify processor capabilities. */
741 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
742 #define FL_ARCH3M (1 << 1) /* Extended multiply */
743 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
744 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
745 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
746 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
747 #define FL_THUMB (1 << 6) /* Thumb aware */
748 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
749 #define FL_STRONG (1 << 8) /* StrongARM */
750 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
751 #define FL_XSCALE (1 << 10) /* XScale */
752 /* spare (1 << 11) */
753 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
754 media instructions. */
755 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
756 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
757 Note: ARM6 & 7 derivatives only. */
758 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
759 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
760 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
761 profile. */
762 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
763 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
764 #define FL_NEON (1 << 20) /* Neon instructions. */
765 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
766 architecture. */
767 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
768 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
769 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
770 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
772 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
773 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
775 /* Flags that only affect tuning, not available instructions. */
776 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
777 | FL_CO_PROC)
779 #define FL_FOR_ARCH2 FL_NOTM
780 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
781 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
782 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
783 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
784 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
785 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
786 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
787 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
788 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
789 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
790 #define FL_FOR_ARCH6J FL_FOR_ARCH6
791 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
792 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
793 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
794 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
795 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
796 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
797 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
798 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
799 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
800 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
801 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
802 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
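/* Worked example of the flag composition above:

     FL_FOR_ARCH7A = FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K
                   = ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
                     | FL_NOTM | FL_ARCH6K

   so an ARMv7-A target ends up with (among others) FL_THUMB, FL_THUMB2,
   FL_ARCH6, FL_ARCH6K, FL_ARCH7 and FL_NOTM set, whereas a v7-M target
   (FL_FOR_ARCH7M) keeps FL_NOTM clear and adds FL_THUMB_DIV.  */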
804 /* The bits in this mask specify which
805 instructions we are allowed to generate. */
806 static unsigned long insn_flags = 0;
808 /* The bits in this mask specify which instruction scheduling options should
809 be used. */
810 static unsigned long tune_flags = 0;
812 /* The highest ARM architecture version supported by the
813 target. */
814 enum base_architecture arm_base_arch = BASE_ARCH_0;
816 /* The following are used in the arm.md file as equivalents to bits
817 in the above two flag variables. */
819 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
820 int arm_arch3m = 0;
822 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
823 int arm_arch4 = 0;
825 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
826 int arm_arch4t = 0;
828 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
829 int arm_arch5 = 0;
831 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
832 int arm_arch5e = 0;
834 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
835 int arm_arch6 = 0;
837 /* Nonzero if this chip supports the ARM 6K extensions. */
838 int arm_arch6k = 0;
840 /* Nonzero if instructions present in ARMv6-M can be used. */
841 int arm_arch6m = 0;
843 /* Nonzero if this chip supports the ARM 7 extensions. */
844 int arm_arch7 = 0;
846 /* Nonzero if instructions not present in the 'M' profile can be used. */
847 int arm_arch_notm = 0;
849 /* Nonzero if instructions present in ARMv7E-M can be used. */
850 int arm_arch7em = 0;
852 /* Nonzero if instructions present in ARMv8 can be used. */
853 int arm_arch8 = 0;
855 /* Nonzero if this chip can benefit from load scheduling. */
856 int arm_ld_sched = 0;
858 /* Nonzero if this chip is a StrongARM. */
859 int arm_tune_strongarm = 0;
861 /* Nonzero if this chip supports Intel Wireless MMX technology. */
862 int arm_arch_iwmmxt = 0;
864 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
865 int arm_arch_iwmmxt2 = 0;
867 /* Nonzero if this chip is an XScale. */
868 int arm_arch_xscale = 0;
870 /* Nonzero if tuning for XScale */
871 int arm_tune_xscale = 0;
873 /* Nonzero if we want to tune for stores that access the write-buffer.
874 This typically means an ARM6 or ARM7 with MMU or MPU. */
875 int arm_tune_wbuf = 0;
877 /* Nonzero if tuning for Cortex-A9. */
878 int arm_tune_cortex_a9 = 0;
880 /* Nonzero if generating Thumb instructions. */
881 int thumb_code = 0;
883 /* Nonzero if generating Thumb-1 instructions. */
884 int thumb1_code = 0;
886 /* Nonzero if we should define __THUMB_INTERWORK__ in the
887 preprocessor.
888 XXX This is a bit of a hack; it's intended to help work around
889 problems in GLD, which doesn't understand that armv5t code is
890 interworking clean. */
891 int arm_cpp_interwork = 0;
893 /* Nonzero if chip supports Thumb 2. */
894 int arm_arch_thumb2;
896 /* Nonzero if chip supports integer division instruction. */
897 int arm_arch_arm_hwdiv;
898 int arm_arch_thumb_hwdiv;
900 /* Nonzero if we should use Neon to handle 64-bit operations rather
901 than core registers. */
902 int prefer_neon_for_64bits = 0;
904 /* Nonzero if we shouldn't use literal pools. */
905 bool arm_disable_literal_pool = false;
907 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
908 we must report the mode of the memory reference from
909 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
910 machine_mode output_memory_reference_mode;
912 /* The register number to be used for the PIC offset register. */
913 unsigned arm_pic_register = INVALID_REGNUM;
915 enum arm_pcs arm_pcs_default;
917 /* For an explanation of these variables, see final_prescan_insn below. */
918 int arm_ccfsm_state;
919 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
920 enum arm_cond_code arm_current_cc;
922 rtx arm_target_insn;
923 int arm_target_label;
924 /* The number of conditionally executed insns, including the current insn. */
925 int arm_condexec_count = 0;
926 /* A bitmask specifying the patterns for the IT block.
927 Zero means do not output an IT block before this insn. */
928 int arm_condexec_mask = 0;
929 /* The number of bits used in arm_condexec_mask. */
930 int arm_condexec_masklen = 0;
932 /* Nonzero if chip supports the ARMv8 CRC instructions. */
933 int arm_arch_crc = 0;
935 /* The condition codes of the ARM, and the inverse function. */
936 static const char * const arm_condition_codes[] =
938 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
939 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
942 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
943 int arm_regs_in_sequence[] =
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
948 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
949 #define streq(string1, string2) (strcmp (string1, string2) == 0)
951 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
952 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
953 | (1 << PIC_OFFSET_TABLE_REGNUM)))
955 /* Initialization code. */
957 struct processors
959 const char *const name;
960 enum processor_type core;
961 const char *arch;
962 enum base_architecture base_arch;
963 const unsigned long flags;
964 const struct tune_params *const tune;
968 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
969 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
970 prefetch_slots, \
971 l1_size, \
972 l1_line_size
974 /* arm generic vectorizer costs. */
975 static const
976 struct cpu_vec_costs arm_default_vec_cost = {
977 1, /* scalar_stmt_cost. */
978 1, /* scalar load_cost. */
979 1, /* scalar_store_cost. */
980 1, /* vec_stmt_cost. */
981 1, /* vec_to_scalar_cost. */
982 1, /* scalar_to_vec_cost. */
983 1, /* vec_align_load_cost. */
984 1, /* vec_unalign_load_cost. */
985 1, /* vec_unalign_store_cost. */
986 1, /* vec_store_cost. */
987 3, /* cond_taken_branch_cost. */
988 1, /* cond_not_taken_branch_cost. */
991 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
992 #include "aarch-cost-tables.h"
996 const struct cpu_cost_table cortexa9_extra_costs =
998 /* ALU */
1000 0, /* arith. */
1001 0, /* logical. */
1002 0, /* shift. */
1003 COSTS_N_INSNS (1), /* shift_reg. */
1004 COSTS_N_INSNS (1), /* arith_shift. */
1005 COSTS_N_INSNS (2), /* arith_shift_reg. */
1006 0, /* log_shift. */
1007 COSTS_N_INSNS (1), /* log_shift_reg. */
1008 COSTS_N_INSNS (1), /* extend. */
1009 COSTS_N_INSNS (2), /* extend_arith. */
1010 COSTS_N_INSNS (1), /* bfi. */
1011 COSTS_N_INSNS (1), /* bfx. */
1012 0, /* clz. */
1013 0, /* rev. */
1014 0, /* non_exec. */
1015 true /* non_exec_costs_exec. */
1018 /* MULT SImode */
1020 COSTS_N_INSNS (3), /* simple. */
1021 COSTS_N_INSNS (3), /* flag_setting. */
1022 COSTS_N_INSNS (2), /* extend. */
1023 COSTS_N_INSNS (3), /* add. */
1024 COSTS_N_INSNS (2), /* extend_add. */
1025 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1027 /* MULT DImode */
1029 0, /* simple (N/A). */
1030 0, /* flag_setting (N/A). */
1031 COSTS_N_INSNS (4), /* extend. */
1032 0, /* add (N/A). */
1033 COSTS_N_INSNS (4), /* extend_add. */
1034 0 /* idiv (N/A). */
1037 /* LD/ST */
1039 COSTS_N_INSNS (2), /* load. */
1040 COSTS_N_INSNS (2), /* load_sign_extend. */
1041 COSTS_N_INSNS (2), /* ldrd. */
1042 COSTS_N_INSNS (2), /* ldm_1st. */
1043 1, /* ldm_regs_per_insn_1st. */
1044 2, /* ldm_regs_per_insn_subsequent. */
1045 COSTS_N_INSNS (5), /* loadf. */
1046 COSTS_N_INSNS (5), /* loadd. */
1047 COSTS_N_INSNS (1), /* load_unaligned. */
1048 COSTS_N_INSNS (2), /* store. */
1049 COSTS_N_INSNS (2), /* strd. */
1050 COSTS_N_INSNS (2), /* stm_1st. */
1051 1, /* stm_regs_per_insn_1st. */
1052 2, /* stm_regs_per_insn_subsequent. */
1053 COSTS_N_INSNS (1), /* storef. */
1054 COSTS_N_INSNS (1), /* stored. */
1055 COSTS_N_INSNS (1) /* store_unaligned. */
1058 /* FP SFmode */
1060 COSTS_N_INSNS (14), /* div. */
1061 COSTS_N_INSNS (4), /* mult. */
1062 COSTS_N_INSNS (7), /* mult_addsub. */
1063 COSTS_N_INSNS (30), /* fma. */
1064 COSTS_N_INSNS (3), /* addsub. */
1065 COSTS_N_INSNS (1), /* fpconst. */
1066 COSTS_N_INSNS (1), /* neg. */
1067 COSTS_N_INSNS (3), /* compare. */
1068 COSTS_N_INSNS (3), /* widen. */
1069 COSTS_N_INSNS (3), /* narrow. */
1070 COSTS_N_INSNS (3), /* toint. */
1071 COSTS_N_INSNS (3), /* fromint. */
1072 COSTS_N_INSNS (3) /* roundint. */
1074 /* FP DFmode */
1076 COSTS_N_INSNS (24), /* div. */
1077 COSTS_N_INSNS (5), /* mult. */
1078 COSTS_N_INSNS (8), /* mult_addsub. */
1079 COSTS_N_INSNS (30), /* fma. */
1080 COSTS_N_INSNS (3), /* addsub. */
1081 COSTS_N_INSNS (1), /* fpconst. */
1082 COSTS_N_INSNS (1), /* neg. */
1083 COSTS_N_INSNS (3), /* compare. */
1084 COSTS_N_INSNS (3), /* widen. */
1085 COSTS_N_INSNS (3), /* narrow. */
1086 COSTS_N_INSNS (3), /* toint. */
1087 COSTS_N_INSNS (3), /* fromint. */
1088 COSTS_N_INSNS (3) /* roundint. */
1091 /* Vector */
1093 COSTS_N_INSNS (1) /* alu. */
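/* How to read these tables (an informal note): each field is the extra
   cost of an operation beyond a single simple instruction, scaled with
   COSTS_N_INSNS (N), i.e. N times the notional cost of one insn (the
   macro comes from rtl.h).  For the Cortex-A9 entries above, a SImode
   multiply is about three insns' worth of extra cost while plain
   arith/logical ops add nothing; the per-core tables are referenced from
   per-CPU tune_params structures and consulted when RTX costs are
   computed.  */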
1097 const struct cpu_cost_table cortexa8_extra_costs =
1099 /* ALU */
1101 0, /* arith. */
1102 0, /* logical. */
1103 COSTS_N_INSNS (1), /* shift. */
1104 0, /* shift_reg. */
1105 COSTS_N_INSNS (1), /* arith_shift. */
1106 0, /* arith_shift_reg. */
1107 COSTS_N_INSNS (1), /* log_shift. */
1108 0, /* log_shift_reg. */
1109 0, /* extend. */
1110 0, /* extend_arith. */
1111 0, /* bfi. */
1112 0, /* bfx. */
1113 0, /* clz. */
1114 0, /* rev. */
1115 0, /* non_exec. */
1116 true /* non_exec_costs_exec. */
1119 /* MULT SImode */
1121 COSTS_N_INSNS (1), /* simple. */
1122 COSTS_N_INSNS (1), /* flag_setting. */
1123 COSTS_N_INSNS (1), /* extend. */
1124 COSTS_N_INSNS (1), /* add. */
1125 COSTS_N_INSNS (1), /* extend_add. */
1126 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1128 /* MULT DImode */
1130 0, /* simple (N/A). */
1131 0, /* flag_setting (N/A). */
1132 COSTS_N_INSNS (2), /* extend. */
1133 0, /* add (N/A). */
1134 COSTS_N_INSNS (2), /* extend_add. */
1135 0 /* idiv (N/A). */
1138 /* LD/ST */
1140 COSTS_N_INSNS (1), /* load. */
1141 COSTS_N_INSNS (1), /* load_sign_extend. */
1142 COSTS_N_INSNS (1), /* ldrd. */
1143 COSTS_N_INSNS (1), /* ldm_1st. */
1144 1, /* ldm_regs_per_insn_1st. */
1145 2, /* ldm_regs_per_insn_subsequent. */
1146 COSTS_N_INSNS (1), /* loadf. */
1147 COSTS_N_INSNS (1), /* loadd. */
1148 COSTS_N_INSNS (1), /* load_unaligned. */
1149 COSTS_N_INSNS (1), /* store. */
1150 COSTS_N_INSNS (1), /* strd. */
1151 COSTS_N_INSNS (1), /* stm_1st. */
1152 1, /* stm_regs_per_insn_1st. */
1153 2, /* stm_regs_per_insn_subsequent. */
1154 COSTS_N_INSNS (1), /* storef. */
1155 COSTS_N_INSNS (1), /* stored. */
1156 COSTS_N_INSNS (1) /* store_unaligned. */
1159 /* FP SFmode */
1161 COSTS_N_INSNS (36), /* div. */
1162 COSTS_N_INSNS (11), /* mult. */
1163 COSTS_N_INSNS (20), /* mult_addsub. */
1164 COSTS_N_INSNS (30), /* fma. */
1165 COSTS_N_INSNS (9), /* addsub. */
1166 COSTS_N_INSNS (3), /* fpconst. */
1167 COSTS_N_INSNS (3), /* neg. */
1168 COSTS_N_INSNS (6), /* compare. */
1169 COSTS_N_INSNS (4), /* widen. */
1170 COSTS_N_INSNS (4), /* narrow. */
1171 COSTS_N_INSNS (8), /* toint. */
1172 COSTS_N_INSNS (8), /* fromint. */
1173 COSTS_N_INSNS (8) /* roundint. */
1175 /* FP DFmode */
1177 COSTS_N_INSNS (64), /* div. */
1178 COSTS_N_INSNS (16), /* mult. */
1179 COSTS_N_INSNS (25), /* mult_addsub. */
1180 COSTS_N_INSNS (30), /* fma. */
1181 COSTS_N_INSNS (9), /* addsub. */
1182 COSTS_N_INSNS (3), /* fpconst. */
1183 COSTS_N_INSNS (3), /* neg. */
1184 COSTS_N_INSNS (6), /* compare. */
1185 COSTS_N_INSNS (6), /* widen. */
1186 COSTS_N_INSNS (6), /* narrow. */
1187 COSTS_N_INSNS (8), /* toint. */
1188 COSTS_N_INSNS (8), /* fromint. */
1189 COSTS_N_INSNS (8) /* roundint. */
1192 /* Vector */
1194 COSTS_N_INSNS (1) /* alu. */
1198 const struct cpu_cost_table cortexa5_extra_costs =
1200 /* ALU */
1202 0, /* arith. */
1203 0, /* logical. */
1204 COSTS_N_INSNS (1), /* shift. */
1205 COSTS_N_INSNS (1), /* shift_reg. */
1206 COSTS_N_INSNS (1), /* arith_shift. */
1207 COSTS_N_INSNS (1), /* arith_shift_reg. */
1208 COSTS_N_INSNS (1), /* log_shift. */
1209 COSTS_N_INSNS (1), /* log_shift_reg. */
1210 COSTS_N_INSNS (1), /* extend. */
1211 COSTS_N_INSNS (1), /* extend_arith. */
1212 COSTS_N_INSNS (1), /* bfi. */
1213 COSTS_N_INSNS (1), /* bfx. */
1214 COSTS_N_INSNS (1), /* clz. */
1215 COSTS_N_INSNS (1), /* rev. */
1216 0, /* non_exec. */
1217 true /* non_exec_costs_exec. */
1221 /* MULT SImode */
1223 0, /* simple. */
1224 COSTS_N_INSNS (1), /* flag_setting. */
1225 COSTS_N_INSNS (1), /* extend. */
1226 COSTS_N_INSNS (1), /* add. */
1227 COSTS_N_INSNS (1), /* extend_add. */
1228 COSTS_N_INSNS (7) /* idiv. */
1230 /* MULT DImode */
1232 0, /* simple (N/A). */
1233 0, /* flag_setting (N/A). */
1234 COSTS_N_INSNS (1), /* extend. */
1235 0, /* add. */
1236 COSTS_N_INSNS (2), /* extend_add. */
1237 0 /* idiv (N/A). */
1240 /* LD/ST */
1242 COSTS_N_INSNS (1), /* load. */
1243 COSTS_N_INSNS (1), /* load_sign_extend. */
1244 COSTS_N_INSNS (6), /* ldrd. */
1245 COSTS_N_INSNS (1), /* ldm_1st. */
1246 1, /* ldm_regs_per_insn_1st. */
1247 2, /* ldm_regs_per_insn_subsequent. */
1248 COSTS_N_INSNS (2), /* loadf. */
1249 COSTS_N_INSNS (4), /* loadd. */
1250 COSTS_N_INSNS (1), /* load_unaligned. */
1251 COSTS_N_INSNS (1), /* store. */
1252 COSTS_N_INSNS (3), /* strd. */
1253 COSTS_N_INSNS (1), /* stm_1st. */
1254 1, /* stm_regs_per_insn_1st. */
1255 2, /* stm_regs_per_insn_subsequent. */
1256 COSTS_N_INSNS (2), /* storef. */
1257 COSTS_N_INSNS (2), /* stored. */
1258 COSTS_N_INSNS (1) /* store_unaligned. */
1261 /* FP SFmode */
1263 COSTS_N_INSNS (15), /* div. */
1264 COSTS_N_INSNS (3), /* mult. */
1265 COSTS_N_INSNS (7), /* mult_addsub. */
1266 COSTS_N_INSNS (7), /* fma. */
1267 COSTS_N_INSNS (3), /* addsub. */
1268 COSTS_N_INSNS (3), /* fpconst. */
1269 COSTS_N_INSNS (3), /* neg. */
1270 COSTS_N_INSNS (3), /* compare. */
1271 COSTS_N_INSNS (3), /* widen. */
1272 COSTS_N_INSNS (3), /* narrow. */
1273 COSTS_N_INSNS (3), /* toint. */
1274 COSTS_N_INSNS (3), /* fromint. */
1275 COSTS_N_INSNS (3) /* roundint. */
1277 /* FP DFmode */
1279 COSTS_N_INSNS (30), /* div. */
1280 COSTS_N_INSNS (6), /* mult. */
1281 COSTS_N_INSNS (10), /* mult_addsub. */
1282 COSTS_N_INSNS (7), /* fma. */
1283 COSTS_N_INSNS (3), /* addsub. */
1284 COSTS_N_INSNS (3), /* fpconst. */
1285 COSTS_N_INSNS (3), /* neg. */
1286 COSTS_N_INSNS (3), /* compare. */
1287 COSTS_N_INSNS (3), /* widen. */
1288 COSTS_N_INSNS (3), /* narrow. */
1289 COSTS_N_INSNS (3), /* toint. */
1290 COSTS_N_INSNS (3), /* fromint. */
1291 COSTS_N_INSNS (3) /* roundint. */
1294 /* Vector */
1296 COSTS_N_INSNS (1) /* alu. */
1301 const struct cpu_cost_table cortexa7_extra_costs =
1303 /* ALU */
1305 0, /* arith. */
1306 0, /* logical. */
1307 COSTS_N_INSNS (1), /* shift. */
1308 COSTS_N_INSNS (1), /* shift_reg. */
1309 COSTS_N_INSNS (1), /* arith_shift. */
1310 COSTS_N_INSNS (1), /* arith_shift_reg. */
1311 COSTS_N_INSNS (1), /* log_shift. */
1312 COSTS_N_INSNS (1), /* log_shift_reg. */
1313 COSTS_N_INSNS (1), /* extend. */
1314 COSTS_N_INSNS (1), /* extend_arith. */
1315 COSTS_N_INSNS (1), /* bfi. */
1316 COSTS_N_INSNS (1), /* bfx. */
1317 COSTS_N_INSNS (1), /* clz. */
1318 COSTS_N_INSNS (1), /* rev. */
1319 0, /* non_exec. */
1320 true /* non_exec_costs_exec. */
1324 /* MULT SImode */
1326 0, /* simple. */
1327 COSTS_N_INSNS (1), /* flag_setting. */
1328 COSTS_N_INSNS (1), /* extend. */
1329 COSTS_N_INSNS (1), /* add. */
1330 COSTS_N_INSNS (1), /* extend_add. */
1331 COSTS_N_INSNS (7) /* idiv. */
1333 /* MULT DImode */
1335 0, /* simple (N/A). */
1336 0, /* flag_setting (N/A). */
1337 COSTS_N_INSNS (1), /* extend. */
1338 0, /* add. */
1339 COSTS_N_INSNS (2), /* extend_add. */
1340 0 /* idiv (N/A). */
1343 /* LD/ST */
1345 COSTS_N_INSNS (1), /* load. */
1346 COSTS_N_INSNS (1), /* load_sign_extend. */
1347 COSTS_N_INSNS (3), /* ldrd. */
1348 COSTS_N_INSNS (1), /* ldm_1st. */
1349 1, /* ldm_regs_per_insn_1st. */
1350 2, /* ldm_regs_per_insn_subsequent. */
1351 COSTS_N_INSNS (2), /* loadf. */
1352 COSTS_N_INSNS (2), /* loadd. */
1353 COSTS_N_INSNS (1), /* load_unaligned. */
1354 COSTS_N_INSNS (1), /* store. */
1355 COSTS_N_INSNS (3), /* strd. */
1356 COSTS_N_INSNS (1), /* stm_1st. */
1357 1, /* stm_regs_per_insn_1st. */
1358 2, /* stm_regs_per_insn_subsequent. */
1359 COSTS_N_INSNS (2), /* storef. */
1360 COSTS_N_INSNS (2), /* stored. */
1361 COSTS_N_INSNS (1) /* store_unaligned. */
1364 /* FP SFmode */
1366 COSTS_N_INSNS (15), /* div. */
1367 COSTS_N_INSNS (3), /* mult. */
1368 COSTS_N_INSNS (7), /* mult_addsub. */
1369 COSTS_N_INSNS (7), /* fma. */
1370 COSTS_N_INSNS (3), /* addsub. */
1371 COSTS_N_INSNS (3), /* fpconst. */
1372 COSTS_N_INSNS (3), /* neg. */
1373 COSTS_N_INSNS (3), /* compare. */
1374 COSTS_N_INSNS (3), /* widen. */
1375 COSTS_N_INSNS (3), /* narrow. */
1376 COSTS_N_INSNS (3), /* toint. */
1377 COSTS_N_INSNS (3), /* fromint. */
1378 COSTS_N_INSNS (3) /* roundint. */
1380 /* FP DFmode */
1382 COSTS_N_INSNS (30), /* div. */
1383 COSTS_N_INSNS (6), /* mult. */
1384 COSTS_N_INSNS (10), /* mult_addsub. */
1385 COSTS_N_INSNS (7), /* fma. */
1386 COSTS_N_INSNS (3), /* addsub. */
1387 COSTS_N_INSNS (3), /* fpconst. */
1388 COSTS_N_INSNS (3), /* neg. */
1389 COSTS_N_INSNS (3), /* compare. */
1390 COSTS_N_INSNS (3), /* widen. */
1391 COSTS_N_INSNS (3), /* narrow. */
1392 COSTS_N_INSNS (3), /* toint. */
1393 COSTS_N_INSNS (3), /* fromint. */
1394 COSTS_N_INSNS (3) /* roundint. */
1397 /* Vector */
1399 COSTS_N_INSNS (1) /* alu. */
1403 const struct cpu_cost_table cortexa12_extra_costs =
1405 /* ALU */
1407 0, /* arith. */
1408 0, /* logical. */
1409 0, /* shift. */
1410 COSTS_N_INSNS (1), /* shift_reg. */
1411 COSTS_N_INSNS (1), /* arith_shift. */
1412 COSTS_N_INSNS (1), /* arith_shift_reg. */
1413 COSTS_N_INSNS (1), /* log_shift. */
1414 COSTS_N_INSNS (1), /* log_shift_reg. */
1415 0, /* extend. */
1416 COSTS_N_INSNS (1), /* extend_arith. */
1417 0, /* bfi. */
1418 COSTS_N_INSNS (1), /* bfx. */
1419 COSTS_N_INSNS (1), /* clz. */
1420 COSTS_N_INSNS (1), /* rev. */
1421 0, /* non_exec. */
1422 true /* non_exec_costs_exec. */
1424 /* MULT SImode */
1427 COSTS_N_INSNS (2), /* simple. */
1428 COSTS_N_INSNS (3), /* flag_setting. */
1429 COSTS_N_INSNS (2), /* extend. */
1430 COSTS_N_INSNS (3), /* add. */
1431 COSTS_N_INSNS (2), /* extend_add. */
1432 COSTS_N_INSNS (18) /* idiv. */
1434 /* MULT DImode */
1436 0, /* simple (N/A). */
1437 0, /* flag_setting (N/A). */
1438 COSTS_N_INSNS (3), /* extend. */
1439 0, /* add (N/A). */
1440 COSTS_N_INSNS (3), /* extend_add. */
1441 0 /* idiv (N/A). */
1444 /* LD/ST */
1446 COSTS_N_INSNS (3), /* load. */
1447 COSTS_N_INSNS (3), /* load_sign_extend. */
1448 COSTS_N_INSNS (3), /* ldrd. */
1449 COSTS_N_INSNS (3), /* ldm_1st. */
1450 1, /* ldm_regs_per_insn_1st. */
1451 2, /* ldm_regs_per_insn_subsequent. */
1452 COSTS_N_INSNS (3), /* loadf. */
1453 COSTS_N_INSNS (3), /* loadd. */
1454 0, /* load_unaligned. */
1455 0, /* store. */
1456 0, /* strd. */
1457 0, /* stm_1st. */
1458 1, /* stm_regs_per_insn_1st. */
1459 2, /* stm_regs_per_insn_subsequent. */
1460 COSTS_N_INSNS (2), /* storef. */
1461 COSTS_N_INSNS (2), /* stored. */
1462 0 /* store_unaligned. */
1465 /* FP SFmode */
1467 COSTS_N_INSNS (17), /* div. */
1468 COSTS_N_INSNS (4), /* mult. */
1469 COSTS_N_INSNS (8), /* mult_addsub. */
1470 COSTS_N_INSNS (8), /* fma. */
1471 COSTS_N_INSNS (4), /* addsub. */
1472 COSTS_N_INSNS (2), /* fpconst. */
1473 COSTS_N_INSNS (2), /* neg. */
1474 COSTS_N_INSNS (2), /* compare. */
1475 COSTS_N_INSNS (4), /* widen. */
1476 COSTS_N_INSNS (4), /* narrow. */
1477 COSTS_N_INSNS (4), /* toint. */
1478 COSTS_N_INSNS (4), /* fromint. */
1479 COSTS_N_INSNS (4) /* roundint. */
1481 /* FP DFmode */
1483 COSTS_N_INSNS (31), /* div. */
1484 COSTS_N_INSNS (4), /* mult. */
1485 COSTS_N_INSNS (8), /* mult_addsub. */
1486 COSTS_N_INSNS (8), /* fma. */
1487 COSTS_N_INSNS (4), /* addsub. */
1488 COSTS_N_INSNS (2), /* fpconst. */
1489 COSTS_N_INSNS (2), /* neg. */
1490 COSTS_N_INSNS (2), /* compare. */
1491 COSTS_N_INSNS (4), /* widen. */
1492 COSTS_N_INSNS (4), /* narrow. */
1493 COSTS_N_INSNS (4), /* toint. */
1494 COSTS_N_INSNS (4), /* fromint. */
1495 COSTS_N_INSNS (4) /* roundint. */
1498 /* Vector */
1500 COSTS_N_INSNS (1) /* alu. */
1504 const struct cpu_cost_table cortexa15_extra_costs =
1506 /* ALU */
1508 0, /* arith. */
1509 0, /* logical. */
1510 0, /* shift. */
1511 0, /* shift_reg. */
1512 COSTS_N_INSNS (1), /* arith_shift. */
1513 COSTS_N_INSNS (1), /* arith_shift_reg. */
1514 COSTS_N_INSNS (1), /* log_shift. */
1515 COSTS_N_INSNS (1), /* log_shift_reg. */
1516 0, /* extend. */
1517 COSTS_N_INSNS (1), /* extend_arith. */
1518 COSTS_N_INSNS (1), /* bfi. */
1519 0, /* bfx. */
1520 0, /* clz. */
1521 0, /* rev. */
1522 0, /* non_exec. */
1523 true /* non_exec_costs_exec. */
1525 /* MULT SImode */
1528 COSTS_N_INSNS (2), /* simple. */
1529 COSTS_N_INSNS (3), /* flag_setting. */
1530 COSTS_N_INSNS (2), /* extend. */
1531 COSTS_N_INSNS (2), /* add. */
1532 COSTS_N_INSNS (2), /* extend_add. */
1533 COSTS_N_INSNS (18) /* idiv. */
1535 /* MULT DImode */
1537 0, /* simple (N/A). */
1538 0, /* flag_setting (N/A). */
1539 COSTS_N_INSNS (3), /* extend. */
1540 0, /* add (N/A). */
1541 COSTS_N_INSNS (3), /* extend_add. */
1542 0 /* idiv (N/A). */
1545 /* LD/ST */
1547 COSTS_N_INSNS (3), /* load. */
1548 COSTS_N_INSNS (3), /* load_sign_extend. */
1549 COSTS_N_INSNS (3), /* ldrd. */
1550 COSTS_N_INSNS (4), /* ldm_1st. */
1551 1, /* ldm_regs_per_insn_1st. */
1552 2, /* ldm_regs_per_insn_subsequent. */
1553 COSTS_N_INSNS (4), /* loadf. */
1554 COSTS_N_INSNS (4), /* loadd. */
1555 0, /* load_unaligned. */
1556 0, /* store. */
1557 0, /* strd. */
1558 COSTS_N_INSNS (1), /* stm_1st. */
1559 1, /* stm_regs_per_insn_1st. */
1560 2, /* stm_regs_per_insn_subsequent. */
1561 0, /* storef. */
1562 0, /* stored. */
1563 0 /* store_unaligned. */
1566 /* FP SFmode */
1568 COSTS_N_INSNS (17), /* div. */
1569 COSTS_N_INSNS (4), /* mult. */
1570 COSTS_N_INSNS (8), /* mult_addsub. */
1571 COSTS_N_INSNS (8), /* fma. */
1572 COSTS_N_INSNS (4), /* addsub. */
1573 COSTS_N_INSNS (2), /* fpconst. */
1574 COSTS_N_INSNS (2), /* neg. */
1575 COSTS_N_INSNS (5), /* compare. */
1576 COSTS_N_INSNS (4), /* widen. */
1577 COSTS_N_INSNS (4), /* narrow. */
1578 COSTS_N_INSNS (4), /* toint. */
1579 COSTS_N_INSNS (4), /* fromint. */
1580 COSTS_N_INSNS (4) /* roundint. */
1582 /* FP DFmode */
1584 COSTS_N_INSNS (31), /* div. */
1585 COSTS_N_INSNS (4), /* mult. */
1586 COSTS_N_INSNS (8), /* mult_addsub. */
1587 COSTS_N_INSNS (8), /* fma. */
1588 COSTS_N_INSNS (4), /* addsub. */
1589 COSTS_N_INSNS (2), /* fpconst. */
1590 COSTS_N_INSNS (2), /* neg. */
1591 COSTS_N_INSNS (2), /* compare. */
1592 COSTS_N_INSNS (4), /* widen. */
1593 COSTS_N_INSNS (4), /* narrow. */
1594 COSTS_N_INSNS (4), /* toint. */
1595 COSTS_N_INSNS (4), /* fromint. */
1596 COSTS_N_INSNS (4) /* roundint. */
1599 /* Vector */
1601 COSTS_N_INSNS (1) /* alu. */
1605 const struct cpu_cost_table v7m_extra_costs =
1607 /* ALU */
1609 0, /* arith. */
1610 0, /* logical. */
1611 0, /* shift. */
1612 0, /* shift_reg. */
1613 0, /* arith_shift. */
1614 COSTS_N_INSNS (1), /* arith_shift_reg. */
1615 0, /* log_shift. */
1616 COSTS_N_INSNS (1), /* log_shift_reg. */
1617 0, /* extend. */
1618 COSTS_N_INSNS (1), /* extend_arith. */
1619 0, /* bfi. */
1620 0, /* bfx. */
1621 0, /* clz. */
1622 0, /* rev. */
1623 COSTS_N_INSNS (1), /* non_exec. */
1624 false /* non_exec_costs_exec. */
1627 /* MULT SImode */
1629 COSTS_N_INSNS (1), /* simple. */
1630 COSTS_N_INSNS (1), /* flag_setting. */
1631 COSTS_N_INSNS (2), /* extend. */
1632 COSTS_N_INSNS (1), /* add. */
1633 COSTS_N_INSNS (3), /* extend_add. */
1634 COSTS_N_INSNS (8) /* idiv. */
1636 /* MULT DImode */
1638 0, /* simple (N/A). */
1639 0, /* flag_setting (N/A). */
1640 COSTS_N_INSNS (2), /* extend. */
1641 0, /* add (N/A). */
1642 COSTS_N_INSNS (3), /* extend_add. */
1643 0 /* idiv (N/A). */
1646 /* LD/ST */
1648 COSTS_N_INSNS (2), /* load. */
1649 0, /* load_sign_extend. */
1650 COSTS_N_INSNS (3), /* ldrd. */
1651 COSTS_N_INSNS (2), /* ldm_1st. */
1652 1, /* ldm_regs_per_insn_1st. */
1653 1, /* ldm_regs_per_insn_subsequent. */
1654 COSTS_N_INSNS (2), /* loadf. */
1655 COSTS_N_INSNS (3), /* loadd. */
1656 COSTS_N_INSNS (1), /* load_unaligned. */
1657 COSTS_N_INSNS (2), /* store. */
1658 COSTS_N_INSNS (3), /* strd. */
1659 COSTS_N_INSNS (2), /* stm_1st. */
1660 1, /* stm_regs_per_insn_1st. */
1661 1, /* stm_regs_per_insn_subsequent. */
1662 COSTS_N_INSNS (2), /* storef. */
1663 COSTS_N_INSNS (3), /* stored. */
1664 COSTS_N_INSNS (1) /* store_unaligned. */
1667 /* FP SFmode */
1669 COSTS_N_INSNS (7), /* div. */
1670 COSTS_N_INSNS (2), /* mult. */
1671 COSTS_N_INSNS (5), /* mult_addsub. */
1672 COSTS_N_INSNS (3), /* fma. */
1673 COSTS_N_INSNS (1), /* addsub. */
1674 0, /* fpconst. */
1675 0, /* neg. */
1676 0, /* compare. */
1677 0, /* widen. */
1678 0, /* narrow. */
1679 0, /* toint. */
1680 0, /* fromint. */
1681 0 /* roundint. */
1683 /* FP DFmode */
1685 COSTS_N_INSNS (15), /* div. */
1686 COSTS_N_INSNS (5), /* mult. */
1687 COSTS_N_INSNS (7), /* mult_addsub. */
1688 COSTS_N_INSNS (7), /* fma. */
1689 COSTS_N_INSNS (3), /* addsub. */
1690 0, /* fpconst. */
1691 0, /* neg. */
1692 0, /* compare. */
1693 0, /* widen. */
1694 0, /* narrow. */
1695 0, /* toint. */
1696 0, /* fromint. */
1697 0 /* roundint. */
1700 /* Vector */
1702 COSTS_N_INSNS (1) /* alu. */
1706 const struct tune_params arm_slowmul_tune =
1708 arm_slowmul_rtx_costs,
1709 NULL,
1710 NULL, /* Sched adj cost. */
1711 3, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8 /* Maximum insns to inline memset. */
1725 const struct tune_params arm_fastmul_tune =
1727 arm_fastmul_rtx_costs,
1728 NULL,
1729 NULL, /* Sched adj cost. */
1730 1, /* Constant limit. */
1731 5, /* Max cond insns. */
1732 ARM_PREFETCH_NOT_BENEFICIAL,
1733 true, /* Prefer constant pool. */
1734 arm_default_branch_cost,
1735 false, /* Prefer LDRD/STRD. */
1736 {true, true}, /* Prefer non short circuit. */
1737 &arm_default_vec_cost, /* Vectorizer costs. */
1738 false, /* Prefer Neon for 64-bits bitops. */
1739 false, false, /* Prefer 32-bit encodings. */
1740 false, /* Prefer Neon for stringops. */
1741 8 /* Maximum insns to inline memset. */
1744 /* StrongARM has early execution of branches, so a sequence that is worth
1745 skipping is shorter. Set max_insns_skipped to a lower value. */
1747 const struct tune_params arm_strongarm_tune =
1749 arm_fastmul_rtx_costs,
1750 NULL,
1751 NULL, /* Sched adj cost. */
1752 1, /* Constant limit. */
1753 3, /* Max cond insns. */
1754 ARM_PREFETCH_NOT_BENEFICIAL,
1755 true, /* Prefer constant pool. */
1756 arm_default_branch_cost,
1757 false, /* Prefer LDRD/STRD. */
1758 {true, true}, /* Prefer non short circuit. */
1759 &arm_default_vec_cost, /* Vectorizer costs. */
1760 false, /* Prefer Neon for 64-bits bitops. */
1761 false, false, /* Prefer 32-bit encodings. */
1762 false, /* Prefer Neon for stringops. */
1763 8 /* Maximum insns to inline memset. */
1766 const struct tune_params arm_xscale_tune =
1768 arm_xscale_rtx_costs,
1769 NULL,
1770 xscale_sched_adjust_cost,
1771 2, /* Constant limit. */
1772 3, /* Max cond insns. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 true, /* Prefer constant pool. */
1775 arm_default_branch_cost,
1776 false, /* Prefer LDRD/STRD. */
1777 {true, true}, /* Prefer non short circuit. */
1778 &arm_default_vec_cost, /* Vectorizer costs. */
1779 false, /* Prefer Neon for 64-bits bitops. */
1780 false, false, /* Prefer 32-bit encodings. */
1781 false, /* Prefer Neon for stringops. */
1782 8 /* Maximum insns to inline memset. */
1785 const struct tune_params arm_9e_tune =
1787 arm_9e_rtx_costs,
1788 NULL,
1789 NULL, /* Sched adj cost. */
1790 1, /* Constant limit. */
1791 5, /* Max cond insns. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 true, /* Prefer constant pool. */
1794 arm_default_branch_cost,
1795 false, /* Prefer LDRD/STRD. */
1796 {true, true}, /* Prefer non short circuit. */
1797 &arm_default_vec_cost, /* Vectorizer costs. */
1798 false, /* Prefer Neon for 64-bits bitops. */
1799 false, false, /* Prefer 32-bit encodings. */
1800 false, /* Prefer Neon for stringops. */
1801 8 /* Maximum insns to inline memset. */
1804 const struct tune_params arm_v6t2_tune =
1806 arm_9e_rtx_costs,
1807 NULL,
1808 NULL, /* Sched adj cost. */
1809 1, /* Constant limit. */
1810 5, /* Max cond insns. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 false, /* Prefer constant pool. */
1813 arm_default_branch_cost,
1814 false, /* Prefer LDRD/STRD. */
1815 {true, true}, /* Prefer non short circuit. */
1816 &arm_default_vec_cost, /* Vectorizer costs. */
1817 false, /* Prefer Neon for 64-bits bitops. */
1818 false, false, /* Prefer 32-bit encodings. */
1819 false, /* Prefer Neon for stringops. */
1820 8 /* Maximum insns to inline memset. */
1823 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1824 const struct tune_params arm_cortex_tune =
1826 arm_9e_rtx_costs,
1827 &generic_extra_costs,
1828 NULL, /* Sched adj cost. */
1829 1, /* Constant limit. */
1830 5, /* Max cond insns. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 false, /* Prefer constant pool. */
1833 arm_default_branch_cost,
1834 false, /* Prefer LDRD/STRD. */
1835 {true, true}, /* Prefer non short circuit. */
1836 &arm_default_vec_cost, /* Vectorizer costs. */
1837 false, /* Prefer Neon for 64-bits bitops. */
1838 false, false, /* Prefer 32-bit encodings. */
1839 false, /* Prefer Neon for stringops. */
1840 8 /* Maximum insns to inline memset. */
1843 const struct tune_params arm_cortex_a8_tune =
1845 arm_9e_rtx_costs,
1846 &cortexa8_extra_costs,
1847 NULL, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 true, /* Prefer Neon for stringops. */
1859 8 /* Maximum insns to inline memset. */
1862 const struct tune_params arm_cortex_a7_tune =
1864 arm_9e_rtx_costs,
1865 &cortexa7_extra_costs,
1866 NULL,
1867 1, /* Constant limit. */
1868 5, /* Max cond insns. */
1869 ARM_PREFETCH_NOT_BENEFICIAL,
1870 false, /* Prefer constant pool. */
1871 arm_default_branch_cost,
1872 false, /* Prefer LDRD/STRD. */
1873 {true, true}, /* Prefer non short circuit. */
1874 &arm_default_vec_cost, /* Vectorizer costs. */
1875 false, /* Prefer Neon for 64-bits bitops. */
1876 false, false, /* Prefer 32-bit encodings. */
1877 true, /* Prefer Neon for stringops. */
1878 8 /* Maximum insns to inline memset. */
1881 const struct tune_params arm_cortex_a15_tune =
1883 arm_9e_rtx_costs,
1884 &cortexa15_extra_costs,
1885 NULL, /* Sched adj cost. */
1886 1, /* Constant limit. */
1887 2, /* Max cond insns. */
1888 ARM_PREFETCH_NOT_BENEFICIAL,
1889 false, /* Prefer constant pool. */
1890 arm_default_branch_cost,
1891 true, /* Prefer LDRD/STRD. */
1892 {true, true}, /* Prefer non short circuit. */
1893 &arm_default_vec_cost, /* Vectorizer costs. */
1894 false, /* Prefer Neon for 64-bits bitops. */
1895 true, true, /* Prefer 32-bit encodings. */
1896 true, /* Prefer Neon for stringops. */
1897 8 /* Maximum insns to inline memset. */
1900 const struct tune_params arm_cortex_a53_tune =
1902 arm_9e_rtx_costs,
1903 &cortexa53_extra_costs,
1904 NULL, /* Scheduler cost adjustment. */
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 ARM_PREFETCH_NOT_BENEFICIAL,
1908 false, /* Prefer constant pool. */
1909 arm_default_branch_cost,
1910 false, /* Prefer LDRD/STRD. */
1911 {true, true}, /* Prefer non short circuit. */
1912 &arm_default_vec_cost, /* Vectorizer costs. */
1913 false, /* Prefer Neon for 64-bits bitops. */
1914 false, false, /* Prefer 32-bit encodings. */
1915 false, /* Prefer Neon for stringops. */
1916 8 /* Maximum insns to inline memset. */
1919 const struct tune_params arm_cortex_a57_tune =
1921 arm_9e_rtx_costs,
1922 &cortexa57_extra_costs,
1923 NULL, /* Scheduler cost adjustment. */
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 false, /* Prefer constant pool. */
1928 arm_default_branch_cost,
1929 true, /* Prefer LDRD/STRD. */
1930 {true, true}, /* Prefer non short circuit. */
1931 &arm_default_vec_cost, /* Vectorizer costs. */
1932 false, /* Prefer Neon for 64-bits bitops. */
1933 true, true, /* Prefer 32-bit encodings. */
1934 false, /* Prefer Neon for stringops. */
1935 8 /* Maximum insns to inline memset. */
1938 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1939 less appealing. Set max_insns_skipped to a low value. */
1941 const struct tune_params arm_cortex_a5_tune =
1943 arm_9e_rtx_costs,
1944 &cortexa5_extra_costs,
1945 NULL, /* Sched adj cost. */
1946 1, /* Constant limit. */
1947 1, /* Max cond insns. */
1948 ARM_PREFETCH_NOT_BENEFICIAL,
1949 false, /* Prefer constant pool. */
1950 arm_cortex_a5_branch_cost,
1951 false, /* Prefer LDRD/STRD. */
1952 {false, false}, /* Prefer non short circuit. */
1953 &arm_default_vec_cost, /* Vectorizer costs. */
1954 false, /* Prefer Neon for 64-bits bitops. */
1955 false, false, /* Prefer 32-bit encodings. */
1956 true, /* Prefer Neon for stringops. */
1957 8 /* Maximum insns to inline memset. */
1960 const struct tune_params arm_cortex_a9_tune =
1962 arm_9e_rtx_costs,
1963 &cortexa9_extra_costs,
1964 cortex_a9_sched_adjust_cost,
1965 1, /* Constant limit. */
1966 5, /* Max cond insns. */
1967 ARM_PREFETCH_BENEFICIAL(4,32,32),
1968 false, /* Prefer constant pool. */
1969 arm_default_branch_cost,
1970 false, /* Prefer LDRD/STRD. */
1971 {true, true}, /* Prefer non short circuit. */
1972 &arm_default_vec_cost, /* Vectorizer costs. */
1973 false, /* Prefer Neon for 64-bits bitops. */
1974 false, false, /* Prefer 32-bit encodings. */
1975 false, /* Prefer Neon for stringops. */
1976 8 /* Maximum insns to inline memset. */
1979 const struct tune_params arm_cortex_a12_tune =
1981 arm_9e_rtx_costs,
1982 &cortexa12_extra_costs,
1983 NULL,
1984 1, /* Constant limit. */
1985 5, /* Max cond insns. */
1986 ARM_PREFETCH_BENEFICIAL(4,32,32),
1987 false, /* Prefer constant pool. */
1988 arm_default_branch_cost,
1989 true, /* Prefer LDRD/STRD. */
1990 {true, true}, /* Prefer non short circuit. */
1991 &arm_default_vec_cost, /* Vectorizer costs. */
1992 false, /* Prefer Neon for 64-bits bitops. */
1993 false, false, /* Prefer 32-bit encodings. */
1994 true, /* Prefer Neon for stringops. */
1995 8 /* Maximum insns to inline memset. */
1998 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
1999 cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from the constant
2000 pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2001 loads/stores can be pipelined together, saving one cycle), and may also
2002 improve icache utilisation. Hence we prefer the constant pool for such
2003 processors. */
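/* Illustrative sketch (not part of the original source): the two ways of
   materialising a 32-bit constant that the comment above compares.

     @ MOVW/MOVT pair, one cycle each on Cortex-M4:
     movw    r0, #0x5678        @ r0 = 0x00005678
     movt    r0, #0x1234        @ r0 = 0x12345678

     @ Literal-pool load, two cycles, but pipelines with neighbouring
     @ loads/stores:
     ldr     r0, .LC0
     ...
   .LC0:
     .word   0x12345678  */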
2005 const struct tune_params arm_v7m_tune =
2007 arm_9e_rtx_costs,
2008 &v7m_extra_costs,
2009 NULL, /* Sched adj cost. */
2010 1, /* Constant limit. */
2011 2, /* Max cond insns. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 true, /* Prefer constant pool. */
2014 arm_cortex_m_branch_cost,
2015 false, /* Prefer LDRD/STRD. */
2016 {false, false}, /* Prefer non short circuit. */
2017 &arm_default_vec_cost, /* Vectorizer costs. */
2018 false, /* Prefer Neon for 64-bits bitops. */
2019 false, false, /* Prefer 32-bit encodings. */
2020 false, /* Prefer Neon for stringops. */
2021 8 /* Maximum insns to inline memset. */
2024 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2025 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2026 const struct tune_params arm_v6m_tune =
2028 arm_9e_rtx_costs,
2029 NULL,
2030 NULL, /* Sched adj cost. */
2031 1, /* Constant limit. */
2032 5, /* Max cond insns. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 false, /* Prefer constant pool. */
2035 arm_default_branch_cost,
2036 false, /* Prefer LDRD/STRD. */
2037 {false, false}, /* Prefer non short circuit. */
2038 &arm_default_vec_cost, /* Vectorizer costs. */
2039 false, /* Prefer Neon for 64-bits bitops. */
2040 false, false, /* Prefer 32-bit encodings. */
2041 false, /* Prefer Neon for stringops. */
2042 8 /* Maximum insns to inline memset. */
2045 const struct tune_params arm_fa726te_tune =
2047 arm_9e_rtx_costs,
2048 NULL,
2049 fa726te_sched_adjust_cost,
2050 1, /* Constant limit. */
2051 5, /* Max cond insns. */
2052 ARM_PREFETCH_NOT_BENEFICIAL,
2053 true, /* Prefer constant pool. */
2054 arm_default_branch_cost,
2055 false, /* Prefer LDRD/STRD. */
2056 {true, true}, /* Prefer non short circuit. */
2057 &arm_default_vec_cost, /* Vectorizer costs. */
2058 false, /* Prefer Neon for 64-bits bitops. */
2059 false, false, /* Prefer 32-bit encodings. */
2060 false, /* Prefer Neon for stringops. */
2061 8 /* Maximum insns to inline memset. */
2065 /* Not all of these give usefully different compilation alternatives,
2066 but there is no simple way of generalizing them. */
2067 static const struct processors all_cores[] =
2069 /* ARM Cores */
2070 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2071 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2072 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2073 #include "arm-cores.def"
2074 #undef ARM_CORE
2075 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2078 static const struct processors all_architectures[] =
2080 /* ARM Architectures */
2081 /* We don't specify tuning costs here as it will be figured out
2082 from the core. */
2084 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2085 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2086 #include "arm-arches.def"
2087 #undef ARM_ARCH
2088 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2092 /* These are populated as commandline arguments are processed, or NULL
2093 if not specified. */
2094 static const struct processors *arm_selected_arch;
2095 static const struct processors *arm_selected_cpu;
2096 static const struct processors *arm_selected_tune;
2098 /* The name of the preprocessor macro to define for this architecture. */
2100 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2102 /* Available values for -mfpu=. */
2104 static const struct arm_fpu_desc all_fpus[] =
2106 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2107 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2108 #include "arm-fpus.def"
2109 #undef ARM_FPU
2113 /* Supported TLS relocations. */
2115 enum tls_reloc {
2116 TLS_GD32,
2117 TLS_LDM32,
2118 TLS_LDO32,
2119 TLS_IE32,
2120 TLS_LE32,
2121 TLS_DESCSEQ /* GNU scheme */
2124 /* The maximum number of insns to be used when loading a constant. */
2125 inline static int
2126 arm_constant_limit (bool size_p)
2128 return size_p ? 1 : current_tune->constant_limit;
2131 /* Emit an insn that's a simple single-set. Both the operands must be known
2132 to be valid. */
2133 inline static rtx_insn *
2134 emit_set_insn (rtx x, rtx y)
2136 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2139 /* Return the number of bits set in VALUE. */
2140 static unsigned
2141 bit_count (unsigned long value)
2143 unsigned long count = 0;
2145 while (value)
2147 count++;
2148 value &= value - 1; /* Clear the least-significant set bit. */
2151 return count;
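/* Worked example (for illustration): value = 0x29 = 0b101001.
     0b101001 & 0b101000 = 0b101000
     0b101000 & 0b100111 = 0b100000
     0b100000 & 0b011111 = 0
   Each iteration clears exactly one set bit, so the loop runs three
   times and bit_count (0x29) == 3.  */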
2154 typedef struct
2156 machine_mode mode;
2157 const char *name;
2158 } arm_fixed_mode_set;
2160 /* A small helper for setting fixed-point library libfuncs. */
2162 static void
2163 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2164 const char *funcname, const char *modename,
2165 int num_suffix)
2167 char buffer[50];
2169 if (num_suffix == 0)
2170 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2171 else
2172 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2174 set_optab_libfunc (optable, mode, buffer);
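/* Example of the names produced (illustrative): calling this with add_optab,
   SQmode, "add", "sq" and a num_suffix of 3 registers "__gnu_addsq3" as the
   SQmode addition libfunc; a num_suffix of 0 simply omits the trailing
   digit.  */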
2177 static void
2178 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2179 machine_mode from, const char *funcname,
2180 const char *toname, const char *fromname)
2182 char buffer[50];
2183 const char *maybe_suffix_2 = "";
2185 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2186 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2187 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2188 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2189 maybe_suffix_2 = "2";
2191 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2192 maybe_suffix_2);
2194 set_conv_libfunc (optable, to, from, buffer);
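/* Examples of the names produced (illustrative), following the "2" suffix
   rule above:
     fract_optab, SQmode -> SImode : "__gnu_fractsqsi"  (no suffix, since
       SImode is not a fixed-point mode);
     fract_optab, QQmode -> HQmode : "__gnu_fractqqhq2" (both signed fract
       modes, so the "2" suffix is appended).  */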
2197 /* Set up library functions unique to ARM. */
2199 static void
2200 arm_init_libfuncs (void)
2202 /* For Linux, we have access to kernel support for atomic operations. */
2203 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2204 init_sync_libfuncs (2 * UNITS_PER_WORD);
2206 /* There are no special library functions unless we are using the
2207 ARM BPABI. */
2208 if (!TARGET_BPABI)
2209 return;
2211 /* The functions below are described in Section 4 of the "Run-Time
2212 ABI for the ARM architecture", Version 1.0. */
2214 /* Double-precision floating-point arithmetic. Table 2. */
2215 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2216 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2217 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2218 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2219 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2221 /* Double-precision comparisons. Table 3. */
2222 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2223 set_optab_libfunc (ne_optab, DFmode, NULL);
2224 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2225 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2226 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2227 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2228 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2230 /* Single-precision floating-point arithmetic. Table 4. */
2231 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2232 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2233 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2234 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2235 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2237 /* Single-precision comparisons. Table 5. */
2238 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2239 set_optab_libfunc (ne_optab, SFmode, NULL);
2240 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2241 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2242 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2243 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2244 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2246 /* Floating-point to integer conversions. Table 6. */
2247 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2248 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2249 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2250 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2251 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2252 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2253 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2254 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2256 /* Conversions between floating types. Table 7. */
2257 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2258 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2260 /* Integer to floating-point conversions. Table 8. */
2261 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2262 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2263 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2264 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2265 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2266 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2267 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2268 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2270 /* Long long. Table 9. */
2271 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2272 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2273 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2274 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2275 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2276 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2277 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2278 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2280 /* Integer (32/32->32) division. \S 4.3.1. */
2281 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2282 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2284 /* The divmod functions are designed so that they can be used for
2285 plain division, even though they return both the quotient and the
2286 remainder. The quotient is returned in the usual location (i.e.,
2287 r0 for SImode, {r0, r1} for DImode), just as would be expected
2288 for an ordinary division routine. Because the AAPCS calling
2289 conventions specify that all of { r0, r1, r2, r3 } are
2290 call-clobbered registers, there is no need to tell the compiler
2291 explicitly that those registers are clobbered by these
2292 routines. */
2293 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2294 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
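/* Illustrative sketch (not from the sources): __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1, so a plain 32-bit division

       int quot = num / den;

   could simply be compiled as

       bl      __aeabi_idivmod   @ r0 = num / den, r1 = num % den
       @ keep r0, ignore r1

   (in practice SImode division is redirected to the faster __aeabi_idiv
   just below; the DImode case really does go through __aeabi_ldivmod).  */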
2296 /* For SImode division the ABI provides div-without-mod routines,
2297 which are faster. */
2298 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2299 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2301 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2302 divmod libcalls instead. */
2303 set_optab_libfunc (smod_optab, DImode, NULL);
2304 set_optab_libfunc (umod_optab, DImode, NULL);
2305 set_optab_libfunc (smod_optab, SImode, NULL);
2306 set_optab_libfunc (umod_optab, SImode, NULL);
2308 /* Half-precision float operations. The compiler handles all operations
2309 with NULL libfuncs by converting to SFmode. */
2310 switch (arm_fp16_format)
2312 case ARM_FP16_FORMAT_IEEE:
2313 case ARM_FP16_FORMAT_ALTERNATIVE:
2315 /* Conversions. */
2316 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2317 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2318 ? "__gnu_f2h_ieee"
2319 : "__gnu_f2h_alternative"));
2320 set_conv_libfunc (sext_optab, SFmode, HFmode,
2321 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2322 ? "__gnu_h2f_ieee"
2323 : "__gnu_h2f_alternative"));
2325 /* Arithmetic. */
2326 set_optab_libfunc (add_optab, HFmode, NULL);
2327 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2328 set_optab_libfunc (smul_optab, HFmode, NULL);
2329 set_optab_libfunc (neg_optab, HFmode, NULL);
2330 set_optab_libfunc (sub_optab, HFmode, NULL);
2332 /* Comparisons. */
2333 set_optab_libfunc (eq_optab, HFmode, NULL);
2334 set_optab_libfunc (ne_optab, HFmode, NULL);
2335 set_optab_libfunc (lt_optab, HFmode, NULL);
2336 set_optab_libfunc (le_optab, HFmode, NULL);
2337 set_optab_libfunc (ge_optab, HFmode, NULL);
2338 set_optab_libfunc (gt_optab, HFmode, NULL);
2339 set_optab_libfunc (unord_optab, HFmode, NULL);
2340 break;
2342 default:
2343 break;
2346 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2348 const arm_fixed_mode_set fixed_arith_modes[] =
2350 { QQmode, "qq" },
2351 { UQQmode, "uqq" },
2352 { HQmode, "hq" },
2353 { UHQmode, "uhq" },
2354 { SQmode, "sq" },
2355 { USQmode, "usq" },
2356 { DQmode, "dq" },
2357 { UDQmode, "udq" },
2358 { TQmode, "tq" },
2359 { UTQmode, "utq" },
2360 { HAmode, "ha" },
2361 { UHAmode, "uha" },
2362 { SAmode, "sa" },
2363 { USAmode, "usa" },
2364 { DAmode, "da" },
2365 { UDAmode, "uda" },
2366 { TAmode, "ta" },
2367 { UTAmode, "uta" }
2369 const arm_fixed_mode_set fixed_conv_modes[] =
2371 { QQmode, "qq" },
2372 { UQQmode, "uqq" },
2373 { HQmode, "hq" },
2374 { UHQmode, "uhq" },
2375 { SQmode, "sq" },
2376 { USQmode, "usq" },
2377 { DQmode, "dq" },
2378 { UDQmode, "udq" },
2379 { TQmode, "tq" },
2380 { UTQmode, "utq" },
2381 { HAmode, "ha" },
2382 { UHAmode, "uha" },
2383 { SAmode, "sa" },
2384 { USAmode, "usa" },
2385 { DAmode, "da" },
2386 { UDAmode, "uda" },
2387 { TAmode, "ta" },
2388 { UTAmode, "uta" },
2389 { QImode, "qi" },
2390 { HImode, "hi" },
2391 { SImode, "si" },
2392 { DImode, "di" },
2393 { TImode, "ti" },
2394 { SFmode, "sf" },
2395 { DFmode, "df" }
2397 unsigned int i, j;
2399 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2401 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2402 "add", fixed_arith_modes[i].name, 3);
2403 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2404 "ssadd", fixed_arith_modes[i].name, 3);
2405 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2406 "usadd", fixed_arith_modes[i].name, 3);
2407 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2408 "sub", fixed_arith_modes[i].name, 3);
2409 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2410 "sssub", fixed_arith_modes[i].name, 3);
2411 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2412 "ussub", fixed_arith_modes[i].name, 3);
2413 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2414 "mul", fixed_arith_modes[i].name, 3);
2415 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2416 "ssmul", fixed_arith_modes[i].name, 3);
2417 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2418 "usmul", fixed_arith_modes[i].name, 3);
2419 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2420 "div", fixed_arith_modes[i].name, 3);
2421 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2422 "udiv", fixed_arith_modes[i].name, 3);
2423 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2424 "ssdiv", fixed_arith_modes[i].name, 3);
2425 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2426 "usdiv", fixed_arith_modes[i].name, 3);
2427 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2428 "neg", fixed_arith_modes[i].name, 2);
2429 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2430 "ssneg", fixed_arith_modes[i].name, 2);
2431 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2432 "usneg", fixed_arith_modes[i].name, 2);
2433 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2434 "ashl", fixed_arith_modes[i].name, 3);
2435 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2436 "ashr", fixed_arith_modes[i].name, 3);
2437 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2438 "lshr", fixed_arith_modes[i].name, 3);
2439 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2440 "ssashl", fixed_arith_modes[i].name, 3);
2441 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2442 "usashl", fixed_arith_modes[i].name, 3);
2443 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2444 "cmp", fixed_arith_modes[i].name, 2);
2447 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2448 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2450 if (i == j
2451 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2452 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2453 continue;
2455 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2456 fixed_conv_modes[j].mode, "fract",
2457 fixed_conv_modes[i].name,
2458 fixed_conv_modes[j].name);
2459 arm_set_fixed_conv_libfunc (satfract_optab,
2460 fixed_conv_modes[i].mode,
2461 fixed_conv_modes[j].mode, "satfract",
2462 fixed_conv_modes[i].name,
2463 fixed_conv_modes[j].name);
2464 arm_set_fixed_conv_libfunc (fractuns_optab,
2465 fixed_conv_modes[i].mode,
2466 fixed_conv_modes[j].mode, "fractuns",
2467 fixed_conv_modes[i].name,
2468 fixed_conv_modes[j].name);
2469 arm_set_fixed_conv_libfunc (satfractuns_optab,
2470 fixed_conv_modes[i].mode,
2471 fixed_conv_modes[j].mode, "satfractuns",
2472 fixed_conv_modes[i].name,
2473 fixed_conv_modes[j].name);
2477 if (TARGET_AAPCS_BASED)
2478 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2481 /* On AAPCS systems, this is the "struct __va_list". */
2482 static GTY(()) tree va_list_type;
2484 /* Return the type to use as __builtin_va_list. */
2485 static tree
2486 arm_build_builtin_va_list (void)
2488 tree va_list_name;
2489 tree ap_field;
2491 if (!TARGET_AAPCS_BASED)
2492 return std_build_builtin_va_list ();
2494 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2495 defined as:
2497 struct __va_list
2499 void *__ap;
2502 The C Library ABI further reinforces this definition in \S
2503 4.1.
2505 We must follow this definition exactly. The structure tag
2506 name is visible in C++ mangled names, and thus forms a part
2507 of the ABI. The field name may be used by people who
2508 #include <stdarg.h>. */
2509 /* Create the type. */
2510 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2511 /* Give it the required name. */
2512 va_list_name = build_decl (BUILTINS_LOCATION,
2513 TYPE_DECL,
2514 get_identifier ("__va_list"),
2515 va_list_type);
2516 DECL_ARTIFICIAL (va_list_name) = 1;
2517 TYPE_NAME (va_list_type) = va_list_name;
2518 TYPE_STUB_DECL (va_list_type) = va_list_name;
2519 /* Create the __ap field. */
2520 ap_field = build_decl (BUILTINS_LOCATION,
2521 FIELD_DECL,
2522 get_identifier ("__ap"),
2523 ptr_type_node);
2524 DECL_ARTIFICIAL (ap_field) = 1;
2525 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2526 TYPE_FIELDS (va_list_type) = ap_field;
2527 /* Compute its layout. */
2528 layout_type (va_list_type);
2530 return va_list_type;
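/* For illustration only: the type built above corresponds to the user-level
   declaration mandated by the AAPCS,

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   which is why the tag name and field name used here must not change.  */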
2533 /* Return an expression of type "void *" pointing to the next
2534 available argument in a variable-argument list. VALIST is the
2535 user-level va_list object, of type __builtin_va_list. */
2536 static tree
2537 arm_extract_valist_ptr (tree valist)
2539 if (TREE_TYPE (valist) == error_mark_node)
2540 return error_mark_node;
2542 /* On an AAPCS target, the pointer is stored within "struct
2543 va_list". */
2544 if (TARGET_AAPCS_BASED)
2546 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2547 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2548 valist, ap_field, NULL_TREE);
2551 return valist;
2554 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2555 static void
2556 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2558 valist = arm_extract_valist_ptr (valist);
2559 std_expand_builtin_va_start (valist, nextarg);
2562 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2563 static tree
2564 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2565 gimple_seq *post_p)
2567 valist = arm_extract_valist_ptr (valist);
2568 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2571 /* Fix up any incompatible options that the user has specified. */
2572 static void
2573 arm_option_override (void)
2575 if (global_options_set.x_arm_arch_option)
2576 arm_selected_arch = &all_architectures[arm_arch_option];
2578 if (global_options_set.x_arm_cpu_option)
2580 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2581 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2584 if (global_options_set.x_arm_tune_option)
2585 arm_selected_tune = &all_cores[(int) arm_tune_option];
2587 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2588 SUBTARGET_OVERRIDE_OPTIONS;
2589 #endif
2591 if (arm_selected_arch)
2593 if (arm_selected_cpu)
2595 /* Check for conflict between mcpu and march. */
2596 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2598 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2599 arm_selected_cpu->name, arm_selected_arch->name);
2600 /* -march wins for code generation.
2601 -mcpu wins for default tuning. */
2602 if (!arm_selected_tune)
2603 arm_selected_tune = arm_selected_cpu;
2605 arm_selected_cpu = arm_selected_arch;
2607 else
2608 /* -mcpu wins. */
2609 arm_selected_arch = NULL;
2611 else
2612 /* Pick a CPU based on the architecture. */
2613 arm_selected_cpu = arm_selected_arch;
2616 /* If the user did not specify a processor, choose one for them. */
2617 if (!arm_selected_cpu)
2619 const struct processors * sel;
2620 unsigned int sought;
2622 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2623 if (!arm_selected_cpu->name)
2625 #ifdef SUBTARGET_CPU_DEFAULT
2626 /* Use the subtarget default CPU if none was specified by
2627 configure. */
2628 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2629 #endif
2630 /* Default to ARM6. */
2631 if (!arm_selected_cpu->name)
2632 arm_selected_cpu = &all_cores[arm6];
2635 sel = arm_selected_cpu;
2636 insn_flags = sel->flags;
2638 /* Now check to see if the user has specified some command line
2639 switches that require certain abilities from the cpu. */
2640 sought = 0;
2642 if (TARGET_INTERWORK || TARGET_THUMB)
2644 sought |= (FL_THUMB | FL_MODE32);
2646 /* There are no ARM processors that support both APCS-26 and
2647 interworking. Therefore we force FL_MODE26 to be removed
2648 from insn_flags here (if it was set), so that the search
2649 below will always be able to find a compatible processor. */
2650 insn_flags &= ~FL_MODE26;
2653 if (sought != 0 && ((sought & insn_flags) != sought))
2655 /* Try to locate a CPU type that supports all of the abilities
2656 of the default CPU, plus the extra abilities requested by
2657 the user. */
2658 for (sel = all_cores; sel->name != NULL; sel++)
2659 if ((sel->flags & sought) == (sought | insn_flags))
2660 break;
2662 if (sel->name == NULL)
2664 unsigned current_bit_count = 0;
2665 const struct processors * best_fit = NULL;
2667 /* Ideally we would like to issue an error message here
2668 saying that it was not possible to find a CPU compatible
2669 with the default CPU, but which also supports the command
2670 line options specified by the programmer, and so they
2671 ought to use the -mcpu=<name> command line option to
2672 override the default CPU type.
2674 If we cannot find a cpu that has both the
2675 characteristics of the default cpu and the given
2676 command line options we scan the array again looking
2677 for a best match. */
2678 for (sel = all_cores; sel->name != NULL; sel++)
2679 if ((sel->flags & sought) == sought)
2681 unsigned count;
2683 count = bit_count (sel->flags & insn_flags);
2685 if (count >= current_bit_count)
2687 best_fit = sel;
2688 current_bit_count = count;
2692 gcc_assert (best_fit);
2693 sel = best_fit;
2696 arm_selected_cpu = sel;
2700 gcc_assert (arm_selected_cpu);
2701 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2702 if (!arm_selected_tune)
2703 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2705 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2706 insn_flags = arm_selected_cpu->flags;
2707 arm_base_arch = arm_selected_cpu->base_arch;
2709 arm_tune = arm_selected_tune->core;
2710 tune_flags = arm_selected_tune->flags;
2711 current_tune = arm_selected_tune->tune;
2713 /* Make sure that the processor choice does not conflict with any of the
2714 other command line choices. */
2715 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2716 error ("target CPU does not support ARM mode");
2718 /* BPABI targets use linker tricks to allow interworking on cores
2719 without thumb support. */
2720 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2722 warning (0, "target CPU does not support interworking" );
2723 target_flags &= ~MASK_INTERWORK;
2726 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2728 warning (0, "target CPU does not support THUMB instructions");
2729 target_flags &= ~MASK_THUMB;
2732 if (TARGET_APCS_FRAME && TARGET_THUMB)
2734 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2735 target_flags &= ~MASK_APCS_FRAME;
2738 /* Callee super interworking implies thumb interworking. Adding
2739 this to the flags here simplifies the logic elsewhere. */
2740 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2741 target_flags |= MASK_INTERWORK;
2743 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2744 from here where no function is being compiled currently. */
2745 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2746 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2748 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2749 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2751 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2753 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2754 target_flags |= MASK_APCS_FRAME;
2757 if (TARGET_POKE_FUNCTION_NAME)
2758 target_flags |= MASK_APCS_FRAME;
2760 if (TARGET_APCS_REENT && flag_pic)
2761 error ("-fpic and -mapcs-reent are incompatible");
2763 if (TARGET_APCS_REENT)
2764 warning (0, "APCS reentrant code not supported. Ignored");
2766 /* If this target is normally configured to use APCS frames, warn if they
2767 are turned off and debugging is turned on. */
2768 if (TARGET_ARM
2769 && write_symbols != NO_DEBUG
2770 && !TARGET_APCS_FRAME
2771 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2772 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2774 if (TARGET_APCS_FLOAT)
2775 warning (0, "passing floating point arguments in fp regs not yet supported");
2777 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2778 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2779 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2780 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2781 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2782 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2783 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2784 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2785 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2786 arm_arch6m = arm_arch6 && !arm_arch_notm;
2787 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2788 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2789 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2790 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2791 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2793 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2794 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2795 thumb_code = TARGET_ARM == 0;
2796 thumb1_code = TARGET_THUMB1 != 0;
2797 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2798 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2799 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2800 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2801 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2802 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2803 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2804 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2805 if (arm_restrict_it == 2)
2806 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2808 if (!TARGET_THUMB2)
2809 arm_restrict_it = 0;
2811 /* If we are not using the default (ARM mode) section anchor offset
2812 ranges, then set the correct ranges now. */
2813 if (TARGET_THUMB1)
2815 /* Thumb-1 LDR instructions cannot have negative offsets.
2816 Permissible positive offset ranges are 5-bit (for byte loads),
2817 6-bit (for halfword loads), or 7-bit (for word loads).
2818 Empirical results suggest a 7-bit anchor range gives the best
2819 overall code size. */
2820 targetm.min_anchor_offset = 0;
2821 targetm.max_anchor_offset = 127;
2823 else if (TARGET_THUMB2)
2825 /* The minimum is set such that the total size of the block
2826 for a particular anchor is 248 + 1 + 4095 bytes, which is
2827 divisible by eight, ensuring natural spacing of anchors. */
2828 targetm.min_anchor_offset = -248;
2829 targetm.max_anchor_offset = 4095;
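/* Arithmetic behind the comment above: 248 + 1 + 4095 = 4344 = 8 * 543,
   so the total block covered by one anchor is indeed a multiple of
   eight bytes.  */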
2832 /* V5 code we generate is completely interworking capable, so we turn off
2833 TARGET_INTERWORK here to avoid many tests later on. */
2835 /* XXX However, we must pass the right pre-processor defines to CPP
2836 or GLD can get confused. This is a hack. */
2837 if (TARGET_INTERWORK)
2838 arm_cpp_interwork = 1;
2840 if (arm_arch5)
2841 target_flags &= ~MASK_INTERWORK;
2843 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2844 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2846 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2847 error ("iwmmxt abi requires an iwmmxt capable cpu");
2849 if (!global_options_set.x_arm_fpu_index)
2851 const char *target_fpu_name;
2852 bool ok;
2854 #ifdef FPUTYPE_DEFAULT
2855 target_fpu_name = FPUTYPE_DEFAULT;
2856 #else
2857 target_fpu_name = "vfp";
2858 #endif
2860 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2861 CL_TARGET);
2862 gcc_assert (ok);
2865 arm_fpu_desc = &all_fpus[arm_fpu_index];
2867 if (TARGET_NEON && !arm_arch7)
2868 error ("target CPU does not support NEON");
2870 switch (arm_fpu_desc->model)
2872 case ARM_FP_MODEL_VFP:
2873 arm_fpu_attr = FPU_VFP;
2874 break;
2876 default:
2877 gcc_unreachable();
2880 if (TARGET_AAPCS_BASED)
2882 if (TARGET_CALLER_INTERWORKING)
2883 error ("AAPCS does not support -mcaller-super-interworking");
2884 else
2885 if (TARGET_CALLEE_INTERWORKING)
2886 error ("AAPCS does not support -mcallee-super-interworking");
2889 /* iWMMXt and NEON are incompatible. */
2890 if (TARGET_IWMMXT && TARGET_NEON)
2891 error ("iWMMXt and NEON are incompatible");
2893 /* iWMMXt unsupported under Thumb mode. */
2894 if (TARGET_THUMB && TARGET_IWMMXT)
2895 error ("iWMMXt unsupported under Thumb mode");
2897 /* __fp16 support currently assumes the core has ldrh. */
2898 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2899 sorry ("__fp16 and no ldrh");
2901 /* If soft-float is specified then don't use FPU. */
2902 if (TARGET_SOFT_FLOAT)
2903 arm_fpu_attr = FPU_NONE;
2905 if (TARGET_AAPCS_BASED)
2907 if (arm_abi == ARM_ABI_IWMMXT)
2908 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2909 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2910 && TARGET_HARD_FLOAT
2911 && TARGET_VFP)
2912 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2913 else
2914 arm_pcs_default = ARM_PCS_AAPCS;
2916 else
2918 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2919 sorry ("-mfloat-abi=hard and VFP");
2921 if (arm_abi == ARM_ABI_APCS)
2922 arm_pcs_default = ARM_PCS_APCS;
2923 else
2924 arm_pcs_default = ARM_PCS_ATPCS;
2927 /* For arm2/3 there is no need to do any scheduling if we are doing
2928 software floating-point. */
2929 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2930 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2932 /* Use the cp15 method if it is available. */
2933 if (target_thread_pointer == TP_AUTO)
2935 if (arm_arch6k && !TARGET_THUMB1)
2936 target_thread_pointer = TP_CP15;
2937 else
2938 target_thread_pointer = TP_SOFT;
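/* Illustrative sketch (an assumption, not spelled out here): with TP_CP15
   the thread pointer is read directly from the CP15 user read-only thread
   ID register, roughly

     mrc     p15, 0, r0, c13, c0, 3   @ r0 = TPIDRURO

   whereas TP_SOFT falls back to calling the __aeabi_read_tp helper.  */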
2941 if (TARGET_HARD_TP && TARGET_THUMB1)
2942 error ("can not use -mtp=cp15 with 16-bit Thumb");
2944 /* Override the default structure alignment for AAPCS ABI. */
2945 if (!global_options_set.x_arm_structure_size_boundary)
2947 if (TARGET_AAPCS_BASED)
2948 arm_structure_size_boundary = 8;
2950 else
2952 if (arm_structure_size_boundary != 8
2953 && arm_structure_size_boundary != 32
2954 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2956 if (ARM_DOUBLEWORD_ALIGN)
2957 warning (0,
2958 "structure size boundary can only be set to 8, 32 or 64");
2959 else
2960 warning (0, "structure size boundary can only be set to 8 or 32");
2961 arm_structure_size_boundary
2962 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2966 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2968 error ("RTP PIC is incompatible with Thumb");
2969 flag_pic = 0;
2972 /* If stack checking is disabled, we can use r10 as the PIC register,
2973 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2974 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2976 if (TARGET_VXWORKS_RTP)
2977 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2978 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2981 if (flag_pic && TARGET_VXWORKS_RTP)
2982 arm_pic_register = 9;
2984 if (arm_pic_register_string != NULL)
2986 int pic_register = decode_reg_name (arm_pic_register_string);
2988 if (!flag_pic)
2989 warning (0, "-mpic-register= is useless without -fpic");
2991 /* Prevent the user from choosing an obviously stupid PIC register. */
2992 else if (pic_register < 0 || call_used_regs[pic_register]
2993 || pic_register == HARD_FRAME_POINTER_REGNUM
2994 || pic_register == STACK_POINTER_REGNUM
2995 || pic_register >= PC_REGNUM
2996 || (TARGET_VXWORKS_RTP
2997 && (unsigned int) pic_register != arm_pic_register))
2998 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2999 else
3000 arm_pic_register = pic_register;
3003 if (TARGET_VXWORKS_RTP
3004 && !global_options_set.x_arm_pic_data_is_text_relative)
3005 arm_pic_data_is_text_relative = 0;
3007 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3008 if (fix_cm3_ldrd == 2)
3010 if (arm_selected_cpu->core == cortexm3)
3011 fix_cm3_ldrd = 1;
3012 else
3013 fix_cm3_ldrd = 0;
3016 /* Enable -munaligned-access by default for
3017 - all ARMv6 architecture-based processors
3018 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3019 - ARMv8 architecture-based processors.
3021 Disable -munaligned-access by default for
3022 - all pre-ARMv6 architecture-based processors
3023 - ARMv6-M architecture-based processors. */
3025 if (unaligned_access == 2)
3027 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3028 unaligned_access = 1;
3029 else
3030 unaligned_access = 0;
3032 else if (unaligned_access == 1
3033 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3035 warning (0, "target CPU does not support unaligned accesses");
3036 unaligned_access = 0;
3039 if (TARGET_THUMB1 && flag_schedule_insns)
3041 /* Don't warn since it's on by default in -O2. */
3042 flag_schedule_insns = 0;
3045 if (optimize_size)
3047 /* If optimizing for size, bump the number of instructions that we
3048 are prepared to conditionally execute (even on a StrongARM). */
3049 max_insns_skipped = 6;
3051 /* For THUMB2, we limit the conditional sequence to one IT block. */
3052 if (TARGET_THUMB2)
3053 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3055 else
3056 max_insns_skipped = current_tune->max_insns_skipped;
3058 /* Hot/Cold partitioning is not currently supported, since we can't
3059 handle literal pool placement in that case. */
3060 if (flag_reorder_blocks_and_partition)
3062 inform (input_location,
3063 "-freorder-blocks-and-partition not supported on this architecture");
3064 flag_reorder_blocks_and_partition = 0;
3065 flag_reorder_blocks = 1;
3068 if (flag_pic)
3069 /* Hoisting PIC address calculations more aggressively provides a small,
3070 but measurable, size reduction for PIC code. Therefore, we decrease
3071 the bar for unrestricted expression hoisting to the cost of PIC address
3072 calculation, which is 2 instructions. */
3073 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3074 global_options.x_param_values,
3075 global_options_set.x_param_values);
3077 /* ARM EABI defaults to strict volatile bitfields. */
3078 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3079 && abi_version_at_least(2))
3080 flag_strict_volatile_bitfields = 1;
3082 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3083 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3084 if (flag_prefetch_loop_arrays < 0
3085 && HAVE_prefetch
3086 && optimize >= 3
3087 && current_tune->num_prefetch_slots > 0)
3088 flag_prefetch_loop_arrays = 1;
3090 /* Set up parameters to be used in prefetching algorithm. Do not override the
3091 defaults unless we are tuning for a core we have researched values for. */
3092 if (current_tune->num_prefetch_slots > 0)
3093 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3094 current_tune->num_prefetch_slots,
3095 global_options.x_param_values,
3096 global_options_set.x_param_values);
3097 if (current_tune->l1_cache_line_size >= 0)
3098 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3099 current_tune->l1_cache_line_size,
3100 global_options.x_param_values,
3101 global_options_set.x_param_values);
3102 if (current_tune->l1_cache_size >= 0)
3103 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3104 current_tune->l1_cache_size,
3105 global_options.x_param_values,
3106 global_options_set.x_param_values);
3108 /* Use Neon rather than core registers to perform 64-bit
3109 operations. */
3110 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3111 if (use_neon_for_64bits == 1)
3112 prefer_neon_for_64bits = true;
3114 /* Use the alternative scheduling-pressure algorithm by default. */
3115 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3116 global_options.x_param_values,
3117 global_options_set.x_param_values);
3119 /* Disable shrink-wrap when optimizing function for size, since it tends to
3120 generate additional returns. */
3121 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3122 flag_shrink_wrap = false;
3123 /* TBD: Dwarf info for apcs frame is not handled yet. */
3124 if (TARGET_APCS_FRAME)
3125 flag_shrink_wrap = false;
3127 /* We only support -mslow-flash-data on armv7-m targets. */
3128 if (target_slow_flash_data
3129 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3130 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3131 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3133 /* Currently, for slow flash data, we just disable literal pools. */
3134 if (target_slow_flash_data)
3135 arm_disable_literal_pool = true;
3137 /* Register global variables with the garbage collector. */
3138 arm_add_gc_roots ();
3141 static void
3142 arm_add_gc_roots (void)
3144 gcc_obstack_init(&minipool_obstack);
3145 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3148 /* A table of known ARM exception types.
3149 For use with the interrupt function attribute. */
3151 typedef struct
3153 const char *const arg;
3154 const unsigned long return_value;
3156 isr_attribute_arg;
3158 static const isr_attribute_arg isr_attribute_args [] =
3160 { "IRQ", ARM_FT_ISR },
3161 { "irq", ARM_FT_ISR },
3162 { "FIQ", ARM_FT_FIQ },
3163 { "fiq", ARM_FT_FIQ },
3164 { "ABORT", ARM_FT_ISR },
3165 { "abort", ARM_FT_ISR },
3166 { "ABORT", ARM_FT_ISR },
3167 { "abort", ARM_FT_ISR },
3168 { "UNDEF", ARM_FT_EXCEPTION },
3169 { "undef", ARM_FT_EXCEPTION },
3170 { "SWI", ARM_FT_EXCEPTION },
3171 { "swi", ARM_FT_EXCEPTION },
3172 { NULL, ARM_FT_NORMAL }
3175 /* Returns the (interrupt) function type of the current
3176 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3178 static unsigned long
3179 arm_isr_value (tree argument)
3181 const isr_attribute_arg * ptr;
3182 const char * arg;
3184 if (!arm_arch_notm)
3185 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3187 /* No argument - default to IRQ. */
3188 if (argument == NULL_TREE)
3189 return ARM_FT_ISR;
3191 /* Get the value of the argument. */
3192 if (TREE_VALUE (argument) == NULL_TREE
3193 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3194 return ARM_FT_UNKNOWN;
3196 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3198 /* Check it against the list of known arguments. */
3199 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3200 if (streq (arg, ptr->arg))
3201 return ptr->return_value;
3203 /* An unrecognized interrupt type. */
3204 return ARM_FT_UNKNOWN;
3207 /* Computes the type of the current function. */
3209 static unsigned long
3210 arm_compute_func_type (void)
3212 unsigned long type = ARM_FT_UNKNOWN;
3213 tree a;
3214 tree attr;
3216 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3218 /* Decide if the current function is volatile. Such functions
3219 never return, and many memory cycles can be saved by not storing
3220 register values that will never be needed again. This optimization
3221 was added to speed up context switching in a kernel application. */
3222 if (optimize > 0
3223 && (TREE_NOTHROW (current_function_decl)
3224 || !(flag_unwind_tables
3225 || (flag_exceptions
3226 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3227 && TREE_THIS_VOLATILE (current_function_decl))
3228 type |= ARM_FT_VOLATILE;
3230 if (cfun->static_chain_decl != NULL)
3231 type |= ARM_FT_NESTED;
3233 attr = DECL_ATTRIBUTES (current_function_decl);
3235 a = lookup_attribute ("naked", attr);
3236 if (a != NULL_TREE)
3237 type |= ARM_FT_NAKED;
3239 a = lookup_attribute ("isr", attr);
3240 if (a == NULL_TREE)
3241 a = lookup_attribute ("interrupt", attr);
3243 if (a == NULL_TREE)
3244 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3245 else
3246 type |= arm_isr_value (TREE_VALUE (a));
3248 return type;
3251 /* Returns the type of the current function. */
3253 unsigned long
3254 arm_current_func_type (void)
3256 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3257 cfun->machine->func_type = arm_compute_func_type ();
3259 return cfun->machine->func_type;
3262 bool
3263 arm_allocate_stack_slots_for_args (void)
3265 /* Naked functions should not allocate stack slots for arguments. */
3266 return !IS_NAKED (arm_current_func_type ());
3269 static bool
3270 arm_warn_func_return (tree decl)
3272 /* Naked functions are implemented entirely in assembly, including the
3273 return sequence, so suppress warnings about this. */
3274 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3278 /* Output assembler code for a block containing the constant parts
3279 of a trampoline, leaving space for the variable parts.
3281 On the ARM, (if r8 is the static chain regnum, and remembering that
3282 referencing pc adds an offset of 8) the trampoline looks like:
3283 ldr r8, [pc, #0]
3284 ldr pc, [pc]
3285 .word static chain value
3286 .word function's address
3287 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3289 static void
3290 arm_asm_trampoline_template (FILE *f)
3292 if (TARGET_ARM)
3294 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3295 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3297 else if (TARGET_THUMB2)
3299 /* The Thumb-2 trampoline is similar to the arm implementation.
3300 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3301 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3302 STATIC_CHAIN_REGNUM, PC_REGNUM);
3303 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3305 else
3307 ASM_OUTPUT_ALIGN (f, 2);
3308 fprintf (f, "\t.code\t16\n");
3309 fprintf (f, ".Ltrampoline_start:\n");
3310 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3311 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3312 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3313 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3314 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3315 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3317 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3318 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3321 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3323 static void
3324 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3326 rtx fnaddr, mem, a_tramp;
3328 emit_block_move (m_tramp, assemble_trampoline_template (),
3329 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3331 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3332 emit_move_insn (mem, chain_value);
3334 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3335 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3336 emit_move_insn (mem, fnaddr);
3338 a_tramp = XEXP (m_tramp, 0);
3339 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3340 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3341 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
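/* Resulting layout for the 32-bit (ARM/Thumb-2) case, for illustration:

     offset  0:  first ldr    (loads the static chain register)
     offset  4:  second ldr   (loads pc)
     offset  8:  .word  static chain value
     offset 12:  .word  target function address

   hence the offsets 8 and 12 passed to adjust_address above; the Thumb-1
   variant keeps its two data words at offsets 12 and 16 instead.  */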
3344 /* Thumb trampolines should be entered in thumb mode, so set
3345 the bottom bit of the address. */
3347 static rtx
3348 arm_trampoline_adjust_address (rtx addr)
3350 if (TARGET_THUMB)
3351 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3352 NULL, 0, OPTAB_LIB_WIDEN);
3353 return addr;
3356 /* Return 1 if it is possible to return using a single instruction.
3357 If SIBLING is non-null, this is a test for a return before a sibling
3358 call. SIBLING is the call insn, so we can examine its register usage. */
3361 use_return_insn (int iscond, rtx sibling)
3363 int regno;
3364 unsigned int func_type;
3365 unsigned long saved_int_regs;
3366 unsigned HOST_WIDE_INT stack_adjust;
3367 arm_stack_offsets *offsets;
3369 /* Never use a return instruction before reload has run. */
3370 if (!reload_completed)
3371 return 0;
3373 func_type = arm_current_func_type ();
3375 /* Naked, volatile and stack alignment functions need special
3376 consideration. */
3377 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3378 return 0;
3380 /* So do interrupt functions that use the frame pointer and Thumb
3381 interrupt functions. */
3382 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3383 return 0;
3385 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3386 && !optimize_function_for_size_p (cfun))
3387 return 0;
3389 offsets = arm_get_frame_offsets ();
3390 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3392 /* As do variadic functions. */
3393 if (crtl->args.pretend_args_size
3394 || cfun->machine->uses_anonymous_args
3395 /* Or if the function calls __builtin_eh_return () */
3396 || crtl->calls_eh_return
3397 /* Or if the function calls alloca */
3398 || cfun->calls_alloca
3399 /* Or if there is a stack adjustment. However, if the stack pointer
3400 is saved on the stack, we can use a pre-incrementing stack load. */
3401 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3402 && stack_adjust == 4)))
3403 return 0;
3405 saved_int_regs = offsets->saved_regs_mask;
3407 /* Unfortunately, the insn
3409 ldmib sp, {..., sp, ...}
3411 triggers a bug on most SA-110 based devices, such that the stack
3412 pointer won't be correctly restored if the instruction takes a
3413 page fault. We work around this problem by popping r3 along with
3414 the other registers, since that is never slower than executing
3415 another instruction.
3417 We test for !arm_arch5 here, because code for any architecture
3418 less than this could potentially be run on one of the buggy
3419 chips. */
3420 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3422 /* Validate that r3 is a call-clobbered register (always true in
3423 the default abi) ... */
3424 if (!call_used_regs[3])
3425 return 0;
3427 /* ... that it isn't being used for a return value ... */
3428 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3429 return 0;
3431 /* ... or for a tail-call argument ... */
3432 if (sibling)
3434 gcc_assert (CALL_P (sibling));
3436 if (find_regno_fusage (sibling, USE, 3))
3437 return 0;
3440 /* ... and that there are no call-saved registers in r0-r2
3441 (always true in the default ABI). */
3442 if (saved_int_regs & 0x7)
3443 return 0;
3446 /* Can't be done if interworking with Thumb, and any registers have been
3447 stacked. */
3448 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3449 return 0;
3451 /* On StrongARM, conditional returns are expensive if they aren't
3452 taken and multiple registers have been stacked. */
3453 if (iscond && arm_tune_strongarm)
3455 /* Conditional return when just the LR is stored is a simple
3456 conditional-load instruction, that's not expensive. */
3457 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3458 return 0;
3460 if (flag_pic
3461 && arm_pic_register != INVALID_REGNUM
3462 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3463 return 0;
3466 /* If there are saved registers but the LR isn't saved, then we need
3467 two instructions for the return. */
3468 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3469 return 0;
3471 /* Can't be done if any of the VFP regs are pushed,
3472 since this also requires an insn. */
3473 if (TARGET_HARD_FLOAT && TARGET_VFP)
3474 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3475 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3476 return 0;
3478 if (TARGET_REALLY_IWMMXT)
3479 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3480 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3481 return 0;
3483 return 1;
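/* A rough illustration of the conditions above: a simple leaf function can
   return with a single "bx lr", and a function that pushed only core
   registers including lr can return with one "ldm"/"pop {..., pc}", so both
   get 1 here.  A function that must first adjust the stack pointer, that
   saved registers without saving lr, or that pushed call-saved VFP or
   iWMMXt registers needs at least one extra instruction and gets 0.  */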
3486 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3487 shrink-wrapping if possible. This is the case if we need to emit a
3488 prologue, which we can test by looking at the offsets. */
3489 bool
3490 use_simple_return_p (void)
3492 arm_stack_offsets *offsets;
3494 offsets = arm_get_frame_offsets ();
3495 return offsets->outgoing_args != 0;
3498 /* Return TRUE if int I is a valid immediate ARM constant. */
3501 const_ok_for_arm (HOST_WIDE_INT i)
3503 int lowbit;
3505 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3506 be all zero, or all one. */
3507 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3508 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3509 != ((~(unsigned HOST_WIDE_INT) 0)
3510 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3511 return FALSE;
3513 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3515 /* Fast return for 0 and small values. We must do this for zero, since
3516 the code below can't handle that one case. */
3517 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3518 return TRUE;
3520 /* Get the number of trailing zeros. */
3521 lowbit = ffs((int) i) - 1;
3523 /* Only even shifts are allowed in ARM mode so round down to the
3524 nearest even number. */
3525 if (TARGET_ARM)
3526 lowbit &= ~1;
3528 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3529 return TRUE;
3531 if (TARGET_ARM)
3533 /* Allow rotated constants in ARM mode. */
3534 if (lowbit <= 4
3535 && ((i & ~0xc000003f) == 0
3536 || (i & ~0xf000000f) == 0
3537 || (i & ~0xfc000003) == 0))
3538 return TRUE;
3540 else
3542 HOST_WIDE_INT v;
3544 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3545 v = i & 0xff;
3546 v |= v << 16;
3547 if (i == v || i == (v | (v << 8)))
3548 return TRUE;
3550 /* Allow repeated pattern 0xXY00XY00. */
3551 v = i & 0xff00;
3552 v |= v << 16;
3553 if (i == v)
3554 return TRUE;
3557 return FALSE;
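/* Illustrative examples (not exhaustive) of what the test above accepts:

       0x000000ff   valid everywhere (fits in 8 bits)
       0x00ff0000   valid everywhere (0xff shifted/rotated to an even position)
       0xf000000f   valid in ARM mode only (8-bit value whose rotation wraps
                    around bit 0; Thumb-2 modified immediates cannot wrap)
       0x00ff00ff   valid in Thumb-2 only (replicated-halfword pattern)
       0x00000101   not a valid immediate for either encoding, so it must be
                    synthesized some other way (e.g. movw where available, or
                    two data-processing insns via arm_gen_constant).  */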
3560 /* Return true if I is a valid constant for the operation CODE. */
3562 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3564 if (const_ok_for_arm (i))
3565 return 1;
3567 switch (code)
3569 case SET:
3570 /* See if we can use movw. */
3571 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3572 return 1;
3573 else
3574 /* Otherwise, try mvn. */
3575 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3577 case PLUS:
3578 /* See if we can use addw or subw. */
3579 if (TARGET_THUMB2
3580 && ((i & 0xfffff000) == 0
3581 || ((-i) & 0xfffff000) == 0))
3582 return 1;
3583 /* else fall through. */
3585 case COMPARE:
3586 case EQ:
3587 case NE:
3588 case GT:
3589 case LE:
3590 case LT:
3591 case GE:
3592 case GEU:
3593 case LTU:
3594 case GTU:
3595 case LEU:
3596 case UNORDERED:
3597 case ORDERED:
3598 case UNEQ:
3599 case UNGE:
3600 case UNLT:
3601 case UNGT:
3602 case UNLE:
3603 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3605 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3606 case XOR:
3607 return 0;
3609 case IOR:
3610 if (TARGET_THUMB2)
3611 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3612 return 0;
3614 case AND:
3615 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3617 default:
3618 gcc_unreachable ();
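/* Illustrative examples (not exhaustive) of the per-operation fallbacks
   checked above, assuming SImode operands:

       SET  0x00001234  on an architecture with movw (arm_arch_thumb2):
                        accepted even though it is not a rotated 8-bit value.
       SET  0xffffff00  : ~0xffffff00 == 0xff is valid, so one mvn suffices.
       PLUS 0xfffffffe  (i.e. -2): the negation 2 is valid, so the addition
                        can be emitted as "sub ..., #2".
       AND  0xffffff00  : ~mask == 0xff is valid, so one bic suffices.
       IOR  0xfffffff0  on Thumb-2: ~0xfffffff0 == 0xf is valid, so orn works.

   The caller still decides which insn pattern actually gets emitted.  */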
3622 /* Return true if I is a valid di mode constant for the operation CODE. */
3624 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3626 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3627 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3628 rtx hi = GEN_INT (hi_val);
3629 rtx lo = GEN_INT (lo_val);
3631 if (TARGET_THUMB1)
3632 return 0;
3634 switch (code)
3636 case AND:
3637 case IOR:
3638 case XOR:
3639 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3640 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3641 case PLUS:
3642 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3644 default:
3645 return 0;
3649 /* Emit a sequence of insns to handle a large constant.
3650 CODE is the code of the operation required, it can be any of SET, PLUS,
3651 IOR, AND, XOR, MINUS;
3652 MODE is the mode in which the operation is being performed;
3653 VAL is the integer to operate on;
3654 SOURCE is the other operand (a register, or a null-pointer for SET);
3655 SUBTARGETS means it is safe to create scratch registers if that will
3656 either produce a simpler sequence, or we will want to cse the values.
3657 Return value is the number of insns emitted. */
3659 /* ??? Tweak this for thumb2. */
3661 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3662 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3664 rtx cond;
3666 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3667 cond = COND_EXEC_TEST (PATTERN (insn));
3668 else
3669 cond = NULL_RTX;
3671 if (subtargets || code == SET
3672 || (REG_P (target) && REG_P (source)
3673 && REGNO (target) != REGNO (source)))
3675 /* After arm_reorg has been called, we can't fix up expensive
3676 constants by pushing them into memory so we must synthesize
3677 them in-line, regardless of the cost. This is only likely to
3678 be more costly on chips that have load delay slots and we are
3679 compiling without running the scheduler (so no splitting
3680 occurred before the final instruction emission).
3682 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3684 if (!cfun->machine->after_arm_reorg
3685 && !cond
3686 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3687 1, 0)
3688 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3689 + (code != SET))))
3691 if (code == SET)
3693 /* Currently SET is the only monadic value for CODE; all
3694 the rest are dyadic. */
3695 if (TARGET_USE_MOVT)
3696 arm_emit_movpair (target, GEN_INT (val));
3697 else
3698 emit_set_insn (target, GEN_INT (val));
3700 return 1;
3702 else
3704 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3706 if (TARGET_USE_MOVT)
3707 arm_emit_movpair (temp, GEN_INT (val));
3708 else
3709 emit_set_insn (temp, GEN_INT (val));
3711 /* For MINUS, the constant is the minuend (VAL - source), since
3712 (source - VAL) is always passed as (source + (-VAL)). */
3713 if (code == MINUS)
3714 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3715 else
3716 emit_set_insn (target,
3717 gen_rtx_fmt_ee (code, mode, source, temp));
3718 return 2;
3723 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3727 /* Return a sequence of integers in RETURN_SEQUENCE that fit into
3728 ARM/THUMB2 immediates and add up to VAL.
3729 The function return value gives the number of insns required. */
3730 static int
3731 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3732 struct four_ints *return_sequence)
3734 int best_consecutive_zeros = 0;
3735 int i;
3736 int best_start = 0;
3737 int insns1, insns2;
3738 struct four_ints tmp_sequence;
3740 /* If we aren't targeting ARM, the best place to start is always at
3741 the bottom, otherwise look more closely. */
3742 if (TARGET_ARM)
3744 for (i = 0; i < 32; i += 2)
3746 int consecutive_zeros = 0;
3748 if (!(val & (3 << i)))
3750 while ((i < 32) && !(val & (3 << i)))
3752 consecutive_zeros += 2;
3753 i += 2;
3755 if (consecutive_zeros > best_consecutive_zeros)
3757 best_consecutive_zeros = consecutive_zeros;
3758 best_start = i - consecutive_zeros;
3760 i -= 2;
3765 /* So long as it won't require any more insns to do so, it's
3766 desirable to emit a small constant (in bits 0...9) in the last
3767 insn. This way there is more chance that it can be combined with
3768 a later addressing insn to form a pre-indexed load or store
3769 operation. Consider:
3771 *((volatile int *)0xe0000100) = 1;
3772 *((volatile int *)0xe0000110) = 2;
3774 We want this to wind up as:
3776 mov rA, #0xe0000000
3777 mov rB, #1
3778 str rB, [rA, #0x100]
3779 mov rB, #2
3780 str rB, [rA, #0x110]
3782 rather than having to synthesize both large constants from scratch.
3784 Therefore, we calculate how many insns would be required to emit
3785 the constant starting from `best_start', and also starting from
3786 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3787 yield a shorter sequence, we may as well use zero. */
3788 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3789 if (best_start != 0
3790 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3792 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3793 if (insns2 <= insns1)
3795 *return_sequence = tmp_sequence;
3796 insns1 = insns2;
3800 return insns1;
3803 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3804 static int
3805 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3806 struct four_ints *return_sequence, int i)
3808 int remainder = val & 0xffffffff;
3809 int insns = 0;
3811 /* Try and find a way of doing the job in either two or three
3812 instructions.
3814 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3815 location. We start at position I. This may be the MSB, or
3816 optimal_immediate_sequence may have positioned it at the largest block
3817 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3818 wrapping around to the top of the word when we drop off the bottom.
3819 In the worst case this code should produce no more than four insns.
3821 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3822 constants, shifted to any arbitrary location. We should always start
3823 at the MSB. */
3826 int end;
3827 unsigned int b1, b2, b3, b4;
3828 unsigned HOST_WIDE_INT result;
3829 int loc;
3831 gcc_assert (insns < 4);
3833 if (i <= 0)
3834 i += 32;
3836 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3837 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3839 loc = i;
3840 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3841 /* We can use addw/subw for the last 12 bits. */
3842 result = remainder;
3843 else
3845 /* Use an 8-bit shifted/rotated immediate. */
3846 end = i - 8;
3847 if (end < 0)
3848 end += 32;
3849 result = remainder & ((0x0ff << end)
3850 | ((i < end) ? (0xff >> (32 - end))
3851 : 0));
3852 i -= 8;
3855 else
3857 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3858 arbitrary shifts. */
3859 i -= TARGET_ARM ? 2 : 1;
3860 continue;
3863 /* Next, see if we can do a better job with a thumb2 replicated
3864 constant.
3866 We do it this way around to catch the cases like 0x01F001E0 where
3867 two 8-bit immediates would work, but a replicated constant would
3868 make it worse.
3870 TODO: 16-bit constants that don't clear all the bits, but still win.
3871 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3872 if (TARGET_THUMB2)
3874 b1 = (remainder & 0xff000000) >> 24;
3875 b2 = (remainder & 0x00ff0000) >> 16;
3876 b3 = (remainder & 0x0000ff00) >> 8;
3877 b4 = remainder & 0xff;
3879 if (loc > 24)
3881 /* The 8-bit immediate already found clears b1 (and maybe b2),
3882 but must leave b3 and b4 alone. */
3884 /* First try to find a 32-bit replicated constant that clears
3885 almost everything. We can assume that we can't do it in one,
3886 or else we wouldn't be here. */
3887 unsigned int tmp = b1 & b2 & b3 & b4;
3888 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3889 + (tmp << 24);
3890 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3891 + (tmp == b3) + (tmp == b4);
3892 if (tmp
3893 && (matching_bytes >= 3
3894 || (matching_bytes == 2
3895 && const_ok_for_op (remainder & ~tmp2, code))))
3897 /* At least 3 of the bytes match, and the fourth has at
3898 least as many bits set, or two of the bytes match
3899 and it will only require one more insn to finish. */
3900 result = tmp2;
3901 i = tmp != b1 ? 32
3902 : tmp != b2 ? 24
3903 : tmp != b3 ? 16
3904 : 8;
3907 /* Second, try to find a 16-bit replicated constant that can
3908 leave three of the bytes clear. If b2 or b4 is already
3909 zero, then we can. If the 8-bit from above would not
3910 clear b2 anyway, then we still win. */
3911 else if (b1 == b3 && (!b2 || !b4
3912 || (remainder & 0x00ff0000 & ~result)))
3914 result = remainder & 0xff00ff00;
3915 i = 24;
3918 else if (loc > 16)
3920 /* The 8-bit immediate already found clears b2 (and maybe b3)
3921 and we don't get here unless b1 is already clear, but it will
3922 leave b4 unchanged. */
3924 /* If we can clear b2 and b4 at once, then we win, since the
3925 8-bits couldn't possibly reach that far. */
3926 if (b2 == b4)
3928 result = remainder & 0x00ff00ff;
3929 i = 16;
3934 return_sequence->i[insns++] = result;
3935 remainder &= ~result;
3937 if (code == SET || code == MINUS)
3938 code = PLUS;
3940 while (remainder);
3942 return insns;
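/* A worked example of the loop above, for ARM mode and code == SET with
   VAL == 0x12340000 (not itself a valid immediate): the first iteration
   peels off 0x12000000 (0x12 rotated to bits 24..31) and the second peels
   off 0x00340000 (0x34 rotated to bits 16..23), so RETURN_SEQUENCE holds
   { 0x12000000, 0x00340000 } and two insns are reported, which
   arm_gen_constant typically emits as a mov of the first value followed by
   an add of the second.  The exact split can differ depending on where
   optimal_immediate_sequence chose to start.  */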
3945 /* Emit an instruction with the indicated PATTERN. If COND is
3946 non-NULL, conditionalize the execution of the instruction on COND
3947 being true. */
3949 static void
3950 emit_constant_insn (rtx cond, rtx pattern)
3952 if (cond)
3953 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3954 emit_insn (pattern);
3957 /* As above, but extra parameter GENERATE which, if clear, suppresses
3958 RTL generation. */
3960 static int
3961 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3962 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3963 int generate)
3965 int can_invert = 0;
3966 int can_negate = 0;
3967 int final_invert = 0;
3968 int i;
3969 int set_sign_bit_copies = 0;
3970 int clear_sign_bit_copies = 0;
3971 int clear_zero_bit_copies = 0;
3972 int set_zero_bit_copies = 0;
3973 int insns = 0, neg_insns, inv_insns;
3974 unsigned HOST_WIDE_INT temp1, temp2;
3975 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3976 struct four_ints *immediates;
3977 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3979 /* Find out which operations are safe for a given CODE. Also do a quick
3980 check for degenerate cases; these can occur when DImode operations
3981 are split. */
3982 switch (code)
3984 case SET:
3985 can_invert = 1;
3986 break;
3988 case PLUS:
3989 can_negate = 1;
3990 break;
3992 case IOR:
3993 if (remainder == 0xffffffff)
3995 if (generate)
3996 emit_constant_insn (cond,
3997 gen_rtx_SET (VOIDmode, target,
3998 GEN_INT (ARM_SIGN_EXTEND (val))));
3999 return 1;
4002 if (remainder == 0)
4004 if (reload_completed && rtx_equal_p (target, source))
4005 return 0;
4007 if (generate)
4008 emit_constant_insn (cond,
4009 gen_rtx_SET (VOIDmode, target, source));
4010 return 1;
4012 break;
4014 case AND:
4015 if (remainder == 0)
4017 if (generate)
4018 emit_constant_insn (cond,
4019 gen_rtx_SET (VOIDmode, target, const0_rtx));
4020 return 1;
4022 if (remainder == 0xffffffff)
4024 if (reload_completed && rtx_equal_p (target, source))
4025 return 0;
4026 if (generate)
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, target, source));
4029 return 1;
4031 can_invert = 1;
4032 break;
4034 case XOR:
4035 if (remainder == 0)
4037 if (reload_completed && rtx_equal_p (target, source))
4038 return 0;
4039 if (generate)
4040 emit_constant_insn (cond,
4041 gen_rtx_SET (VOIDmode, target, source));
4042 return 1;
4045 if (remainder == 0xffffffff)
4047 if (generate)
4048 emit_constant_insn (cond,
4049 gen_rtx_SET (VOIDmode, target,
4050 gen_rtx_NOT (mode, source)));
4051 return 1;
4053 final_invert = 1;
4054 break;
4056 case MINUS:
4057 /* We treat MINUS as (val - source), since (source - val) is always
4058 passed as (source + (-val)). */
4059 if (remainder == 0)
4061 if (generate)
4062 emit_constant_insn (cond,
4063 gen_rtx_SET (VOIDmode, target,
4064 gen_rtx_NEG (mode, source)));
4065 return 1;
4067 if (const_ok_for_arm (val))
4069 if (generate)
4070 emit_constant_insn (cond,
4071 gen_rtx_SET (VOIDmode, target,
4072 gen_rtx_MINUS (mode, GEN_INT (val),
4073 source)));
4074 return 1;
4077 break;
4079 default:
4080 gcc_unreachable ();
4083 /* If we can do it in one insn get out quickly. */
4084 if (const_ok_for_op (val, code))
4086 if (generate)
4087 emit_constant_insn (cond,
4088 gen_rtx_SET (VOIDmode, target,
4089 (source
4090 ? gen_rtx_fmt_ee (code, mode, source,
4091 GEN_INT (val))
4092 : GEN_INT (val))));
4093 return 1;
4096 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4097 insn. */
4098 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4099 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4101 if (generate)
4103 if (mode == SImode && i == 16)
4104 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4105 smaller insn. */
4106 emit_constant_insn (cond,
4107 gen_zero_extendhisi2
4108 (target, gen_lowpart (HImode, source)));
4109 else
4110 /* Extz only supports SImode, but we can coerce the operands
4111 into that mode. */
4112 emit_constant_insn (cond,
4113 gen_extzv_t2 (gen_lowpart (SImode, target),
4114 gen_lowpart (SImode, source),
4115 GEN_INT (i), const0_rtx));
4118 return 1;
4121 /* Calculate a few attributes that may be useful for specific
4122 optimizations. */
4123 /* Count number of leading zeros. */
4124 for (i = 31; i >= 0; i--)
4126 if ((remainder & (1 << i)) == 0)
4127 clear_sign_bit_copies++;
4128 else
4129 break;
4132 /* Count number of leading 1's. */
4133 for (i = 31; i >= 0; i--)
4135 if ((remainder & (1 << i)) != 0)
4136 set_sign_bit_copies++;
4137 else
4138 break;
4141 /* Count number of trailing zeros. */
4142 for (i = 0; i <= 31; i++)
4144 if ((remainder & (1 << i)) == 0)
4145 clear_zero_bit_copies++;
4146 else
4147 break;
4150 /* Count number of trailing 1's. */
4151 for (i = 0; i <= 31; i++)
4153 if ((remainder & (1 << i)) != 0)
4154 set_zero_bit_copies++;
4155 else
4156 break;
4159 switch (code)
4161 case SET:
4162 /* See if we can do this by sign_extending a constant that is known
4163 to be negative. This is a good way of doing it, since the shift
4164 may well merge into a subsequent insn. */
4165 if (set_sign_bit_copies > 1)
4167 if (const_ok_for_arm
4168 (temp1 = ARM_SIGN_EXTEND (remainder
4169 << (set_sign_bit_copies - 1))))
4171 if (generate)
4173 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4174 emit_constant_insn (cond,
4175 gen_rtx_SET (VOIDmode, new_src,
4176 GEN_INT (temp1)));
4177 emit_constant_insn (cond,
4178 gen_ashrsi3 (target, new_src,
4179 GEN_INT (set_sign_bit_copies - 1)));
4181 return 2;
4183 /* For an inverted constant, we will need to set the low bits,
4184 these will be shifted out of harm's way. */
4185 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4186 if (const_ok_for_arm (~temp1))
4188 if (generate)
4190 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4191 emit_constant_insn (cond,
4192 gen_rtx_SET (VOIDmode, new_src,
4193 GEN_INT (temp1)));
4194 emit_constant_insn (cond,
4195 gen_ashrsi3 (target, new_src,
4196 GEN_INT (set_sign_bit_copies - 1)));
4198 return 2;
4202 /* See if we can calculate the value as the difference between two
4203 valid immediates. */
4204 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4206 int topshift = clear_sign_bit_copies & ~1;
4208 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4209 & (0xff000000 >> topshift));
4211 /* If temp1 is zero, then that means the 9 most significant
4212 bits of remainder were 1 and we've caused it to overflow.
4213 When topshift is 0 we don't need to do anything since we
4214 can borrow from 'bit 32'. */
4215 if (temp1 == 0 && topshift != 0)
4216 temp1 = 0x80000000 >> (topshift - 1);
4218 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4220 if (const_ok_for_arm (temp2))
4222 if (generate)
4224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (VOIDmode, new_src,
4227 GEN_INT (temp1)));
4228 emit_constant_insn (cond,
4229 gen_addsi3 (target, new_src,
4230 GEN_INT (-temp2)));
4233 return 2;
4237 /* See if we can generate this by setting the bottom (or the top)
4238 16 bits, and then shifting these into the other half of the
4239 word. We only look for the simplest cases, to do more would cost
4240 too much. Be careful, however, not to generate this when the
4241 alternative would take fewer insns. */
4242 if (val & 0xffff0000)
4244 temp1 = remainder & 0xffff0000;
4245 temp2 = remainder & 0x0000ffff;
4247 /* Overlaps outside this range are best done using other methods. */
4248 for (i = 9; i < 24; i++)
4250 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4251 && !const_ok_for_arm (temp2))
4253 rtx new_src = (subtargets
4254 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4255 : target);
4256 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4257 source, subtargets, generate);
4258 source = new_src;
4259 if (generate)
4260 emit_constant_insn
4261 (cond,
4262 gen_rtx_SET
4263 (VOIDmode, target,
4264 gen_rtx_IOR (mode,
4265 gen_rtx_ASHIFT (mode, source,
4266 GEN_INT (i)),
4267 source)));
4268 return insns + 1;
4272 /* Don't duplicate cases already considered. */
4273 for (i = 17; i < 24; i++)
4275 if (((temp1 | (temp1 >> i)) == remainder)
4276 && !const_ok_for_arm (temp1))
4278 rtx new_src = (subtargets
4279 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4280 : target);
4281 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4282 source, subtargets, generate);
4283 source = new_src;
4284 if (generate)
4285 emit_constant_insn
4286 (cond,
4287 gen_rtx_SET (VOIDmode, target,
4288 gen_rtx_IOR
4289 (mode,
4290 gen_rtx_LSHIFTRT (mode, source,
4291 GEN_INT (i)),
4292 source)));
4293 return insns + 1;
4297 break;
4299 case IOR:
4300 case XOR:
4301 /* If we have IOR or XOR, and the constant can be loaded in a
4302 single instruction, and we can find a temporary to put it in,
4303 then this can be done in two instructions instead of 3-4. */
4304 if (subtargets
4305 /* TARGET can't be NULL if SUBTARGETS is 0 */
4306 || (reload_completed && !reg_mentioned_p (target, source)))
4308 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4310 if (generate)
4312 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4314 emit_constant_insn (cond,
4315 gen_rtx_SET (VOIDmode, sub,
4316 GEN_INT (val)));
4317 emit_constant_insn (cond,
4318 gen_rtx_SET (VOIDmode, target,
4319 gen_rtx_fmt_ee (code, mode,
4320 source, sub)));
4322 return 2;
4326 if (code == XOR)
4327 break;
4329 /* Convert
4330 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4331 and the remainder 0s, e.g. 0xfff00000) into
4332 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4334 This can be done in 2 instructions by using shifts with mov or mvn.
4335 E.g. for
4336 x = x | 0xfff00000;
4337 we generate:
4338 mvn r0, r0, asl #12
4339 mvn r0, r0, lsr #12 */
4340 if (set_sign_bit_copies > 8
4341 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4343 if (generate)
4345 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4346 rtx shift = GEN_INT (set_sign_bit_copies);
4348 emit_constant_insn
4349 (cond,
4350 gen_rtx_SET (VOIDmode, sub,
4351 gen_rtx_NOT (mode,
4352 gen_rtx_ASHIFT (mode,
4353 source,
4354 shift))));
4355 emit_constant_insn
4356 (cond,
4357 gen_rtx_SET (VOIDmode, target,
4358 gen_rtx_NOT (mode,
4359 gen_rtx_LSHIFTRT (mode, sub,
4360 shift))));
4362 return 2;
4365 /* Convert
4366 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4368 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4370 E.g. for r0 = r0 | 0xfff we generate:
4371 mvn r0, r0, lsr #12
4372 mvn r0, r0, asl #12
4375 if (set_zero_bit_copies > 8
4376 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4378 if (generate)
4380 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4381 rtx shift = GEN_INT (set_zero_bit_copies);
4383 emit_constant_insn
4384 (cond,
4385 gen_rtx_SET (VOIDmode, sub,
4386 gen_rtx_NOT (mode,
4387 gen_rtx_LSHIFTRT (mode,
4388 source,
4389 shift))));
4390 emit_constant_insn
4391 (cond,
4392 gen_rtx_SET (VOIDmode, target,
4393 gen_rtx_NOT (mode,
4394 gen_rtx_ASHIFT (mode, sub,
4395 shift))));
4397 return 2;
4400 /* This will never be reached for Thumb2 because orn is a valid
4401 instruction. This is for Thumb1 and the ARM 32 bit cases.
4403 x = y | constant (such that ~constant is a valid constant)
4404 Transform this to
4405 x = ~(~y & ~constant).
4407 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4409 if (generate)
4411 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4412 emit_constant_insn (cond,
4413 gen_rtx_SET (VOIDmode, sub,
4414 gen_rtx_NOT (mode, source)));
4415 source = sub;
4416 if (subtargets)
4417 sub = gen_reg_rtx (mode);
4418 emit_constant_insn (cond,
4419 gen_rtx_SET (VOIDmode, sub,
4420 gen_rtx_AND (mode, source,
4421 GEN_INT (temp1))));
4422 emit_constant_insn (cond,
4423 gen_rtx_SET (VOIDmode, target,
4424 gen_rtx_NOT (mode, sub)));
4426 return 3;
4428 break;
4430 case AND:
4431 /* See if two shifts will do 2 or more insn's worth of work. */
4432 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4434 HOST_WIDE_INT shift_mask = ((0xffffffff
4435 << (32 - clear_sign_bit_copies))
4436 & 0xffffffff);
4438 if ((remainder | shift_mask) != 0xffffffff)
4440 if (generate)
4442 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4443 insns = arm_gen_constant (AND, mode, cond,
4444 remainder | shift_mask,
4445 new_src, source, subtargets, 1);
4446 source = new_src;
4448 else
4450 rtx targ = subtargets ? NULL_RTX : target;
4451 insns = arm_gen_constant (AND, mode, cond,
4452 remainder | shift_mask,
4453 targ, source, subtargets, 0);
4457 if (generate)
4459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4460 rtx shift = GEN_INT (clear_sign_bit_copies);
4462 emit_insn (gen_ashlsi3 (new_src, source, shift));
4463 emit_insn (gen_lshrsi3 (target, new_src, shift));
4466 return insns + 2;
4469 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4471 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4473 if ((remainder | shift_mask) != 0xffffffff)
4475 if (generate)
4477 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4479 insns = arm_gen_constant (AND, mode, cond,
4480 remainder | shift_mask,
4481 new_src, source, subtargets, 1);
4482 source = new_src;
4484 else
4486 rtx targ = subtargets ? NULL_RTX : target;
4488 insns = arm_gen_constant (AND, mode, cond,
4489 remainder | shift_mask,
4490 targ, source, subtargets, 0);
4494 if (generate)
4496 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4497 rtx shift = GEN_INT (clear_zero_bit_copies);
4499 emit_insn (gen_lshrsi3 (new_src, source, shift));
4500 emit_insn (gen_ashlsi3 (target, new_src, shift));
4503 return insns + 2;
4506 break;
4508 default:
4509 break;
4512 /* Calculate what the instruction sequences would be if we generated it
4513 normally, negated, or inverted. */
4514 if (code == AND)
4515 /* AND cannot be split into multiple insns, so invert and use BIC. */
4516 insns = 99;
4517 else
4518 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4520 if (can_negate)
4521 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4522 &neg_immediates);
4523 else
4524 neg_insns = 99;
4526 if (can_invert || final_invert)
4527 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4528 &inv_immediates);
4529 else
4530 inv_insns = 99;
4532 immediates = &pos_immediates;
4534 /* Is the negated immediate sequence more efficient? */
4535 if (neg_insns < insns && neg_insns <= inv_insns)
4537 insns = neg_insns;
4538 immediates = &neg_immediates;
4540 else
4541 can_negate = 0;
4543 /* Is the inverted immediate sequence more efficient?
4544 We must allow for an extra NOT instruction for XOR operations, although
4545 there is some chance that the final 'mvn' will get optimized later. */
4546 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4548 insns = inv_insns;
4549 immediates = &inv_immediates;
4551 else
4553 can_invert = 0;
4554 final_invert = 0;
4557 /* Now output the chosen sequence as instructions. */
4558 if (generate)
4560 for (i = 0; i < insns; i++)
4562 rtx new_src, temp1_rtx;
4564 temp1 = immediates->i[i];
4566 if (code == SET || code == MINUS)
4567 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4568 else if ((final_invert || i < (insns - 1)) && subtargets)
4569 new_src = gen_reg_rtx (mode);
4570 else
4571 new_src = target;
4573 if (can_invert)
4574 temp1 = ~temp1;
4575 else if (can_negate)
4576 temp1 = -temp1;
4578 temp1 = trunc_int_for_mode (temp1, mode);
4579 temp1_rtx = GEN_INT (temp1);
4581 if (code == SET)
4583 else if (code == MINUS)
4584 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4585 else
4586 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4588 emit_constant_insn (cond,
4589 gen_rtx_SET (VOIDmode, new_src,
4590 temp1_rtx));
4591 source = new_src;
4593 if (code == SET)
4595 can_negate = can_invert;
4596 can_invert = 0;
4597 code = PLUS;
4599 else if (code == MINUS)
4600 code = PLUS;
4604 if (final_invert)
4606 if (generate)
4607 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4608 gen_rtx_NOT (mode, source)));
4609 insns++;
4612 return insns;
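/* Two illustrative outcomes of the selection above (ARM mode, SImode):

       r0 &= 0xffe007ff: neither the mask nor its inverse is a single valid
           immediate, so the inverted sequence {0x001fc000, 0x00003800} is
           chosen and the AND is emitted as two BICs:
               bic r0, r0, #0x1fc000
               bic r0, r0, #0x3800

       r0 += 0xfffedfcc (i.e. r0 -= 0x12034): the negated sequence
           {0x00012000, 0x00000034} needs only two insns, so the addition
           becomes:
               sub r0, r0, #0x12000
               sub r0, r0, #0x34

   The exact split can vary with the target and optimization options.  */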
4615 /* Canonicalize a comparison so that we are more likely to recognize it.
4616 This can be done for a few constant compares, where we can make the
4617 immediate value easier to load. */
4619 static void
4620 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4621 bool op0_preserve_value)
4623 machine_mode mode;
4624 unsigned HOST_WIDE_INT i, maxval;
4626 mode = GET_MODE (*op0);
4627 if (mode == VOIDmode)
4628 mode = GET_MODE (*op1);
4630 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4632 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4633 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4634 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4635 for GTU/LEU in Thumb mode. */
4636 if (mode == DImode)
4638 rtx tem;
4640 if (*code == GT || *code == LE
4641 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4643 /* Missing comparison. First try to use an available
4644 comparison. */
4645 if (CONST_INT_P (*op1))
4647 i = INTVAL (*op1);
4648 switch (*code)
4650 case GT:
4651 case LE:
4652 if (i != maxval
4653 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4655 *op1 = GEN_INT (i + 1);
4656 *code = *code == GT ? GE : LT;
4657 return;
4659 break;
4660 case GTU:
4661 case LEU:
4662 if (i != ~((unsigned HOST_WIDE_INT) 0)
4663 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4665 *op1 = GEN_INT (i + 1);
4666 *code = *code == GTU ? GEU : LTU;
4667 return;
4669 break;
4670 default:
4671 gcc_unreachable ();
4675 /* If that did not work, reverse the condition. */
4676 if (!op0_preserve_value)
4678 tem = *op0;
4679 *op0 = *op1;
4680 *op1 = tem;
4681 *code = (int)swap_condition ((enum rtx_code)*code);
4684 return;
4687 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4688 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4689 to facilitate possible combining with a cmp into 'ands'. */
4690 if (mode == SImode
4691 && GET_CODE (*op0) == ZERO_EXTEND
4692 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4693 && GET_MODE (XEXP (*op0, 0)) == QImode
4694 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4695 && subreg_lowpart_p (XEXP (*op0, 0))
4696 && *op1 == const0_rtx)
4697 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4698 GEN_INT (255));
4700 /* Comparisons smaller than DImode. Only adjust comparisons against
4701 an out-of-range constant. */
4702 if (!CONST_INT_P (*op1)
4703 || const_ok_for_arm (INTVAL (*op1))
4704 || const_ok_for_arm (- INTVAL (*op1)))
4705 return;
4707 i = INTVAL (*op1);
4709 switch (*code)
4711 case EQ:
4712 case NE:
4713 return;
4715 case GT:
4716 case LE:
4717 if (i != maxval
4718 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4720 *op1 = GEN_INT (i + 1);
4721 *code = *code == GT ? GE : LT;
4722 return;
4724 break;
4726 case GE:
4727 case LT:
4728 if (i != ~maxval
4729 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4731 *op1 = GEN_INT (i - 1);
4732 *code = *code == GE ? GT : LE;
4733 return;
4735 break;
4737 case GTU:
4738 case LEU:
4739 if (i != ~((unsigned HOST_WIDE_INT) 0)
4740 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4742 *op1 = GEN_INT (i + 1);
4743 *code = *code == GTU ? GEU : LTU;
4744 return;
4746 break;
4748 case GEU:
4749 case LTU:
4750 if (i != 0
4751 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4753 *op1 = GEN_INT (i - 1);
4754 *code = *code == GEU ? GTU : LEU;
4755 return;
4757 break;
4759 default:
4760 gcc_unreachable ();
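/* For illustration: with SImode operands, "x > 0x00ffffff" uses GT against
   a constant that is not a valid immediate (and neither is its negation),
   but 0x01000000 is, so the code above rewrites the comparison as
   "x >= 0x01000000" (GT -> GE with OP1 incremented).  The unsigned and
   lower-bound cases are adjusted the same way in the opposite direction.  */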
4765 /* Define how to find the value returned by a function. */
4767 static rtx
4768 arm_function_value(const_tree type, const_tree func,
4769 bool outgoing ATTRIBUTE_UNUSED)
4771 machine_mode mode;
4772 int unsignedp ATTRIBUTE_UNUSED;
4773 rtx r ATTRIBUTE_UNUSED;
4775 mode = TYPE_MODE (type);
4777 if (TARGET_AAPCS_BASED)
4778 return aapcs_allocate_return_reg (mode, type, func);
4780 /* Promote integer types. */
4781 if (INTEGRAL_TYPE_P (type))
4782 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4784 /* Promote small structs returned in a register to full-word size
4785 for big-endian AAPCS. */
4786 if (arm_return_in_msb (type))
4788 HOST_WIDE_INT size = int_size_in_bytes (type);
4789 if (size % UNITS_PER_WORD != 0)
4791 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4792 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4796 return arm_libcall_value_1 (mode);
4799 /* libcall hashtable helpers. */
4801 struct libcall_hasher : typed_noop_remove <rtx_def>
4803 typedef rtx_def value_type;
4804 typedef rtx_def compare_type;
4805 static inline hashval_t hash (const value_type *);
4806 static inline bool equal (const value_type *, const compare_type *);
4807 static inline void remove (value_type *);
4810 inline bool
4811 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4813 return rtx_equal_p (p1, p2);
4816 inline hashval_t
4817 libcall_hasher::hash (const value_type *p1)
4819 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4822 typedef hash_table<libcall_hasher> libcall_table_type;
4824 static void
4825 add_libcall (libcall_table_type *htab, rtx libcall)
4827 *htab->find_slot (libcall, INSERT) = libcall;
4830 static bool
4831 arm_libcall_uses_aapcs_base (const_rtx libcall)
4833 static bool init_done = false;
4834 static libcall_table_type *libcall_htab = NULL;
4836 if (!init_done)
4838 init_done = true;
4840 libcall_htab = new libcall_table_type (31);
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4850 add_libcall (libcall_htab,
4851 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4852 add_libcall (libcall_htab,
4853 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4854 add_libcall (libcall_htab,
4855 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4856 add_libcall (libcall_htab,
4857 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4859 add_libcall (libcall_htab,
4860 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4861 add_libcall (libcall_htab,
4862 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4863 add_libcall (libcall_htab,
4864 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4865 add_libcall (libcall_htab,
4866 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4867 add_libcall (libcall_htab,
4868 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4869 add_libcall (libcall_htab,
4870 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4871 add_libcall (libcall_htab,
4872 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4873 add_libcall (libcall_htab,
4874 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4876 /* Values from double-precision helper functions are returned in core
4877 registers if the selected core only supports single-precision
4878 arithmetic, even if we are using the hard-float ABI. The same is
4879 true for single-precision helpers, but we will never be using the
4880 hard-float ABI on a CPU which doesn't support single-precision
4881 operations in hardware. */
4882 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4883 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4884 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4885 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4886 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4887 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4888 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4889 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4890 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4891 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4892 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4893 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4894 SFmode));
4895 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4896 DFmode));
4899 return libcall && libcall_htab->find (libcall) != NULL;
4902 static rtx
4903 arm_libcall_value_1 (machine_mode mode)
4905 if (TARGET_AAPCS_BASED)
4906 return aapcs_libcall_value (mode);
4907 else if (TARGET_IWMMXT_ABI
4908 && arm_vector_mode_supported_p (mode))
4909 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4910 else
4911 return gen_rtx_REG (mode, ARG_REGISTER (1));
4914 /* Define how to find the value returned by a library function
4915 assuming the value has mode MODE. */
4917 static rtx
4918 arm_libcall_value (machine_mode mode, const_rtx libcall)
4920 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4921 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4923 /* The following libcalls return their result in integer registers,
4924 even though they return a floating point value. */
4925 if (arm_libcall_uses_aapcs_base (libcall))
4926 return gen_rtx_REG (mode, ARG_REGISTER(1));
4930 return arm_libcall_value_1 (mode);
4933 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4935 static bool
4936 arm_function_value_regno_p (const unsigned int regno)
4938 if (regno == ARG_REGISTER (1)
4939 || (TARGET_32BIT
4940 && TARGET_AAPCS_BASED
4941 && TARGET_VFP
4942 && TARGET_HARD_FLOAT
4943 && regno == FIRST_VFP_REGNUM)
4944 || (TARGET_IWMMXT_ABI
4945 && regno == FIRST_IWMMXT_REGNUM))
4946 return true;
4948 return false;
4951 /* Determine the amount of memory needed to store the possible return
4952 registers of an untyped call. */
4954 arm_apply_result_size (void)
4956 int size = 16;
4958 if (TARGET_32BIT)
4960 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4961 size += 32;
4962 if (TARGET_IWMMXT_ABI)
4963 size += 8;
4966 return size;
4969 /* Decide whether TYPE should be returned in memory (true)
4970 or in a register (false). FNTYPE is the type of the function making
4971 the call. */
4972 static bool
4973 arm_return_in_memory (const_tree type, const_tree fntype)
4975 HOST_WIDE_INT size;
4977 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4979 if (TARGET_AAPCS_BASED)
4981 /* Simple, non-aggregate types (i.e. not including vectors and
4982 complex) are always returned in a register (or registers).
4983 We don't care about which register here, so we can short-cut
4984 some of the detail. */
4985 if (!AGGREGATE_TYPE_P (type)
4986 && TREE_CODE (type) != VECTOR_TYPE
4987 && TREE_CODE (type) != COMPLEX_TYPE)
4988 return false;
4990 /* Any return value that is no larger than one word can be
4991 returned in r0. */
4992 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4993 return false;
4995 /* Check any available co-processors to see if they accept the
4996 type as a register candidate (VFP, for example, can return
4997 some aggregates in consecutive registers). These aren't
4998 available if the call is variadic. */
4999 if (aapcs_select_return_coproc (type, fntype) >= 0)
5000 return false;
5002 /* Vector values should be returned using ARM registers, not
5003 memory (unless they're over 16 bytes, which will break since
5004 we only have four call-clobbered registers to play with). */
5005 if (TREE_CODE (type) == VECTOR_TYPE)
5006 return (size < 0 || size > (4 * UNITS_PER_WORD));
5008 /* The rest go in memory. */
5009 return true;
5012 if (TREE_CODE (type) == VECTOR_TYPE)
5013 return (size < 0 || size > (4 * UNITS_PER_WORD));
5015 if (!AGGREGATE_TYPE_P (type) &&
5016 (TREE_CODE (type) != VECTOR_TYPE))
5017 /* All simple types are returned in registers. */
5018 return false;
5020 if (arm_abi != ARM_ABI_APCS)
5022 /* ATPCS and later return aggregate types in memory only if they are
5023 larger than a word (or are of variable size). */
5024 return (size < 0 || size > UNITS_PER_WORD);
5027 /* For the arm-wince targets we choose to be compatible with Microsoft's
5028 ARM and Thumb compilers, which always return aggregates in memory. */
5029 #ifndef ARM_WINCE
5030 /* All structures/unions bigger than one word are returned in memory.
5031 Also catch the case where int_size_in_bytes returns -1. In this case
5032 the aggregate is either huge or of variable size, and in either case
5033 we will want to return it via memory and not in a register. */
5034 if (size < 0 || size > UNITS_PER_WORD)
5035 return true;
5037 if (TREE_CODE (type) == RECORD_TYPE)
5039 tree field;
5041 /* For a struct the APCS says that we only return in a register
5042 if the type is 'integer like' and every addressable element
5043 has an offset of zero. For practical purposes this means
5044 that the structure can have at most one non bit-field element
5045 and that this element must be the first one in the structure. */
5047 /* Find the first field, ignoring non FIELD_DECL things which will
5048 have been created by C++. */
5049 for (field = TYPE_FIELDS (type);
5050 field && TREE_CODE (field) != FIELD_DECL;
5051 field = DECL_CHAIN (field))
5052 continue;
5054 if (field == NULL)
5055 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5057 /* Check that the first field is valid for returning in a register. */
5059 /* ... Floats are not allowed */
5060 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5061 return true;
5063 /* ... Aggregates that are not themselves valid for returning in
5064 a register are not allowed. */
5065 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5066 return true;
5068 /* Now check the remaining fields, if any. Only bitfields are allowed,
5069 since they are not addressable. */
5070 for (field = DECL_CHAIN (field);
5071 field;
5072 field = DECL_CHAIN (field))
5074 if (TREE_CODE (field) != FIELD_DECL)
5075 continue;
5077 if (!DECL_BIT_FIELD_TYPE (field))
5078 return true;
5081 return false;
5084 if (TREE_CODE (type) == UNION_TYPE)
5086 tree field;
5088 /* Unions can be returned in registers if every element is
5089 integral, or can be returned in an integer register. */
5090 for (field = TYPE_FIELDS (type);
5091 field;
5092 field = DECL_CHAIN (field))
5094 if (TREE_CODE (field) != FIELD_DECL)
5095 continue;
5097 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5098 return true;
5100 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5101 return true;
5104 return false;
5106 #endif /* not ARM_WINCE */
5108 /* Return all other types in memory. */
5109 return true;
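/* Roughly, for the default ABI settings and 32-bit int, the checks above
   give (an illustration, not an exhaustive table):

       int, float, pointer types            -> returned in a register
       struct { int a; }                    -> register (fits in one word)
       struct { int a, b; }                 -> memory (larger than a word and
                                               no co-processor claims it)
       struct { float x; }                  -> register under AAPCS (one
                                               word), memory under old APCS
                                               (float member)
       struct { double a, b; } with the
       AAPCS hard-float (VFP) variant       -> VFP registers via the
                                               co-processor hook, not memory.  */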
5112 const struct pcs_attribute_arg
5114 const char *arg;
5115 enum arm_pcs value;
5116 } pcs_attribute_args[] =
5118 {"aapcs", ARM_PCS_AAPCS},
5119 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5120 #if 0
5121 /* We could recognize these, but changes would be needed elsewhere
5122 * to implement them. */
5123 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5124 {"atpcs", ARM_PCS_ATPCS},
5125 {"apcs", ARM_PCS_APCS},
5126 #endif
5127 {NULL, ARM_PCS_UNKNOWN}
5130 static enum arm_pcs
5131 arm_pcs_from_attribute (tree attr)
5133 const struct pcs_attribute_arg *ptr;
5134 const char *arg;
5136 /* Get the value of the argument. */
5137 if (TREE_VALUE (attr) == NULL_TREE
5138 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5139 return ARM_PCS_UNKNOWN;
5141 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5143 /* Check it against the list of known arguments. */
5144 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5145 if (streq (arg, ptr->arg))
5146 return ptr->value;
5148 /* An unrecognized PCS name. */
5149 return ARM_PCS_UNKNOWN;
5152 /* Get the PCS variant to use for this call. TYPE is the function's type
5153 specification, DECL is the specific declaration. DECL may be null if
5154 the call could be indirect or if this is a library call. */
5155 static enum arm_pcs
5156 arm_get_pcs_model (const_tree type, const_tree decl)
5158 bool user_convention = false;
5159 enum arm_pcs user_pcs = arm_pcs_default;
5160 tree attr;
5162 gcc_assert (type);
5164 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5165 if (attr)
5167 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5168 user_convention = true;
5171 if (TARGET_AAPCS_BASED)
5173 /* Detect varargs functions. These always use the base rules
5174 (no argument is ever a candidate for a co-processor
5175 register). */
5176 bool base_rules = stdarg_p (type);
5178 if (user_convention)
5180 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5181 sorry ("non-AAPCS derived PCS variant");
5182 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5183 error ("variadic functions must use the base AAPCS variant");
5186 if (base_rules)
5187 return ARM_PCS_AAPCS;
5188 else if (user_convention)
5189 return user_pcs;
5190 else if (decl && flag_unit_at_a_time)
5192 /* Local functions never leak outside this compilation unit,
5193 so we are free to use whatever conventions are
5194 appropriate. */
5195 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5196 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5197 if (i && i->local)
5198 return ARM_PCS_AAPCS_LOCAL;
5201 else if (user_convention && user_pcs != arm_pcs_default)
5202 sorry ("PCS variant");
5204 /* For everything else we use the target's default. */
5205 return arm_pcs_default;
5209 static void
5210 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5211 const_tree fntype ATTRIBUTE_UNUSED,
5212 rtx libcall ATTRIBUTE_UNUSED,
5213 const_tree fndecl ATTRIBUTE_UNUSED)
5215 /* Record the unallocated VFP registers. */
5216 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5217 pcum->aapcs_vfp_reg_alloc = 0;
5220 /* Walk down the type tree of TYPE counting consecutive base elements.
5221 If *MODEP is VOIDmode, then set it to the first valid floating point
5222 type. If a non-floating point type is found, or if a floating point
5223 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5224 otherwise return the count in the sub-tree. */
5225 static int
5226 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5228 machine_mode mode;
5229 HOST_WIDE_INT size;
5231 switch (TREE_CODE (type))
5233 case REAL_TYPE:
5234 mode = TYPE_MODE (type);
5235 if (mode != DFmode && mode != SFmode)
5236 return -1;
5238 if (*modep == VOIDmode)
5239 *modep = mode;
5241 if (*modep == mode)
5242 return 1;
5244 break;
5246 case COMPLEX_TYPE:
5247 mode = TYPE_MODE (TREE_TYPE (type));
5248 if (mode != DFmode && mode != SFmode)
5249 return -1;
5251 if (*modep == VOIDmode)
5252 *modep = mode;
5254 if (*modep == mode)
5255 return 2;
5257 break;
5259 case VECTOR_TYPE:
5260 /* Use V2SImode and V4SImode as representatives of all 64-bit
5261 and 128-bit vector types, whether or not those modes are
5262 supported with the present options. */
5263 size = int_size_in_bytes (type);
5264 switch (size)
5266 case 8:
5267 mode = V2SImode;
5268 break;
5269 case 16:
5270 mode = V4SImode;
5271 break;
5272 default:
5273 return -1;
5276 if (*modep == VOIDmode)
5277 *modep = mode;
5279 /* Vector modes are considered to be opaque: two vectors are
5280 equivalent for the purposes of being homogeneous aggregates
5281 if they are the same size. */
5282 if (*modep == mode)
5283 return 1;
5285 break;
5287 case ARRAY_TYPE:
5289 int count;
5290 tree index = TYPE_DOMAIN (type);
5292 /* Can't handle incomplete types nor sizes that are not
5293 fixed. */
5294 if (!COMPLETE_TYPE_P (type)
5295 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5296 return -1;
5298 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5299 if (count == -1
5300 || !index
5301 || !TYPE_MAX_VALUE (index)
5302 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5303 || !TYPE_MIN_VALUE (index)
5304 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5305 || count < 0)
5306 return -1;
5308 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5309 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5311 /* There must be no padding. */
5312 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5313 return -1;
5315 return count;
5318 case RECORD_TYPE:
5320 int count = 0;
5321 int sub_count;
5322 tree field;
5324 /* Can't handle incomplete types nor sizes that are not
5325 fixed. */
5326 if (!COMPLETE_TYPE_P (type)
5327 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5328 return -1;
5330 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5332 if (TREE_CODE (field) != FIELD_DECL)
5333 continue;
5335 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5336 if (sub_count < 0)
5337 return -1;
5338 count += sub_count;
5341 /* There must be no padding. */
5342 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5343 return -1;
5345 return count;
5348 case UNION_TYPE:
5349 case QUAL_UNION_TYPE:
5351 /* These aren't very interesting except in a degenerate case. */
5352 int count = 0;
5353 int sub_count;
5354 tree field;
5356 /* Can't handle incomplete types nor sizes that are not
5357 fixed. */
5358 if (!COMPLETE_TYPE_P (type)
5359 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5360 return -1;
5362 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5364 if (TREE_CODE (field) != FIELD_DECL)
5365 continue;
5367 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5368 if (sub_count < 0)
5369 return -1;
5370 count = count > sub_count ? count : sub_count;
5373 /* There must be no padding. */
5374 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5375 return -1;
5377 return count;
5380 default:
5381 break;
5384 return -1;
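/* Worked examples for the walk above (assuming the completeness and padding
   checks pass):

       struct { double x, y; }          -> count 2, *MODEP == DFmode
       struct { float a; float b[2]; }  -> count 3, *MODEP == SFmode
       double[4]                        -> count 4, *MODEP == DFmode
       struct { float f; double d; }    -> -1 (mixed element modes)
       struct { double d; int i; }      -> -1 (non-floating member)

   The caller (aapcs_vfp_is_call_or_return_candidate) additionally rejects
   counts greater than 4.  */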
5387 /* Return true if PCS_VARIANT should use VFP registers. */
5388 static bool
5389 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5391 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5393 static bool seen_thumb1_vfp = false;
5395 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5397 sorry ("Thumb-1 hard-float VFP ABI");
5398 /* sorry() is not immediately fatal, so only display this once. */
5399 seen_thumb1_vfp = true;
5402 return true;
5405 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5406 return false;
5408 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5409 (TARGET_VFP_DOUBLE || !is_double));
5412 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5413 suitable for passing or returning in VFP registers for the PCS
5414 variant selected. If it is, then *BASE_MODE is updated to contain
5415 a machine mode describing each element of the argument's type and
5416 *COUNT to hold the number of such elements. */
5417 static bool
5418 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5419 machine_mode mode, const_tree type,
5420 machine_mode *base_mode, int *count)
5422 machine_mode new_mode = VOIDmode;
5424 /* If we have the type information, prefer that to working things
5425 out from the mode. */
5426 if (type)
5428 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5430 if (ag_count > 0 && ag_count <= 4)
5431 *count = ag_count;
5432 else
5433 return false;
5435 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5436 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5437 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5439 *count = 1;
5440 new_mode = mode;
5442 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5444 *count = 2;
5445 new_mode = (mode == DCmode ? DFmode : SFmode);
5447 else
5448 return false;
5451 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5452 return false;
5454 *base_mode = new_mode;
5455 return true;
5458 static bool
5459 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5460 machine_mode mode, const_tree type)
5462 int count ATTRIBUTE_UNUSED;
5463 machine_mode ag_mode ATTRIBUTE_UNUSED;
5465 if (!use_vfp_abi (pcs_variant, false))
5466 return false;
5467 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5468 &ag_mode, &count);
5471 static bool
5472 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5473 const_tree type)
5475 if (!use_vfp_abi (pcum->pcs_variant, false))
5476 return false;
5478 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5479 &pcum->aapcs_vfp_rmode,
5480 &pcum->aapcs_vfp_rcount);
5483 static bool
5484 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5485 const_tree type ATTRIBUTE_UNUSED)
5487 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5488 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5489 int regno;
5491 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5492 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5494 pcum->aapcs_vfp_reg_alloc = mask << regno;
5495 if (mode == BLKmode
5496 || (mode == TImode && ! TARGET_NEON)
5497 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5499 int i;
5500 int rcount = pcum->aapcs_vfp_rcount;
5501 int rshift = shift;
5502 machine_mode rmode = pcum->aapcs_vfp_rmode;
5503 rtx par;
5504 if (!TARGET_NEON)
5506 /* Avoid using unsupported vector modes. */
5507 if (rmode == V2SImode)
5508 rmode = DImode;
5509 else if (rmode == V4SImode)
5511 rmode = DImode;
5512 rcount *= 2;
5513 rshift /= 2;
5516 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5517 for (i = 0; i < rcount; i++)
5519 rtx tmp = gen_rtx_REG (rmode,
5520 FIRST_VFP_REGNUM + regno + i * rshift);
5521 tmp = gen_rtx_EXPR_LIST
5522 (VOIDmode, tmp,
5523 GEN_INT (i * GET_MODE_SIZE (rmode)));
5524 XVECEXP (par, 0, i) = tmp;
5527 pcum->aapcs_reg = par;
5529 else
5530 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5531 return true;
5533 return false;
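/* Allocation sketch (illustrative): for a candidate with
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2, shift is 2 and
   mask is 0xf, so the loop above searches aapcs_vfp_regs_free for four
   consecutive free single-precision slots starting at an even S
   register (i.e. a free double-precision pair such as d0/d1, occupying
   s0..s3), and records the choice in aapcs_vfp_reg_alloc.  */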
5536 static rtx
5537 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5538 machine_mode mode,
5539 const_tree type ATTRIBUTE_UNUSED)
5541 if (!use_vfp_abi (pcs_variant, false))
5542 return NULL;
5544 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5546 int count;
5547 machine_mode ag_mode;
5548 int i;
5549 rtx par;
5550 int shift;
5552 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5553 &ag_mode, &count);
5555 if (!TARGET_NEON)
5557 if (ag_mode == V2SImode)
5558 ag_mode = DImode;
5559 else if (ag_mode == V4SImode)
5561 ag_mode = DImode;
5562 count *= 2;
5565 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5566 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5567 for (i = 0; i < count; i++)
5569 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5570 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5571 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5572 XVECEXP (par, 0, i) = tmp;
5575 return par;
5578 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5581 static void
5582 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5583 machine_mode mode ATTRIBUTE_UNUSED,
5584 const_tree type ATTRIBUTE_UNUSED)
5586 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5587 pcum->aapcs_vfp_reg_alloc = 0;
5588 return;
5591 #define AAPCS_CP(X) \
5593 aapcs_ ## X ## _cum_init, \
5594 aapcs_ ## X ## _is_call_candidate, \
5595 aapcs_ ## X ## _allocate, \
5596 aapcs_ ## X ## _is_return_candidate, \
5597 aapcs_ ## X ## _allocate_return_reg, \
5598 aapcs_ ## X ## _advance \
5601 /* Table of co-processors that can be used to pass arguments in
5602 registers. Ideally no argument should be a candidate for more than
5603 one co-processor table entry, but the table is processed in order
5604 and stops after the first match. If that entry then fails to put
5605 the argument into a co-processor register, the argument will go on
5606 the stack. */
5607 static struct
5609 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5610 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5612 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5613 BLKmode) is a candidate for this co-processor's registers; this
5614 function should ignore any position-dependent state in
5615 CUMULATIVE_ARGS and only use call-type dependent information. */
5616 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5618 /* Return true if the argument does get a co-processor register; it
5619 should set aapcs_reg to an RTX of the register allocated as is
5620 required for a return from FUNCTION_ARG. */
5621 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5623 /* Return true if a result of mode MODE (or type TYPE if MODE is
5624 BLKmode) can be returned in this co-processor's registers. */
5625 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5627 /* Allocate and return an RTX element to hold the return value of a
5628 call; this routine must not fail and will only be called if
5629 is_return_candidate returned true with the same parameters. */
5630 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5632 /* Finish processing this argument and prepare to start processing
5633 the next one. */
5634 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5635 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5637 AAPCS_CP(vfp)
5640 #undef AAPCS_CP
5642 static int
5643 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5644 const_tree type)
5646 int i;
5648 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5649 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5650 return i;
5652 return -1;
5655 static int
5656 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5658 /* We aren't passed a decl, so we can't check that a call is local.
5659 However, it isn't clear that that would be a win anyway, since it
5660 might limit some tail-calling opportunities. */
5661 enum arm_pcs pcs_variant;
5663 if (fntype)
5665 const_tree fndecl = NULL_TREE;
5667 if (TREE_CODE (fntype) == FUNCTION_DECL)
5669 fndecl = fntype;
5670 fntype = TREE_TYPE (fntype);
5673 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5675 else
5676 pcs_variant = arm_pcs_default;
5678 if (pcs_variant != ARM_PCS_AAPCS)
5680 int i;
5682 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5683 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5684 TYPE_MODE (type),
5685 type))
5686 return i;
5688 return -1;
5691 static rtx
5692 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5693 const_tree fntype)
5695 /* We aren't passed a decl, so we can't check that a call is local.
5696 However, it isn't clear that that would be a win anyway, since it
5697 might limit some tail-calling opportunities. */
5698 enum arm_pcs pcs_variant;
5699 int unsignedp ATTRIBUTE_UNUSED;
5701 if (fntype)
5703 const_tree fndecl = NULL_TREE;
5705 if (TREE_CODE (fntype) == FUNCTION_DECL)
5707 fndecl = fntype;
5708 fntype = TREE_TYPE (fntype);
5711 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5713 else
5714 pcs_variant = arm_pcs_default;
5716 /* Promote integer types. */
5717 if (type && INTEGRAL_TYPE_P (type))
5718 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5720 if (pcs_variant != ARM_PCS_AAPCS)
5722 int i;
5724 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5725 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5726 type))
5727 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5728 mode, type);
5731 /* Promotes small structs returned in a register to full-word size
5732 for big-endian AAPCS. */
5733 if (type && arm_return_in_msb (type))
5735 HOST_WIDE_INT size = int_size_in_bytes (type);
5736 if (size % UNITS_PER_WORD != 0)
5738 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5739 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5743 return gen_rtx_REG (mode, R0_REGNUM);
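/* For instance (illustrative), on a big-endian AAPCS target a 3-byte
   struct returned in registers has its size rounded up to 4 bytes here,
   so the value comes back as an SImode quantity in r0 with the data in
   the most significant bytes, as arm_return_in_msb requires.  */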
5746 static rtx
5747 aapcs_libcall_value (machine_mode mode)
5749 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5750 && GET_MODE_SIZE (mode) <= 4)
5751 mode = SImode;
5753 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5756 /* Lay out a function argument using the AAPCS rules. The rule
5757 numbers referred to here are those in the AAPCS. */
5758 static void
5759 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5760 const_tree type, bool named)
5762 int nregs, nregs2;
5763 int ncrn;
5765 /* We only need to do this once per argument. */
5766 if (pcum->aapcs_arg_processed)
5767 return;
5769 pcum->aapcs_arg_processed = true;
5771 /* Special case: if named is false then we are handling an incoming
5772 anonymous argument which is on the stack. */
5773 if (!named)
5774 return;
5776 /* Is this a potential co-processor register candidate? */
5777 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5779 int slot = aapcs_select_call_coproc (pcum, mode, type);
5780 pcum->aapcs_cprc_slot = slot;
5782 /* We don't have to apply any of the rules from part B of the
5783 preparation phase, these are handled elsewhere in the
5784 compiler. */
5786 if (slot >= 0)
5788 /* A co-processor register candidate goes either in its own
5789 class of registers or on the stack. */
5790 if (!pcum->aapcs_cprc_failed[slot])
5792 /* C1.cp - Try to allocate the argument to co-processor
5793 registers. */
5794 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5795 return;
5797 /* C2.cp - Put the argument on the stack and note that we
5798 can't assign any more candidates in this slot. We also
5799 need to note that we have allocated stack space, so that
5800 we won't later try to split a non-cprc candidate between
5801 core registers and the stack. */
5802 pcum->aapcs_cprc_failed[slot] = true;
5803 pcum->can_split = false;
5806 /* We didn't get a register, so this argument goes on the
5807 stack. */
5808 gcc_assert (pcum->can_split == false);
5809 return;
5813 /* C3 - For double-word aligned arguments, round the NCRN up to the
5814 next even number. */
5815 ncrn = pcum->aapcs_ncrn;
5816 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5817 ncrn++;
5819 nregs = ARM_NUM_REGS2(mode, type);
5821 /* Sigh, this test should really assert that nregs > 0, but a GCC
5822 extension allows empty structs and then gives them zero size; it
5823 then allows such a structure to be passed by value. For some of
5824 the code below we have to pretend that such an argument has
5825 non-zero size so that we 'locate' it correctly either in
5826 registers or on the stack. */
5827 gcc_assert (nregs >= 0);
5829 nregs2 = nregs ? nregs : 1;
5831 /* C4 - Argument fits entirely in core registers. */
5832 if (ncrn + nregs2 <= NUM_ARG_REGS)
5834 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5835 pcum->aapcs_next_ncrn = ncrn + nregs;
5836 return;
5839 /* C5 - Some core registers left and there are no arguments already
5840 on the stack: split this argument between the remaining core
5841 registers and the stack. */
5842 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5844 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5845 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5846 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5847 return;
5850 /* C6 - NCRN is set to 4. */
5851 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5853 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5854 return;
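/* A worked example of the rules above (illustrative; assumes the base
   AAPCS with no co-processor candidates):

       void f (int a, long long b, int c);

   a is assigned r0 (C4).  b needs double-word alignment, so C3 rounds
   the NCRN up from 1 to 2 and C4 places b in r2/r3; r1 stays unused.
   c then finds NCRN == 4, so C6-C8 send it to the stack.  If instead a
   16-byte structure arrived while NCRN == 2 and nothing was yet on the
   stack, C5 would split it: aapcs_reg = r2 and aapcs_partial = 8, so
   the first 8 bytes travel in r2/r3 and the remainder on the stack.  */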
5857 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5858 for a call to a function whose data type is FNTYPE.
5859 For a library call, FNTYPE is NULL. */
5860 void
5861 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5862 rtx libname,
5863 tree fndecl ATTRIBUTE_UNUSED)
5865 /* Long call handling. */
5866 if (fntype)
5867 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5868 else
5869 pcum->pcs_variant = arm_pcs_default;
5871 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5873 if (arm_libcall_uses_aapcs_base (libname))
5874 pcum->pcs_variant = ARM_PCS_AAPCS;
5876 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5877 pcum->aapcs_reg = NULL_RTX;
5878 pcum->aapcs_partial = 0;
5879 pcum->aapcs_arg_processed = false;
5880 pcum->aapcs_cprc_slot = -1;
5881 pcum->can_split = true;
5883 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5885 int i;
5887 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5889 pcum->aapcs_cprc_failed[i] = false;
5890 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5893 return;
5896 /* Legacy ABIs */
5898 /* On the ARM, the offset starts at 0. */
5899 pcum->nregs = 0;
5900 pcum->iwmmxt_nregs = 0;
5901 pcum->can_split = true;
5903 /* Varargs vectors are treated the same as long long.
5904 named_count avoids having to change the way ARM handles 'named'. */
5905 pcum->named_count = 0;
5906 pcum->nargs = 0;
5908 if (TARGET_REALLY_IWMMXT && fntype)
5910 tree fn_arg;
5912 for (fn_arg = TYPE_ARG_TYPES (fntype);
5913 fn_arg;
5914 fn_arg = TREE_CHAIN (fn_arg))
5915 pcum->named_count += 1;
5917 if (! pcum->named_count)
5918 pcum->named_count = INT_MAX;
5922 /* Return true if we use LRA instead of reload pass. */
5923 static bool
5924 arm_lra_p (void)
5926 return arm_lra_flag;
5929 /* Return true if mode/type need doubleword alignment. */
5930 static bool
5931 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5933 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5934 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
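/* For example (illustrative), DImode and DFmode arguments satisfy this
   test, as does a parameter whose type was declared with something like

       typedef int aligned_int __attribute__ ((aligned (8)));

   since its TYPE_ALIGN (64) exceeds PARM_BOUNDARY (32 on ARM); such
   arguments are then started on an even core register by rule C3
   above.  */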
5938 /* Determine where to put an argument to a function.
5939 Value is zero to push the argument on the stack,
5940 or a hard register in which to store the argument.
5942 MODE is the argument's machine mode.
5943 TYPE is the data type of the argument (as a tree).
5944 This is null for libcalls where that information may
5945 not be available.
5946 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5947 the preceding args and about the function being called.
5948 NAMED is nonzero if this argument is a named parameter
5949 (otherwise it is an extra parameter matching an ellipsis).
5951 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5952 other arguments are passed on the stack. If (NAMED == 0) (which happens
5953 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5954 defined), say it is passed on the stack (function_prologue will
5955 indeed make it be passed on the stack if necessary). */
5957 static rtx
5958 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5959 const_tree type, bool named)
5961 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5962 int nregs;
5964 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5965 a call insn (op3 of a call_value insn). */
5966 if (mode == VOIDmode)
5967 return const0_rtx;
5969 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5971 aapcs_layout_arg (pcum, mode, type, named);
5972 return pcum->aapcs_reg;
5975 /* Varargs vectors are treated the same as long long.
5976 named_count avoids having to change the way ARM handles 'named'. */
5977 if (TARGET_IWMMXT_ABI
5978 && arm_vector_mode_supported_p (mode)
5979 && pcum->named_count > pcum->nargs + 1)
5981 if (pcum->iwmmxt_nregs <= 9)
5982 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5983 else
5985 pcum->can_split = false;
5986 return NULL_RTX;
5990 /* Put doubleword aligned quantities in even register pairs. */
5991 if (pcum->nregs & 1
5992 && ARM_DOUBLEWORD_ALIGN
5993 && arm_needs_doubleword_align (mode, type))
5994 pcum->nregs++;
5996 /* Only allow splitting an arg between regs and memory if all preceding
5997 args were allocated to regs. For args passed by reference we only count
5998 the reference pointer. */
5999 if (pcum->can_split)
6000 nregs = 1;
6001 else
6002 nregs = ARM_NUM_REGS2 (mode, type);
6004 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6005 return NULL_RTX;
6007 return gen_rtx_REG (mode, pcum->nregs);
6010 static unsigned int
6011 arm_function_arg_boundary (machine_mode mode, const_tree type)
6013 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6014 ? DOUBLEWORD_ALIGNMENT
6015 : PARM_BOUNDARY);
6018 static int
6019 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6020 tree type, bool named)
6022 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6023 int nregs = pcum->nregs;
6025 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6027 aapcs_layout_arg (pcum, mode, type, named);
6028 return pcum->aapcs_partial;
6031 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6032 return 0;
6034 if (NUM_ARG_REGS > nregs
6035 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6036 && pcum->can_split)
6037 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6039 return 0;
6042 /* Update the data in PCUM to advance over an argument
6043 of mode MODE and data type TYPE.
6044 (TYPE is null for libcalls where that information may not be available.) */
6046 static void
6047 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6048 const_tree type, bool named)
6050 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6052 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6054 aapcs_layout_arg (pcum, mode, type, named);
6056 if (pcum->aapcs_cprc_slot >= 0)
6058 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6059 type);
6060 pcum->aapcs_cprc_slot = -1;
6063 /* Generic stuff. */
6064 pcum->aapcs_arg_processed = false;
6065 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6066 pcum->aapcs_reg = NULL_RTX;
6067 pcum->aapcs_partial = 0;
6069 else
6071 pcum->nargs += 1;
6072 if (arm_vector_mode_supported_p (mode)
6073 && pcum->named_count > pcum->nargs
6074 && TARGET_IWMMXT_ABI)
6075 pcum->iwmmxt_nregs += 1;
6076 else
6077 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6081 /* Variable sized types are passed by reference. This is a GCC
6082 extension to the ARM ABI. */
6084 static bool
6085 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6086 machine_mode mode ATTRIBUTE_UNUSED,
6087 const_tree type, bool named ATTRIBUTE_UNUSED)
6089 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6092 /* Encode the current state of the #pragma [no_]long_calls. */
6093 typedef enum
6095 OFF, /* No #pragma [no_]long_calls is in effect. */
6096 LONG, /* #pragma long_calls is in effect. */
6097 SHORT /* #pragma no_long_calls is in effect. */
6098 } arm_pragma_enum;
6100 static arm_pragma_enum arm_pragma_long_calls = OFF;
6102 void
6103 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6105 arm_pragma_long_calls = LONG;
6108 void
6109 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6111 arm_pragma_long_calls = SHORT;
6114 void
6115 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6117 arm_pragma_long_calls = OFF;
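/* Illustrative use of these pragmas in user code:

       #pragma long_calls
       void far_func (void);      // receives the long_call attribute
       #pragma no_long_calls
       void near_func (void);     // receives the short_call attribute
       #pragma long_calls_off
       void plain_func (void);    // receives neither attribute

   The attribute is attached to the function type by
   arm_set_default_type_attributes below.  */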
6120 /* Handle an attribute requiring a FUNCTION_DECL;
6121 arguments as in struct attribute_spec.handler. */
6122 static tree
6123 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6124 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6126 if (TREE_CODE (*node) != FUNCTION_DECL)
6128 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6129 name);
6130 *no_add_attrs = true;
6133 return NULL_TREE;
6136 /* Handle an "interrupt" or "isr" attribute;
6137 arguments as in struct attribute_spec.handler. */
6138 static tree
6139 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6140 bool *no_add_attrs)
6142 if (DECL_P (*node))
6144 if (TREE_CODE (*node) != FUNCTION_DECL)
6146 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6147 name);
6148 *no_add_attrs = true;
6150 /* FIXME: the argument, if any, is checked for type attributes;
6151 should it be checked for decl ones? */
6153 else
6155 if (TREE_CODE (*node) == FUNCTION_TYPE
6156 || TREE_CODE (*node) == METHOD_TYPE)
6158 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6160 warning (OPT_Wattributes, "%qE attribute ignored",
6161 name);
6162 *no_add_attrs = true;
6165 else if (TREE_CODE (*node) == POINTER_TYPE
6166 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6167 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6168 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6170 *node = build_variant_type_copy (*node);
6171 TREE_TYPE (*node) = build_type_attribute_variant
6172 (TREE_TYPE (*node),
6173 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6174 *no_add_attrs = true;
6176 else
6178 /* Possibly pass this attribute on from the type to a decl. */
6179 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6180 | (int) ATTR_FLAG_FUNCTION_NEXT
6181 | (int) ATTR_FLAG_ARRAY_NEXT))
6183 *no_add_attrs = true;
6184 return tree_cons (name, args, NULL_TREE);
6186 else
6188 warning (OPT_Wattributes, "%qE attribute ignored",
6189 name);
6194 return NULL_TREE;
6197 /* Handle a "pcs" attribute; arguments as in struct
6198 attribute_spec.handler. */
6199 static tree
6200 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6201 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6203 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6205 warning (OPT_Wattributes, "%qE attribute ignored", name);
6206 *no_add_attrs = true;
6208 return NULL_TREE;
6211 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6212 /* Handle the "notshared" attribute. This attribute is another way of
6213 requesting hidden visibility. ARM's compiler supports
6214 "__declspec(notshared)"; we support the same thing via an
6215 attribute. */
6217 static tree
6218 arm_handle_notshared_attribute (tree *node,
6219 tree name ATTRIBUTE_UNUSED,
6220 tree args ATTRIBUTE_UNUSED,
6221 int flags ATTRIBUTE_UNUSED,
6222 bool *no_add_attrs)
6224 tree decl = TYPE_NAME (*node);
6226 if (decl)
6228 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6229 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6230 *no_add_attrs = false;
6232 return NULL_TREE;
6234 #endif
6236 /* Return 0 if the attributes for two types are incompatible, 1 if they
6237 are compatible, and 2 if they are nearly compatible (which causes a
6238 warning to be generated). */
6239 static int
6240 arm_comp_type_attributes (const_tree type1, const_tree type2)
6242 int l1, l2, s1, s2;
6244 /* Check for mismatch of non-default calling convention. */
6245 if (TREE_CODE (type1) != FUNCTION_TYPE)
6246 return 1;
6248 /* Check for mismatched call attributes. */
6249 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6250 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6251 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6252 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6254 /* Only bother to check if an attribute is defined. */
6255 if (l1 | l2 | s1 | s2)
6257 /* If one type has an attribute, the other must have the same attribute. */
6258 if ((l1 != l2) || (s1 != s2))
6259 return 0;
6261 /* Disallow mixed attributes. */
6262 if ((l1 & s2) || (l2 & s1))
6263 return 0;
6266 /* Check for mismatched ISR attribute. */
6267 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6268 if (! l1)
6269 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6270 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6271 if (! l2)
6272 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6273 if (l1 != l2)
6274 return 0;
6276 return 1;
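/* For example (illustrative), these two function types compare as
   incompatible (0) because only one of them carries a call-type
   attribute:

       typedef void plain_fn (void);
       typedef void far_fn (void) __attribute__ ((long_call));

   Mixing long_call on one type with short_call on the other is
   rejected for the same reason.  */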
6279 /* Assigns default attributes to a newly defined type. This is used to
6280 set short_call/long_call attributes for function types of
6281 functions defined inside corresponding #pragma scopes. */
6282 static void
6283 arm_set_default_type_attributes (tree type)
6285 /* Add __attribute__ ((long_call)) to all functions when inside
6286 #pragma long_calls, or __attribute__ ((short_call)) when inside
6287 #pragma no_long_calls. */
6288 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6290 tree type_attr_list, attr_name;
6291 type_attr_list = TYPE_ATTRIBUTES (type);
6293 if (arm_pragma_long_calls == LONG)
6294 attr_name = get_identifier ("long_call");
6295 else if (arm_pragma_long_calls == SHORT)
6296 attr_name = get_identifier ("short_call");
6297 else
6298 return;
6300 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6301 TYPE_ATTRIBUTES (type) = type_attr_list;
6305 /* Return true if DECL is known to be linked into section SECTION. */
6307 static bool
6308 arm_function_in_section_p (tree decl, section *section)
6310 /* We can only be certain about functions defined in the same
6311 compilation unit. */
6312 if (!TREE_STATIC (decl))
6313 return false;
6315 /* Make sure that SYMBOL always binds to the definition in this
6316 compilation unit. */
6317 if (!targetm.binds_local_p (decl))
6318 return false;
6320 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6321 if (!DECL_SECTION_NAME (decl))
6323 /* Make sure that we will not create a unique section for DECL. */
6324 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6325 return false;
6328 return function_section (decl) == section;
6331 /* Return nonzero if a 32-bit "long_call" should be generated for
6332 a call from the current function to DECL. We generate a long_call
6333 if the function:
6335 a. has an __attribute__ ((long_call))
6336 or b. is within the scope of a #pragma long_calls
6337 or c. the -mlong-calls command line switch has been specified
6339 However we do not generate a long call if the function:
6341 d. has an __attribute__ ((short_call))
6342 or e. is inside the scope of a #pragma no_long_calls
6343 or f. is defined in the same section as the current function. */
6345 bool
6346 arm_is_long_call_p (tree decl)
6348 tree attrs;
6350 if (!decl)
6351 return TARGET_LONG_CALLS;
6353 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6354 if (lookup_attribute ("short_call", attrs))
6355 return false;
6357 /* For "f", be conservative, and only cater for cases in which the
6358 whole of the current function is placed in the same section. */
6359 if (!flag_reorder_blocks_and_partition
6360 && TREE_CODE (decl) == FUNCTION_DECL
6361 && arm_function_in_section_p (decl, current_function_section ()))
6362 return false;
6364 if (lookup_attribute ("long_call", attrs))
6365 return true;
6367 return TARGET_LONG_CALLS;
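/* For instance (illustrative):

       extern void log_msg (const char *) __attribute__ ((long_call));
       void helper (void) __attribute__ ((short_call));

   A call to log_msg is emitted as a long call unless case "f" applies
   (it is known to end up in the same section as the caller), whereas a
   call to helper is never emitted as a long call, regardless of
   -mlong-calls or the pragmas above.  */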
6370 /* Return nonzero if it is ok to make a tail-call to DECL. */
6371 static bool
6372 arm_function_ok_for_sibcall (tree decl, tree exp)
6374 unsigned long func_type;
6376 if (cfun->machine->sibcall_blocked)
6377 return false;
6379 /* Never tailcall something if we are generating code for Thumb-1. */
6380 if (TARGET_THUMB1)
6381 return false;
6383 /* The PIC register is live on entry to VxWorks PLT entries, so we
6384 must make the call before restoring the PIC register. */
6385 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6386 return false;
6388 /* If we are interworking and the function is not declared static
6389 then we can't tail-call it unless we know that it exists in this
6390 compilation unit (since it might be a Thumb routine). */
6391 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6392 && !TREE_ASM_WRITTEN (decl))
6393 return false;
6395 func_type = arm_current_func_type ();
6396 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6397 if (IS_INTERRUPT (func_type))
6398 return false;
6400 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6402 /* Check that the return value locations are the same. For
6403 example that we aren't returning a value from the sibling in
6404 a VFP register but then need to transfer it to a core
6405 register. */
6406 rtx a, b;
6408 a = arm_function_value (TREE_TYPE (exp), decl, false);
6409 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6410 cfun->decl, false);
6411 if (!rtx_equal_p (a, b))
6412 return false;
6415 /* Never tailcall if function may be called with a misaligned SP. */
6416 if (IS_STACKALIGN (func_type))
6417 return false;
6419 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6420 references should become a NOP. Don't convert such calls into
6421 sibling calls. */
6422 if (TARGET_AAPCS_BASED
6423 && arm_abi == ARM_ABI_AAPCS
6424 && decl
6425 && DECL_WEAK (decl))
6426 return false;
6428 /* Everything else is ok. */
6429 return true;
6433 /* Addressing mode support functions. */
6435 /* Return nonzero if X is a legitimate immediate operand when compiling
6436 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6438 legitimate_pic_operand_p (rtx x)
6440 if (GET_CODE (x) == SYMBOL_REF
6441 || (GET_CODE (x) == CONST
6442 && GET_CODE (XEXP (x, 0)) == PLUS
6443 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6444 return 0;
6446 return 1;
6449 /* Record that the current function needs a PIC register. Initialize
6450 cfun->machine->pic_reg if we have not already done so. */
6452 static void
6453 require_pic_register (void)
6455 /* A lot of the logic here is made obscure by the fact that this
6456 routine gets called as part of the rtx cost estimation process.
6457 We don't want those calls to affect any assumptions about the real
6458 function; and further, we can't call entry_of_function() until we
6459 start the real expansion process. */
6460 if (!crtl->uses_pic_offset_table)
6462 gcc_assert (can_create_pseudo_p ());
6463 if (arm_pic_register != INVALID_REGNUM
6464 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6466 if (!cfun->machine->pic_reg)
6467 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6469 /* Play games to avoid marking the function as needing pic
6470 if we are being called as part of the cost-estimation
6471 process. */
6472 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6473 crtl->uses_pic_offset_table = 1;
6475 else
6477 rtx_insn *seq, *insn;
6479 if (!cfun->machine->pic_reg)
6480 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6482 /* Play games to avoid marking the function as needing pic
6483 if we are being called as part of the cost-estimation
6484 process. */
6485 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6487 crtl->uses_pic_offset_table = 1;
6488 start_sequence ();
6490 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6491 && arm_pic_register > LAST_LO_REGNUM)
6492 emit_move_insn (cfun->machine->pic_reg,
6493 gen_rtx_REG (Pmode, arm_pic_register));
6494 else
6495 arm_load_pic_register (0UL);
6497 seq = get_insns ();
6498 end_sequence ();
6500 for (insn = seq; insn; insn = NEXT_INSN (insn))
6501 if (INSN_P (insn))
6502 INSN_LOCATION (insn) = prologue_location;
6504 /* We can be called during expansion of PHI nodes, where
6505 we can't yet emit instructions directly in the final
6506 insn stream. Queue the insns on the entry edge, they will
6507 be committed after everything else is expanded. */
6508 insert_insn_on_edge (seq,
6509 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6516 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6518 if (GET_CODE (orig) == SYMBOL_REF
6519 || GET_CODE (orig) == LABEL_REF)
6521 rtx insn;
6523 if (reg == 0)
6525 gcc_assert (can_create_pseudo_p ());
6526 reg = gen_reg_rtx (Pmode);
6529 /* VxWorks does not impose a fixed gap between segments; the run-time
6530 gap can be different from the object-file gap. We therefore can't
6531 use GOTOFF unless we are absolutely sure that the symbol is in the
6532 same segment as the GOT. Unfortunately, the flexibility of linker
6533 scripts means that we can't be sure of that in general, so assume
6534 that GOTOFF is never valid on VxWorks. */
6535 if ((GET_CODE (orig) == LABEL_REF
6536 || (GET_CODE (orig) == SYMBOL_REF &&
6537 SYMBOL_REF_LOCAL_P (orig)))
6538 && NEED_GOT_RELOC
6539 && arm_pic_data_is_text_relative)
6540 insn = arm_pic_static_addr (orig, reg);
6541 else
6543 rtx pat;
6544 rtx mem;
6546 /* If this function doesn't have a pic register, create one now. */
6547 require_pic_register ();
6549 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6551 /* Make the MEM as close to a constant as possible. */
6552 mem = SET_SRC (pat);
6553 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6554 MEM_READONLY_P (mem) = 1;
6555 MEM_NOTRAP_P (mem) = 1;
6557 insn = emit_insn (pat);
6560 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6561 by the loop optimizer. */
6562 set_unique_reg_note (insn, REG_EQUAL, orig);
6564 return reg;
6566 else if (GET_CODE (orig) == CONST)
6568 rtx base, offset;
6570 if (GET_CODE (XEXP (orig, 0)) == PLUS
6571 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6572 return orig;
6574 /* Handle the case where we have: const (UNSPEC_TLS). */
6575 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6576 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6577 return orig;
6579 /* Handle the case where we have:
6580 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6581 CONST_INT. */
6582 if (GET_CODE (XEXP (orig, 0)) == PLUS
6583 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6584 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6586 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6587 return orig;
6590 if (reg == 0)
6592 gcc_assert (can_create_pseudo_p ());
6593 reg = gen_reg_rtx (Pmode);
6596 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6598 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6599 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6600 base == reg ? 0 : reg);
6602 if (CONST_INT_P (offset))
6604 /* The base register doesn't really matter; we only want to
6605 test the index for the appropriate mode. */
6606 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6608 gcc_assert (can_create_pseudo_p ());
6609 offset = force_reg (Pmode, offset);
6612 if (CONST_INT_P (offset))
6613 return plus_constant (Pmode, base, INTVAL (offset));
6616 if (GET_MODE_SIZE (mode) > 4
6617 && (GET_MODE_CLASS (mode) == MODE_INT
6618 || TARGET_SOFT_FLOAT))
6620 emit_insn (gen_addsi3 (reg, base, offset));
6621 return reg;
6624 return gen_rtx_PLUS (Pmode, base, offset);
6627 return orig;
6631 /* Find a spare register to use during the prolog of a function. */
6633 static int
6634 thumb_find_work_register (unsigned long pushed_regs_mask)
6636 int reg;
6638 /* Check the argument registers first as these are call-used. The
6639 register allocation order means that sometimes r3 might be used
6640 but earlier argument registers might not, so check them all. */
6641 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6642 if (!df_regs_ever_live_p (reg))
6643 return reg;
6645 /* Before going on to check the call-saved registers we can try a couple
6646 more ways of deducing that r3 is available. The first is when we are
6647 pushing anonymous arguments onto the stack and we have fewer than 4
6648 registers' worth of fixed arguments (*). In this case r3 will be part of
6649 the variable argument list and so we can be sure that it will be
6650 pushed right at the start of the function. Hence it will be available
6651 for the rest of the prologue.
6652 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6653 if (cfun->machine->uses_anonymous_args
6654 && crtl->args.pretend_args_size > 0)
6655 return LAST_ARG_REGNUM;
6657 /* The other case is when we have fixed arguments but fewer than 4 registers'
6658 worth. In this case r3 might be used in the body of the function, but
6659 it is not being used to convey an argument into the function. In theory
6660 we could just check crtl->args.size to see how many bytes are
6661 being passed in argument registers, but it seems that it is unreliable.
6662 Sometimes it will have the value 0 when in fact arguments are being
6663 passed. (See testcase execute/20021111-1.c for an example). So we also
6664 check the args_info.nregs field as well. The problem with this field is
6665 that it makes no allowances for arguments that are passed to the
6666 function but which are not used. Hence we could miss an opportunity
6667 when a function has an unused argument in r3. But it is better to be
6668 safe than to be sorry. */
6669 if (! cfun->machine->uses_anonymous_args
6670 && crtl->args.size >= 0
6671 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6672 && (TARGET_AAPCS_BASED
6673 ? crtl->args.info.aapcs_ncrn < 4
6674 : crtl->args.info.nregs < 4))
6675 return LAST_ARG_REGNUM;
6677 /* Otherwise look for a call-saved register that is going to be pushed. */
6678 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6679 if (pushed_regs_mask & (1 << reg))
6680 return reg;
6682 if (TARGET_THUMB2)
6684 /* Thumb-2 can use high regs. */
6685 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6686 if (pushed_regs_mask & (1 << reg))
6687 return reg;
6689 /* Something went wrong - thumb_compute_save_reg_mask()
6690 should have arranged for a suitable register to be pushed. */
6691 gcc_unreachable ();
6694 static GTY(()) int pic_labelno;
6696 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6697 low register. */
6699 void
6700 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6702 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6704 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6705 return;
6707 gcc_assert (flag_pic);
6709 pic_reg = cfun->machine->pic_reg;
6710 if (TARGET_VXWORKS_RTP)
6712 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6713 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6714 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6716 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6718 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6719 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6721 else
6723 /* We use an UNSPEC rather than a LABEL_REF because this label
6724 never appears in the code stream. */
6726 labelno = GEN_INT (pic_labelno++);
6727 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6728 l1 = gen_rtx_CONST (VOIDmode, l1);
6730 /* On the ARM the PC register contains 'dot + 8' at the time of the
6731 addition, on the Thumb it is 'dot + 4'. */
6732 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6733 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6734 UNSPEC_GOTSYM_OFF);
6735 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6737 if (TARGET_32BIT)
6739 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6741 else /* TARGET_THUMB1 */
6743 if (arm_pic_register != INVALID_REGNUM
6744 && REGNO (pic_reg) > LAST_LO_REGNUM)
6746 /* We will have pushed the pic register, so we should always be
6747 able to find a work register. */
6748 pic_tmp = gen_rtx_REG (SImode,
6749 thumb_find_work_register (saved_regs));
6750 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6751 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6752 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6754 else if (arm_pic_register != INVALID_REGNUM
6755 && arm_pic_register > LAST_LO_REGNUM
6756 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6758 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6759 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6760 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6762 else
6763 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6767 /* Need to emit this whether or not we obey regdecls,
6768 since setjmp/longjmp can cause life info to screw up. */
6769 emit_use (pic_reg);
6772 /* Generate code to load the address of a static var when flag_pic is set. */
6773 static rtx
6774 arm_pic_static_addr (rtx orig, rtx reg)
6776 rtx l1, labelno, offset_rtx, insn;
6778 gcc_assert (flag_pic);
6780 /* We use an UNSPEC rather than a LABEL_REF because this label
6781 never appears in the code stream. */
6782 labelno = GEN_INT (pic_labelno++);
6783 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6784 l1 = gen_rtx_CONST (VOIDmode, l1);
6786 /* On the ARM the PC register contains 'dot + 8' at the time of the
6787 addition, on the Thumb it is 'dot + 4'. */
6788 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6789 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6790 UNSPEC_SYMBOL_OFFSET);
6791 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6793 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6794 return insn;
6797 /* Return nonzero if X is valid as an ARM state addressing register. */
6798 static int
6799 arm_address_register_rtx_p (rtx x, int strict_p)
6801 int regno;
6803 if (!REG_P (x))
6804 return 0;
6806 regno = REGNO (x);
6808 if (strict_p)
6809 return ARM_REGNO_OK_FOR_BASE_P (regno);
6811 return (regno <= LAST_ARM_REGNUM
6812 || regno >= FIRST_PSEUDO_REGISTER
6813 || regno == FRAME_POINTER_REGNUM
6814 || regno == ARG_POINTER_REGNUM);
6817 /* Return TRUE if this rtx is the difference of a symbol and a label,
6818 and will reduce to a PC-relative relocation in the object file.
6819 Expressions like this can be left alone when generating PIC, rather
6820 than forced through the GOT. */
6821 static int
6822 pcrel_constant_p (rtx x)
6824 if (GET_CODE (x) == MINUS)
6825 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6827 return FALSE;
6830 /* Return true if X will surely end up in an index register after the next
6831 splitting pass. */
6832 static bool
6833 will_be_in_index_register (const_rtx x)
6835 /* arm.md: calculate_pic_address will split this into a register. */
6836 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6839 /* Return nonzero if X is a valid ARM state address operand. */
6841 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6842 int strict_p)
6844 bool use_ldrd;
6845 enum rtx_code code = GET_CODE (x);
6847 if (arm_address_register_rtx_p (x, strict_p))
6848 return 1;
6850 use_ldrd = (TARGET_LDRD
6851 && (mode == DImode
6852 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6854 if (code == POST_INC || code == PRE_DEC
6855 || ((code == PRE_INC || code == POST_DEC)
6856 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6857 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6859 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6860 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6861 && GET_CODE (XEXP (x, 1)) == PLUS
6862 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6864 rtx addend = XEXP (XEXP (x, 1), 1);
6866 /* Don't allow ldrd post-increment by register because it's hard
6867 to fix up invalid register choices. */
6868 if (use_ldrd
6869 && GET_CODE (x) == POST_MODIFY
6870 && REG_P (addend))
6871 return 0;
6873 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6874 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6877 /* After reload constants split into minipools will have addresses
6878 from a LABEL_REF. */
6879 else if (reload_completed
6880 && (code == LABEL_REF
6881 || (code == CONST
6882 && GET_CODE (XEXP (x, 0)) == PLUS
6883 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6884 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6885 return 1;
6887 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6888 return 0;
6890 else if (code == PLUS)
6892 rtx xop0 = XEXP (x, 0);
6893 rtx xop1 = XEXP (x, 1);
6895 return ((arm_address_register_rtx_p (xop0, strict_p)
6896 && ((CONST_INT_P (xop1)
6897 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6898 || (!strict_p && will_be_in_index_register (xop1))))
6899 || (arm_address_register_rtx_p (xop1, strict_p)
6900 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6903 #if 0
6904 /* Reload currently can't handle MINUS, so disable this for now */
6905 else if (GET_CODE (x) == MINUS)
6907 rtx xop0 = XEXP (x, 0);
6908 rtx xop1 = XEXP (x, 1);
6910 return (arm_address_register_rtx_p (xop0, strict_p)
6911 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6913 #endif
6915 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6916 && code == SYMBOL_REF
6917 && CONSTANT_POOL_ADDRESS_P (x)
6918 && ! (flag_pic
6919 && symbol_mentioned_p (get_pool_constant (x))
6920 && ! pcrel_constant_p (get_pool_constant (x))))
6921 return 1;
6923 return 0;
6926 /* Return nonzero if X is a valid Thumb-2 address operand. */
6927 static int
6928 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6930 bool use_ldrd;
6931 enum rtx_code code = GET_CODE (x);
6933 if (arm_address_register_rtx_p (x, strict_p))
6934 return 1;
6936 use_ldrd = (TARGET_LDRD
6937 && (mode == DImode
6938 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6940 if (code == POST_INC || code == PRE_DEC
6941 || ((code == PRE_INC || code == POST_DEC)
6942 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6943 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6945 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6946 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6947 && GET_CODE (XEXP (x, 1)) == PLUS
6948 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6950 /* Thumb-2 only has autoincrement by constant. */
6951 rtx addend = XEXP (XEXP (x, 1), 1);
6952 HOST_WIDE_INT offset;
6954 if (!CONST_INT_P (addend))
6955 return 0;
6957 offset = INTVAL(addend);
6958 if (GET_MODE_SIZE (mode) <= 4)
6959 return (offset > -256 && offset < 256);
6961 return (use_ldrd && offset > -1024 && offset < 1024
6962 && (offset & 3) == 0);
6965 /* After reload constants split into minipools will have addresses
6966 from a LABEL_REF. */
6967 else if (reload_completed
6968 && (code == LABEL_REF
6969 || (code == CONST
6970 && GET_CODE (XEXP (x, 0)) == PLUS
6971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6972 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6973 return 1;
6975 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6976 return 0;
6978 else if (code == PLUS)
6980 rtx xop0 = XEXP (x, 0);
6981 rtx xop1 = XEXP (x, 1);
6983 return ((arm_address_register_rtx_p (xop0, strict_p)
6984 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6985 || (!strict_p && will_be_in_index_register (xop1))))
6986 || (arm_address_register_rtx_p (xop1, strict_p)
6987 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6990 /* Normally we can assign constant values to target registers without
6991 the help of the constant pool. But there are cases where we have to
6992 use the constant pool, such as:
6993 1) assigning a label to a register.
6994 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6996 Constant pool access in format:
6997 (set (reg r0) (mem (symbol_ref (".LC0"))))
6998 will cause the use of the literal pool (later in function arm_reorg).
6999 So here we mark such a format as invalid, and the compiler will then
7000 adjust it into:
7001 (set (reg r0) (symbol_ref (".LC0")))
7002 (set (reg r0) (mem (reg r0))).
7003 No extra register is required, and (mem (reg r0)) won't cause the use
7004 of literal pools. */
7005 else if (arm_disable_literal_pool && code == SYMBOL_REF
7006 && CONSTANT_POOL_ADDRESS_P (x))
7007 return 0;
7009 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7010 && code == SYMBOL_REF
7011 && CONSTANT_POOL_ADDRESS_P (x)
7012 && ! (flag_pic
7013 && symbol_mentioned_p (get_pool_constant (x))
7014 && ! pcrel_constant_p (get_pool_constant (x))))
7015 return 1;
7017 return 0;
7020 /* Return nonzero if INDEX is valid for an address index operand in
7021 ARM state. */
7022 static int
7023 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7024 int strict_p)
7026 HOST_WIDE_INT range;
7027 enum rtx_code code = GET_CODE (index);
7029 /* Standard coprocessor addressing modes. */
7030 if (TARGET_HARD_FLOAT
7031 && TARGET_VFP
7032 && (mode == SFmode || mode == DFmode))
7033 return (code == CONST_INT && INTVAL (index) < 1024
7034 && INTVAL (index) > -1024
7035 && (INTVAL (index) & 3) == 0);
7037 /* For quad modes, we restrict the constant offset to be slightly less
7038 than what the instruction format permits. We do this because for
7039 quad mode moves, we will actually decompose them into two separate
7040 double-mode reads or writes. INDEX must therefore be a valid
7041 (double-mode) offset and so should INDEX+8. */
7042 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7043 return (code == CONST_INT
7044 && INTVAL (index) < 1016
7045 && INTVAL (index) > -1024
7046 && (INTVAL (index) & 3) == 0);
7048 /* We have no such constraint on double mode offsets, so we permit the
7049 full range of the instruction format. */
7050 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7051 return (code == CONST_INT
7052 && INTVAL (index) < 1024
7053 && INTVAL (index) > -1024
7054 && (INTVAL (index) & 3) == 0);
7056 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7057 return (code == CONST_INT
7058 && INTVAL (index) < 1024
7059 && INTVAL (index) > -1024
7060 && (INTVAL (index) & 3) == 0);
7062 if (arm_address_register_rtx_p (index, strict_p)
7063 && (GET_MODE_SIZE (mode) <= 4))
7064 return 1;
7066 if (mode == DImode || mode == DFmode)
7068 if (code == CONST_INT)
7070 HOST_WIDE_INT val = INTVAL (index);
7072 if (TARGET_LDRD)
7073 return val > -256 && val < 256;
7074 else
7075 return val > -4096 && val < 4092;
7078 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7081 if (GET_MODE_SIZE (mode) <= 4
7082 && ! (arm_arch4
7083 && (mode == HImode
7084 || mode == HFmode
7085 || (mode == QImode && outer == SIGN_EXTEND))))
7087 if (code == MULT)
7089 rtx xiop0 = XEXP (index, 0);
7090 rtx xiop1 = XEXP (index, 1);
7092 return ((arm_address_register_rtx_p (xiop0, strict_p)
7093 && power_of_two_operand (xiop1, SImode))
7094 || (arm_address_register_rtx_p (xiop1, strict_p)
7095 && power_of_two_operand (xiop0, SImode)));
7097 else if (code == LSHIFTRT || code == ASHIFTRT
7098 || code == ASHIFT || code == ROTATERT)
7100 rtx op = XEXP (index, 1);
7102 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7103 && CONST_INT_P (op)
7104 && INTVAL (op) > 0
7105 && INTVAL (op) <= 31);
7109 /* For ARM v4 we may be doing a sign-extend operation during the
7110 load. */
7111 if (arm_arch4)
7113 if (mode == HImode
7114 || mode == HFmode
7115 || (outer == SIGN_EXTEND && mode == QImode))
7116 range = 256;
7117 else
7118 range = 4096;
7120 else
7121 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7123 return (code == CONST_INT
7124 && INTVAL (index) < range
7125 && INTVAL (index) > -range);
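/* Illustrative index operands accepted here in ARM state (shown with a
   possible matching instruction):

       (const_int 2040)                    ; ldr  r0, [r1, #2040]
       (mult (reg r2) (const_int 4))       ; ldr  r0, [r1, r2, lsl #2]
       (ashiftrt (reg r2) (const_int 3))   ; ldr  r0, [r1, r2, asr #3]

   whereas a DImode/DFmode access with TARGET_LDRD is restricted to the
   LDRD immediate range, e.g. (const_int 252).  */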
7128 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7129 index operand, i.e. 1, 2, 4 or 8. */
7130 static bool
7131 thumb2_index_mul_operand (rtx op)
7133 HOST_WIDE_INT val;
7135 if (!CONST_INT_P (op))
7136 return false;
7138 val = INTVAL(op);
7139 return (val == 1 || val == 2 || val == 4 || val == 8);
7142 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7143 static int
7144 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7146 enum rtx_code code = GET_CODE (index);
7148 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7149 /* Standard coprocessor addressing modes. */
7150 if (TARGET_HARD_FLOAT
7151 && TARGET_VFP
7152 && (mode == SFmode || mode == DFmode))
7153 return (code == CONST_INT && INTVAL (index) < 1024
7154 /* Thumb-2 allows only > -256 index range for its core register
7155 load/stores. Since we allow SF/DF in core registers, we have
7156 to use the intersection between -256~4096 (core) and -1024~1024
7157 (coprocessor). */
7158 && INTVAL (index) > -256
7159 && (INTVAL (index) & 3) == 0);
7161 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7163 /* For DImode assume values will usually live in core regs
7164 and only allow LDRD addressing modes. */
7165 if (!TARGET_LDRD || mode != DImode)
7166 return (code == CONST_INT
7167 && INTVAL (index) < 1024
7168 && INTVAL (index) > -1024
7169 && (INTVAL (index) & 3) == 0);
7172 /* For quad modes, we restrict the constant offset to be slightly less
7173 than what the instruction format permits. We do this because for
7174 quad mode moves, we will actually decompose them into two separate
7175 double-mode reads or writes. INDEX must therefore be a valid
7176 (double-mode) offset and so should INDEX+8. */
7177 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7178 return (code == CONST_INT
7179 && INTVAL (index) < 1016
7180 && INTVAL (index) > -1024
7181 && (INTVAL (index) & 3) == 0);
7183 /* We have no such constraint on double mode offsets, so we permit the
7184 full range of the instruction format. */
7185 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7186 return (code == CONST_INT
7187 && INTVAL (index) < 1024
7188 && INTVAL (index) > -1024
7189 && (INTVAL (index) & 3) == 0);
7191 if (arm_address_register_rtx_p (index, strict_p)
7192 && (GET_MODE_SIZE (mode) <= 4))
7193 return 1;
7195 if (mode == DImode || mode == DFmode)
7197 if (code == CONST_INT)
7199 HOST_WIDE_INT val = INTVAL (index);
7200 /* ??? Can we assume ldrd for thumb2? */
7201 /* Thumb-2 ldrd only has reg+const addressing modes. */
7202 /* ldrd supports offsets of +-1020.
7203 However the ldr fallback does not. */
7204 return val > -256 && val < 256 && (val & 3) == 0;
7206 else
7207 return 0;
7210 if (code == MULT)
7212 rtx xiop0 = XEXP (index, 0);
7213 rtx xiop1 = XEXP (index, 1);
7215 return ((arm_address_register_rtx_p (xiop0, strict_p)
7216 && thumb2_index_mul_operand (xiop1))
7217 || (arm_address_register_rtx_p (xiop1, strict_p)
7218 && thumb2_index_mul_operand (xiop0)));
7220 else if (code == ASHIFT)
7222 rtx op = XEXP (index, 1);
7224 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7225 && CONST_INT_P (op)
7226 && INTVAL (op) > 0
7227 && INTVAL (op) <= 3);
7230 return (code == CONST_INT
7231 && INTVAL (index) < 4096
7232 && INTVAL (index) > -256);
7235 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7236 static int
7237 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7239 int regno;
7241 if (!REG_P (x))
7242 return 0;
7244 regno = REGNO (x);
7246 if (strict_p)
7247 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7249 return (regno <= LAST_LO_REGNUM
7250 || regno > LAST_VIRTUAL_REGISTER
7251 || regno == FRAME_POINTER_REGNUM
7252 || (GET_MODE_SIZE (mode) >= 4
7253 && (regno == STACK_POINTER_REGNUM
7254 || regno >= FIRST_PSEUDO_REGISTER
7255 || x == hard_frame_pointer_rtx
7256 || x == arg_pointer_rtx)));
7259 /* Return nonzero if x is a legitimate index register. This is the case
7260 for any base register that can access a QImode object. */
7261 inline static int
7262 thumb1_index_register_rtx_p (rtx x, int strict_p)
7264 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7267 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7269 The AP may be eliminated to either the SP or the FP, so we use the
7270 least common denominator, e.g. SImode, and offsets from 0 to 64.
7272 ??? Verify whether the above is the right approach.
7274 ??? Also, the FP may be eliminated to the SP, so perhaps that
7275 needs special handling also.
7277 ??? Look at how the mips16 port solves this problem. It probably uses
7278 better ways to solve some of these problems.
7280 Although it is not incorrect, we don't accept QImode and HImode
7281 addresses based on the frame pointer or arg pointer until the
7282 reload pass starts. This is so that eliminating such addresses
7283 into stack based ones won't produce impossible code. */
7285 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7287 /* ??? Not clear if this is right. Experiment. */
7288 if (GET_MODE_SIZE (mode) < 4
7289 && !(reload_in_progress || reload_completed)
7290 && (reg_mentioned_p (frame_pointer_rtx, x)
7291 || reg_mentioned_p (arg_pointer_rtx, x)
7292 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7293 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7294 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7295 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7296 return 0;
7298 /* Accept any base register. SP only in SImode or larger. */
7299 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7300 return 1;
7302 /* This is PC relative data before arm_reorg runs. */
7303 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7304 && GET_CODE (x) == SYMBOL_REF
7305 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7306 return 1;
7308 /* This is PC relative data after arm_reorg runs. */
7309 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7310 && reload_completed
7311 && (GET_CODE (x) == LABEL_REF
7312 || (GET_CODE (x) == CONST
7313 && GET_CODE (XEXP (x, 0)) == PLUS
7314 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7315 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7316 return 1;
7318 /* Post-inc indexing only supported for SImode and larger. */
7319 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7320 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7321 return 1;
7323 else if (GET_CODE (x) == PLUS)
7325 /* REG+REG address can be any two index registers. */
7326 /* We disallow FRAME+REG addressing since we know that FRAME
7327 will be replaced with STACK, and SP relative addressing only
7328 permits SP+OFFSET. */
7329 if (GET_MODE_SIZE (mode) <= 4
7330 && XEXP (x, 0) != frame_pointer_rtx
7331 && XEXP (x, 1) != frame_pointer_rtx
7332 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7333 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7334 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7335 return 1;
7337 /* REG+const has 5-7 bit offset for non-SP registers. */
7338 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7339 || XEXP (x, 0) == arg_pointer_rtx)
7340 && CONST_INT_P (XEXP (x, 1))
7341 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7342 return 1;
7344 /* REG+const has 10-bit offset for SP, but only SImode and
7345 larger are supported. */
7346 /* ??? Should probably check for DI/DFmode overflow here
7347 just like GO_IF_LEGITIMATE_OFFSET does. */
7348 else if (REG_P (XEXP (x, 0))
7349 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7350 && GET_MODE_SIZE (mode) >= 4
7351 && CONST_INT_P (XEXP (x, 1))
7352 && INTVAL (XEXP (x, 1)) >= 0
7353 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7354 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7355 return 1;
7357 else if (REG_P (XEXP (x, 0))
7358 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7359 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7360 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7361 && REGNO (XEXP (x, 0))
7362 <= LAST_VIRTUAL_POINTER_REGISTER))
7363 && GET_MODE_SIZE (mode) >= 4
7364 && CONST_INT_P (XEXP (x, 1))
7365 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7366 return 1;
7369 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7370 && GET_MODE_SIZE (mode) == 4
7371 && GET_CODE (x) == SYMBOL_REF
7372 && CONSTANT_POOL_ADDRESS_P (x)
7373 && ! (flag_pic
7374 && symbol_mentioned_p (get_pool_constant (x))
7375 && ! pcrel_constant_p (get_pool_constant (x))))
7376 return 1;
7378 return 0;
7381 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7382 instruction of mode MODE. */
7384 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7386 switch (GET_MODE_SIZE (mode))
7388 case 1:
7389 return val >= 0 && val < 32;
7391 case 2:
7392 return val >= 0 && val < 64 && (val & 1) == 0;
7394 default:
7395 return (val >= 0
7396 && (val + GET_MODE_SIZE (mode)) <= 128
7397 && (val & 3) == 0);
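/* Illustrative summary of the ranges accepted above: QImode accepts
   offsets 0..31, HImode accepts even offsets 0..62, and word-sized or
   larger modes accept multiples of 4 from 0 up to 128 - GET_MODE_SIZE
   (0..124 for SImode, 0..120 for DImode).  */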
7401 bool
7402 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7404 if (TARGET_ARM)
7405 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7406 else if (TARGET_THUMB2)
7407 return thumb2_legitimate_address_p (mode, x, strict_p);
7408 else /* if (TARGET_THUMB1) */
7409 return thumb1_legitimate_address_p (mode, x, strict_p);
7412 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7414 Given an rtx X being reloaded into a reg required to be
7415 in class CLASS, return the class of reg to actually use.
7416 In general this is just CLASS, but for the Thumb core registers and
7417 immediate constants we prefer a LO_REGS class or a subset. */
7419 static reg_class_t
7420 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7422 if (TARGET_32BIT)
7423 return rclass;
7424 else
7426 if (rclass == GENERAL_REGS)
7427 return LO_REGS;
7428 else
7429 return rclass;
7433 /* Build the SYMBOL_REF for __tls_get_addr. */
7435 static GTY(()) rtx tls_get_addr_libfunc;
7437 static rtx
7438 get_tls_get_addr (void)
7440 if (!tls_get_addr_libfunc)
7441 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7442 return tls_get_addr_libfunc;
7446 arm_load_tp (rtx target)
7448 if (!target)
7449 target = gen_reg_rtx (SImode);
7451 if (TARGET_HARD_TP)
7453 /* Can return in any reg. */
7454 emit_insn (gen_load_tp_hard (target));
7456 else
7458 /* Always returned in r0. Immediately copy the result into a pseudo,
7459 otherwise other uses of r0 (e.g. setting up function arguments) may
7460 clobber the value. */
7462 rtx tmp;
7464 emit_insn (gen_load_tp_soft ());
7466 tmp = gen_rtx_REG (SImode, 0);
7467 emit_move_insn (target, tmp);
7469 return target;
7472 static rtx
7473 load_tls_operand (rtx x, rtx reg)
7475 rtx tmp;
7477 if (reg == NULL_RTX)
7478 reg = gen_reg_rtx (SImode);
7480 tmp = gen_rtx_CONST (SImode, x);
7482 emit_move_insn (reg, tmp);
7484 return reg;
7487 static rtx
7488 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7490 rtx insns, label, labelno, sum;
7492 gcc_assert (reloc != TLS_DESCSEQ);
7493 start_sequence ();
7495 labelno = GEN_INT (pic_labelno++);
7496 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7497 label = gen_rtx_CONST (VOIDmode, label);
7499 sum = gen_rtx_UNSPEC (Pmode,
7500 gen_rtvec (4, x, GEN_INT (reloc), label,
7501 GEN_INT (TARGET_ARM ? 8 : 4)),
7502 UNSPEC_TLS);
7503 reg = load_tls_operand (sum, reg);
7505 if (TARGET_ARM)
7506 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7507 else
7508 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7510 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7511 LCT_PURE, /* LCT_CONST? */
7512 Pmode, 1, reg, Pmode);
7514 insns = get_insns ();
7515 end_sequence ();
7517 return insns;
7520 static rtx
7521 arm_tls_descseq_addr (rtx x, rtx reg)
7523 rtx labelno = GEN_INT (pic_labelno++);
7524 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7525 rtx sum = gen_rtx_UNSPEC (Pmode,
7526 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7527 gen_rtx_CONST (VOIDmode, label),
7528 GEN_INT (!TARGET_ARM)),
7529 UNSPEC_TLS);
7530 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7532 emit_insn (gen_tlscall (x, labelno));
7533 if (!reg)
7534 reg = gen_reg_rtx (SImode);
7535 else
7536 gcc_assert (REGNO (reg) != 0);
7538 emit_move_insn (reg, reg0);
7540 return reg;
7544 legitimize_tls_address (rtx x, rtx reg)
7546 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7547 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7549 switch (model)
7551 case TLS_MODEL_GLOBAL_DYNAMIC:
7552 if (TARGET_GNU2_TLS)
7554 reg = arm_tls_descseq_addr (x, reg);
7556 tp = arm_load_tp (NULL_RTX);
7558 dest = gen_rtx_PLUS (Pmode, tp, reg);
7560 else
7562 /* Original scheme */
7563 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7564 dest = gen_reg_rtx (Pmode);
7565 emit_libcall_block (insns, dest, ret, x);
7567 return dest;
7569 case TLS_MODEL_LOCAL_DYNAMIC:
7570 if (TARGET_GNU2_TLS)
7572 reg = arm_tls_descseq_addr (x, reg);
7574 tp = arm_load_tp (NULL_RTX);
7576 dest = gen_rtx_PLUS (Pmode, tp, reg);
7578 else
7580 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7582 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7583 share the LDM result with other LD model accesses. */
7584 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7585 UNSPEC_TLS);
7586 dest = gen_reg_rtx (Pmode);
7587 emit_libcall_block (insns, dest, ret, eqv);
7589 /* Load the addend. */
7590 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7591 GEN_INT (TLS_LDO32)),
7592 UNSPEC_TLS);
7593 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7594 dest = gen_rtx_PLUS (Pmode, dest, addend);
7596 return dest;
7598 case TLS_MODEL_INITIAL_EXEC:
7599 labelno = GEN_INT (pic_labelno++);
7600 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7601 label = gen_rtx_CONST (VOIDmode, label);
7602 sum = gen_rtx_UNSPEC (Pmode,
7603 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7604 GEN_INT (TARGET_ARM ? 8 : 4)),
7605 UNSPEC_TLS);
7606 reg = load_tls_operand (sum, reg);
7608 if (TARGET_ARM)
7609 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7610 else if (TARGET_THUMB2)
7611 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7612 else
7614 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7615 emit_move_insn (reg, gen_const_mem (SImode, reg));
7618 tp = arm_load_tp (NULL_RTX);
7620 return gen_rtx_PLUS (Pmode, tp, reg);
7622 case TLS_MODEL_LOCAL_EXEC:
7623 tp = arm_load_tp (NULL_RTX);
7625 reg = gen_rtx_UNSPEC (Pmode,
7626 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7627 UNSPEC_TLS);
7628 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7630 return gen_rtx_PLUS (Pmode, tp, reg);
7632 default:
7633 abort ();
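/* A concrete reading of the local-exec case above (illustrative only):
   for TLS_MODEL_LOCAL_EXEC the function returns (plus (reg tp) (reg tmp)),
   where tmp has been loaded with (const (unspec [x TLS_LE32] UNSPEC_TLS)),
   i.e. the thread-pointer-relative offset of X added to the thread
   pointer obtained from arm_load_tp.  */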
7637 /* Try machine-dependent ways of modifying an illegitimate address
7638 to be legitimate. If we find one, return the new, valid address. */
7640 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7642 if (arm_tls_referenced_p (x))
7644 rtx addend = NULL;
7646 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7648 addend = XEXP (XEXP (x, 0), 1);
7649 x = XEXP (XEXP (x, 0), 0);
7652 if (GET_CODE (x) != SYMBOL_REF)
7653 return x;
7655 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7657 x = legitimize_tls_address (x, NULL_RTX);
7659 if (addend)
7661 x = gen_rtx_PLUS (SImode, x, addend);
7662 orig_x = x;
7664 else
7665 return x;
7668 if (!TARGET_ARM)
7670 /* TODO: legitimize_address for Thumb2. */
7671 if (TARGET_THUMB2)
7672 return x;
7673 return thumb_legitimize_address (x, orig_x, mode);
7676 if (GET_CODE (x) == PLUS)
7678 rtx xop0 = XEXP (x, 0);
7679 rtx xop1 = XEXP (x, 1);
7681 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7682 xop0 = force_reg (SImode, xop0);
7684 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7685 && !symbol_mentioned_p (xop1))
7686 xop1 = force_reg (SImode, xop1);
7688 if (ARM_BASE_REGISTER_RTX_P (xop0)
7689 && CONST_INT_P (xop1))
7691 HOST_WIDE_INT n, low_n;
7692 rtx base_reg, val;
7693 n = INTVAL (xop1);
7695 /* VFP addressing modes actually allow greater offsets, but for
7696 now we just stick with the lowest common denominator. */
7697 if (mode == DImode
7698 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7700 low_n = n & 0x0f;
7701 n &= ~0x0f;
7702 if (low_n > 4)
7704 n += 16;
7705 low_n -= 16;
7708 else
7710 low_n = ((mode) == TImode ? 0
7711 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7712 n -= low_n;
7715 base_reg = gen_reg_rtx (SImode);
7716 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7717 emit_move_insn (base_reg, val);
7718 x = plus_constant (Pmode, base_reg, low_n);
7720 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7721 x = gen_rtx_PLUS (SImode, xop0, xop1);
7724 /* XXX We don't allow MINUS any more -- see comment in
7725 arm_legitimate_address_outer_p (). */
7726 else if (GET_CODE (x) == MINUS)
7728 rtx xop0 = XEXP (x, 0);
7729 rtx xop1 = XEXP (x, 1);
7731 if (CONSTANT_P (xop0))
7732 xop0 = force_reg (SImode, xop0);
7734 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7735 xop1 = force_reg (SImode, xop1);
7737 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7738 x = gen_rtx_MINUS (SImode, xop0, xop1);
7741 /* Make sure to take full advantage of the pre-indexed addressing mode
7742 with absolute addresses which often allows for the base register to
7743 be factorized for multiple adjacent memory references, and it might
7744 even allow for the mini pool to be avoided entirely. */
7745 else if (CONST_INT_P (x) && optimize > 0)
7747 unsigned int bits;
7748 HOST_WIDE_INT mask, base, index;
7749 rtx base_reg;
7751 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7752 use an 8-bit index. So let's use a 12-bit index for SImode only and
7753 hope that arm_gen_constant will enable ldrb to use more bits. */
7754 bits = (mode == SImode) ? 12 : 8;
7755 mask = (1 << bits) - 1;
7756 base = INTVAL (x) & ~mask;
7757 index = INTVAL (x) & mask;
7758 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7760 /* It'll most probably be more efficient to generate the base
7761 with more bits set and use a negative index instead. */
7762 base |= mask;
7763 index -= mask;
7765 base_reg = force_reg (SImode, GEN_INT (base));
7766 x = plus_constant (Pmode, base_reg, index);
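/* Worked example for the constant-address split above (illustrative
   values): a SImode access to absolute address 0x3FFD gives bits = 12,
   mask = 0xFFF, base = 0x3000 and index = 0xFFD.  bit_count (0x3000) is 2,
   which does not exceed (32 - 12)/2, so the address becomes
   (base_reg + 0xFFD) with base_reg loaded with 0x3000; the base is a
   single valid ARM immediate and the index fits the 12-bit ldr offset.
   When the base has many bits set, the branch above instead ORs the mask
   into the base and uses a negative index.  */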
7769 if (flag_pic)
7771 /* We need to find and carefully transform any SYMBOL and LABEL
7772 references; so go back to the original address expression. */
7773 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7775 if (new_x != orig_x)
7776 x = new_x;
7779 return x;
7783 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7784 to be legitimate. If we find one, return the new, valid address. */
7786 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7788 if (GET_CODE (x) == PLUS
7789 && CONST_INT_P (XEXP (x, 1))
7790 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7791 || INTVAL (XEXP (x, 1)) < 0))
7793 rtx xop0 = XEXP (x, 0);
7794 rtx xop1 = XEXP (x, 1);
7795 HOST_WIDE_INT offset = INTVAL (xop1);
7797 /* Try and fold the offset into a biasing of the base register and
7798 then offsetting that. Don't do this when optimizing for space
7799 since it can cause too many CSEs. */
7800 if (optimize_size && offset >= 0
7801 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7803 HOST_WIDE_INT delta;
7805 if (offset >= 256)
7806 delta = offset - (256 - GET_MODE_SIZE (mode));
7807 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7808 delta = 31 * GET_MODE_SIZE (mode);
7809 else
7810 delta = offset & (~31 * GET_MODE_SIZE (mode));
7812 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7813 NULL_RTX);
7814 x = plus_constant (Pmode, xop0, delta);
7816 else if (offset < 0 && offset > -256)
7817 /* Small negative offsets are best done with a subtract before the
7818 dereference, since forcing these into a register normally takes two
7819 instructions. */
7820 x = force_operand (x, NULL_RTX);
7821 else
7823 /* For the remaining cases, force the constant into a register. */
7824 xop1 = force_reg (SImode, xop1);
7825 x = gen_rtx_PLUS (SImode, xop0, xop1);
7828 else if (GET_CODE (x) == PLUS
7829 && s_register_operand (XEXP (x, 1), SImode)
7830 && !s_register_operand (XEXP (x, 0), SImode))
7832 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7834 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7837 if (flag_pic)
7839 /* We need to find and carefully transform any SYMBOL and LABEL
7840 references; so go back to the original address expression. */
7841 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7843 if (new_x != orig_x)
7844 x = new_x;
7847 return x;
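/* Worked example for the optimize_size biasing in thumb_legitimize_address
   above (illustrative values): for a SImode access at base + 300, the
   offset is >= 256, so delta = 300 - (256 - 4) = 48.  The base is biased
   by 252 with an add and the access becomes [biased_base, #48]; 252 fits
   a Thumb add immediate and 48 is a legal word-aligned load offset.  */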
7850 bool
7851 arm_legitimize_reload_address (rtx *p,
7852 machine_mode mode,
7853 int opnum, int type,
7854 int ind_levels ATTRIBUTE_UNUSED)
7856 /* We must recognize output that we have already generated ourselves. */
7857 if (GET_CODE (*p) == PLUS
7858 && GET_CODE (XEXP (*p, 0)) == PLUS
7859 && REG_P (XEXP (XEXP (*p, 0), 0))
7860 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7861 && CONST_INT_P (XEXP (*p, 1)))
7863 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7864 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7865 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7866 return true;
7869 if (GET_CODE (*p) == PLUS
7870 && REG_P (XEXP (*p, 0))
7871 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7872 /* If the base register is equivalent to a constant, let the generic
7873 code handle it. Otherwise we will run into problems if a future
7874 reload pass decides to rematerialize the constant. */
7875 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7876 && CONST_INT_P (XEXP (*p, 1)))
7878 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7879 HOST_WIDE_INT low, high;
7881 /* Detect coprocessor load/stores. */
7882 bool coproc_p = ((TARGET_HARD_FLOAT
7883 && TARGET_VFP
7884 && (mode == SFmode || mode == DFmode))
7885 || (TARGET_REALLY_IWMMXT
7886 && VALID_IWMMXT_REG_MODE (mode))
7887 || (TARGET_NEON
7888 && (VALID_NEON_DREG_MODE (mode)
7889 || VALID_NEON_QREG_MODE (mode))));
7891 /* For some conditions, bail out when lower two bits are unaligned. */
7892 if ((val & 0x3) != 0
7893 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7894 && (coproc_p
7895 /* For DI, and DF under soft-float: */
7896 || ((mode == DImode || mode == DFmode)
7897 /* Without ldrd, we use stm/ldm, which does not
7898 fare well with unaligned bits. */
7899 && (! TARGET_LDRD
7900 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7901 || TARGET_THUMB2))))
7902 return false;
7904 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7905 where the (reg+high) part gets turned into a reload add insn,
7906 we try to decompose the index into high/low values that can often
7907 also lead to better reload CSE.
7908 For example:
7909 ldr r0, [r2, #4100] // Offset too large
7910 ldr r1, [r2, #4104] // Offset too large
7912 is best reloaded as:
7913 add t1, r2, #4096
7914 ldr r0, [t1, #4]
7915 add t2, r2, #4096
7916 ldr r1, [t2, #8]
7918 which post-reload CSE can simplify in most cases to eliminate the
7919 second add instruction:
7920 add t1, r2, #4096
7921 ldr r0, [t1, #4]
7922 ldr r1, [t1, #8]
7924 The idea here is that we want to split out the bits of the constant
7925 as a mask, rather than by subtracting the maximum offset that the
7926 respective type of load/store instruction can handle.
7928 A negative low-part offset can still be useful even when the overall
7929 offset is positive; sometimes this leads to an immediate
7930 that can be constructed with fewer instructions.
7931 For example:
7932 ldr r0, [r2, #0x3FFFFC]
7934 This is best reloaded as:
7935 add t1, r2, #0x400000
7936 ldr r0, [t1, #-4]
7938 The trick for spotting this for a load insn with N bits of offset
7939 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7940 negative offset that is going to make bit N and all the bits below
7941 it become zero in the remainder part.
7943 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7944 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7945 used in most cases of ARM load/store instructions. */
7947 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7948 (((VAL) & ((1 << (N)) - 1)) \
7949 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7950 : 0)
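/* Worked example (illustrative): SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10),
   as used in the coprocessor case below, evaluates to
   ((0x3FFFFC & 0x7FF) ^ 0x400) - 0x400 = 0x3FC - 0x400 = -4, so the
   reload becomes add t1, r2, #0x400000 followed by a [t1, #-4] access,
   matching the second example in the comment above.  */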
7952 if (coproc_p)
7954 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7956 /* NEON quad-word load/stores are made of two double-word accesses,
7957 so the valid index range is reduced by 8. Treat as 9-bit range if
7958 we go over it. */
7959 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7960 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7962 else if (GET_MODE_SIZE (mode) == 8)
7964 if (TARGET_LDRD)
7965 low = (TARGET_THUMB2
7966 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7967 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7968 else
7969 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7970 to access doublewords. The supported load/store offsets are
7971 -8, -4, and 4, which we try to produce here. */
7972 low = ((val & 0xf) ^ 0x8) - 0x8;
7974 else if (GET_MODE_SIZE (mode) < 8)
7976 /* NEON element load/stores do not have an offset. */
7977 if (TARGET_NEON_FP16 && mode == HFmode)
7978 return false;
7980 if (TARGET_THUMB2)
7982 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7983 Try the wider 12-bit range first, and re-try if the result
7984 is out of range. */
7985 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7986 if (low < -255)
7987 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7989 else
7991 if (mode == HImode || mode == HFmode)
7993 if (arm_arch4)
7994 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7995 else
7997 /* The storehi/movhi_bytes fallbacks can use only
7998 [-4094,+4094] of the full ldrb/strb index range. */
7999 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8000 if (low == 4095 || low == -4095)
8001 return false;
8004 else
8005 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8008 else
8009 return false;
8011 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8012 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8013 - (unsigned HOST_WIDE_INT) 0x80000000);
8014 /* Check for overflow or zero. */
8015 if (low == 0 || high == 0 || (high + low != val))
8016 return false;
8018 /* Reload the high part into a base reg; leave the low part
8019 in the mem.
8020 Note that replacing this gen_rtx_PLUS with plus_constant is
8021 wrong in this case because we rely on the
8022 (plus (plus reg c1) c2) structure being preserved so that
8023 XEXP (*p, 0) in push_reload below uses the correct term. */
8024 *p = gen_rtx_PLUS (GET_MODE (*p),
8025 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8026 GEN_INT (high)),
8027 GEN_INT (low));
8028 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8029 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8030 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8031 return true;
8034 return false;
8038 thumb_legitimize_reload_address (rtx *x_p,
8039 machine_mode mode,
8040 int opnum, int type,
8041 int ind_levels ATTRIBUTE_UNUSED)
8043 rtx x = *x_p;
8045 if (GET_CODE (x) == PLUS
8046 && GET_MODE_SIZE (mode) < 4
8047 && REG_P (XEXP (x, 0))
8048 && XEXP (x, 0) == stack_pointer_rtx
8049 && CONST_INT_P (XEXP (x, 1))
8050 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8052 rtx orig_x = x;
8054 x = copy_rtx (x);
8055 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8056 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8057 return x;
8060 /* If both registers are hi-regs, then it's better to reload the
8061 entire expression rather than each register individually. That
8062 only requires one reload register rather than two. */
8063 if (GET_CODE (x) == PLUS
8064 && REG_P (XEXP (x, 0))
8065 && REG_P (XEXP (x, 1))
8066 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8067 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8069 rtx orig_x = x;
8071 x = copy_rtx (x);
8072 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8073 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8074 return x;
8077 return NULL;
8080 /* Test for various thread-local symbols. */
8082 /* Helper for arm_tls_referenced_p. */
8084 static int
8085 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
8087 if (GET_CODE (*x) == SYMBOL_REF)
8088 return SYMBOL_REF_TLS_MODEL (*x) != 0;
8090 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8091 TLS offsets, not real symbol references. */
8092 if (GET_CODE (*x) == UNSPEC
8093 && XINT (*x, 1) == UNSPEC_TLS)
8094 return -1;
8096 return 0;
8099 /* Return TRUE if X contains any TLS symbol references. */
8101 bool
8102 arm_tls_referenced_p (rtx x)
8104 if (! TARGET_HAVE_TLS)
8105 return false;
8107 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
8110 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8112 On the ARM, allow any integer (invalid ones are removed later by insn
8113 patterns), nice doubles and symbol_refs which refer to the function's
8114 constant pool XXX.
8116 When generating PIC, allow anything. */
8118 static bool
8119 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8121 /* At present, we have no support for Neon structure constants, so forbid
8122 them here. It might be possible to handle simple cases like 0 and -1
8123 in future. */
8124 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8125 return false;
8127 return flag_pic || !label_mentioned_p (x);
8130 static bool
8131 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8133 return (CONST_INT_P (x)
8134 || CONST_DOUBLE_P (x)
8135 || CONSTANT_ADDRESS_P (x)
8136 || flag_pic);
8139 static bool
8140 arm_legitimate_constant_p (machine_mode mode, rtx x)
8142 return (!arm_cannot_force_const_mem (mode, x)
8143 && (TARGET_32BIT
8144 ? arm_legitimate_constant_p_1 (mode, x)
8145 : thumb_legitimate_constant_p (mode, x)));
8148 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8150 static bool
8151 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8153 rtx base, offset;
8155 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8157 split_const (x, &base, &offset);
8158 if (GET_CODE (base) == SYMBOL_REF
8159 && !offset_within_block_p (base, INTVAL (offset)))
8160 return true;
8162 return arm_tls_referenced_p (x);
8165 #define REG_OR_SUBREG_REG(X) \
8166 (REG_P (X) \
8167 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8169 #define REG_OR_SUBREG_RTX(X) \
8170 (REG_P (X) ? (X) : SUBREG_REG (X))
8172 static inline int
8173 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8175 machine_mode mode = GET_MODE (x);
8176 int total, words;
8178 switch (code)
8180 case ASHIFT:
8181 case ASHIFTRT:
8182 case LSHIFTRT:
8183 case ROTATERT:
8184 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8186 case PLUS:
8187 case MINUS:
8188 case COMPARE:
8189 case NEG:
8190 case NOT:
8191 return COSTS_N_INSNS (1);
8193 case MULT:
8194 if (CONST_INT_P (XEXP (x, 1)))
8196 int cycles = 0;
8197 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8199 while (i)
8201 i >>= 2;
8202 cycles++;
8204 return COSTS_N_INSNS (2) + cycles;
8206 return COSTS_N_INSNS (1) + 16;
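/* Illustrative evaluation of the loop above: for a constant multiplier of
   100 the value is shifted right by two 4 times before reaching zero
   (100 -> 25 -> 6 -> 1 -> 0), so the estimate is COSTS_N_INSNS (2) + 4.  */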
8208 case SET:
8209 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8210 the mode. */
8211 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8212 return (COSTS_N_INSNS (words)
8213 + 4 * ((MEM_P (SET_SRC (x)))
8214 + MEM_P (SET_DEST (x))));
8216 case CONST_INT:
8217 if (outer == SET)
8219 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8220 return 0;
8221 if (thumb_shiftable_const (INTVAL (x)))
8222 return COSTS_N_INSNS (2);
8223 return COSTS_N_INSNS (3);
8225 else if ((outer == PLUS || outer == COMPARE)
8226 && INTVAL (x) < 256 && INTVAL (x) > -256)
8227 return 0;
8228 else if ((outer == IOR || outer == XOR || outer == AND)
8229 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8230 return COSTS_N_INSNS (1);
8231 else if (outer == AND)
8233 int i;
8234 /* This duplicates the tests in the andsi3 expander. */
8235 for (i = 9; i <= 31; i++)
8236 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8237 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8238 return COSTS_N_INSNS (2);
8240 else if (outer == ASHIFT || outer == ASHIFTRT
8241 || outer == LSHIFTRT)
8242 return 0;
8243 return COSTS_N_INSNS (2);
8245 case CONST:
8246 case CONST_DOUBLE:
8247 case LABEL_REF:
8248 case SYMBOL_REF:
8249 return COSTS_N_INSNS (3);
8251 case UDIV:
8252 case UMOD:
8253 case DIV:
8254 case MOD:
8255 return 100;
8257 case TRUNCATE:
8258 return 99;
8260 case AND:
8261 case XOR:
8262 case IOR:
8263 /* XXX guess. */
8264 return 8;
8266 case MEM:
8267 /* XXX another guess. */
8268 /* Memory costs quite a lot for the first word, but subsequent words
8269 load at the equivalent of a single insn each. */
8270 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8271 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8272 ? 4 : 0));
8274 case IF_THEN_ELSE:
8275 /* XXX a guess. */
8276 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8277 return 14;
8278 return 2;
8280 case SIGN_EXTEND:
8281 case ZERO_EXTEND:
8282 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8283 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8285 if (mode == SImode)
8286 return total;
8288 if (arm_arch6)
8289 return total + COSTS_N_INSNS (1);
8291 /* Assume a two-shift sequence. Increase the cost slightly so
8292 we prefer actual shifts over an extend operation. */
8293 return total + 1 + COSTS_N_INSNS (2);
8295 default:
8296 return 99;
8300 static inline bool
8301 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8303 machine_mode mode = GET_MODE (x);
8304 enum rtx_code subcode;
8305 rtx operand;
8306 enum rtx_code code = GET_CODE (x);
8307 *total = 0;
8309 switch (code)
8311 case MEM:
8312 /* Memory costs quite a lot for the first word, but subsequent words
8313 load at the equivalent of a single insn each. */
8314 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8315 return true;
8317 case DIV:
8318 case MOD:
8319 case UDIV:
8320 case UMOD:
8321 if (TARGET_HARD_FLOAT && mode == SFmode)
8322 *total = COSTS_N_INSNS (2);
8323 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8324 *total = COSTS_N_INSNS (4);
8325 else
8326 *total = COSTS_N_INSNS (20);
8327 return false;
8329 case ROTATE:
8330 if (REG_P (XEXP (x, 1)))
8331 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8332 else if (!CONST_INT_P (XEXP (x, 1)))
8333 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8335 /* Fall through */
8336 case ROTATERT:
8337 if (mode != SImode)
8339 *total += COSTS_N_INSNS (4);
8340 return true;
8343 /* Fall through */
8344 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8345 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8346 if (mode == DImode)
8348 *total += COSTS_N_INSNS (3);
8349 return true;
8352 *total += COSTS_N_INSNS (1);
8353 /* Increase the cost of complex shifts because they aren't any faster,
8354 and they reduce dual issue opportunities. */
8355 if (arm_tune_cortex_a9
8356 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8357 ++*total;
8359 return true;
8361 case MINUS:
8362 if (mode == DImode)
8364 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8365 if (CONST_INT_P (XEXP (x, 0))
8366 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8368 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8369 return true;
8372 if (CONST_INT_P (XEXP (x, 1))
8373 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8375 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8376 return true;
8379 return false;
8382 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8384 if (TARGET_HARD_FLOAT
8385 && (mode == SFmode
8386 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8388 *total = COSTS_N_INSNS (1);
8389 if (CONST_DOUBLE_P (XEXP (x, 0))
8390 && arm_const_double_rtx (XEXP (x, 0)))
8392 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8393 return true;
8396 if (CONST_DOUBLE_P (XEXP (x, 1))
8397 && arm_const_double_rtx (XEXP (x, 1)))
8399 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8400 return true;
8403 return false;
8405 *total = COSTS_N_INSNS (20);
8406 return false;
8409 *total = COSTS_N_INSNS (1);
8410 if (CONST_INT_P (XEXP (x, 0))
8411 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8413 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8414 return true;
8417 subcode = GET_CODE (XEXP (x, 1));
8418 if (subcode == ASHIFT || subcode == ASHIFTRT
8419 || subcode == LSHIFTRT
8420 || subcode == ROTATE || subcode == ROTATERT)
8422 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8423 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8424 return true;
8427 /* A shift as a part of RSB costs no more than RSB itself. */
8428 if (GET_CODE (XEXP (x, 0)) == MULT
8429 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8431 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8432 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8433 return true;
8436 if (subcode == MULT
8437 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8439 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8440 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8441 return true;
8444 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8445 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8447 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8448 if (REG_P (XEXP (XEXP (x, 1), 0))
8449 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8450 *total += COSTS_N_INSNS (1);
8452 return true;
8455 /* Fall through */
8457 case PLUS:
8458 if (code == PLUS && arm_arch6 && mode == SImode
8459 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8460 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8462 *total = COSTS_N_INSNS (1);
8463 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8464 0, speed);
8465 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8466 return true;
8469 /* MLA: All arguments must be registers. We filter out
8470 multiplication by a power of two, so that we fall down into
8471 the code below. */
8472 if (GET_CODE (XEXP (x, 0)) == MULT
8473 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8475 /* The cost comes from the cost of the multiply. */
8476 return false;
8479 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8481 if (TARGET_HARD_FLOAT
8482 && (mode == SFmode
8483 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8485 *total = COSTS_N_INSNS (1);
8486 if (CONST_DOUBLE_P (XEXP (x, 1))
8487 && arm_const_double_rtx (XEXP (x, 1)))
8489 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8490 return true;
8493 return false;
8496 *total = COSTS_N_INSNS (20);
8497 return false;
8500 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8501 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8503 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8504 if (REG_P (XEXP (XEXP (x, 0), 0))
8505 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8506 *total += COSTS_N_INSNS (1);
8507 return true;
8510 /* Fall through */
8512 case AND: case XOR: case IOR:
8514 /* Normally the frame registers will be split into reg+const during
8515 reload, so it is a bad idea to combine them with other instructions,
8516 since then they might not be moved outside of loops. As a compromise
8517 we allow integration with ops that have a constant as their second
8518 operand. */
8519 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8520 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8521 && !CONST_INT_P (XEXP (x, 1)))
8522 *total = COSTS_N_INSNS (1);
8524 if (mode == DImode)
8526 *total += COSTS_N_INSNS (2);
8527 if (CONST_INT_P (XEXP (x, 1))
8528 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8530 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8531 return true;
8534 return false;
8537 *total += COSTS_N_INSNS (1);
8538 if (CONST_INT_P (XEXP (x, 1))
8539 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8541 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8542 return true;
8544 subcode = GET_CODE (XEXP (x, 0));
8545 if (subcode == ASHIFT || subcode == ASHIFTRT
8546 || subcode == LSHIFTRT
8547 || subcode == ROTATE || subcode == ROTATERT)
8549 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8550 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8551 return true;
8554 if (subcode == MULT
8555 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8557 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8558 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8559 return true;
8562 if (subcode == UMIN || subcode == UMAX
8563 || subcode == SMIN || subcode == SMAX)
8565 *total = COSTS_N_INSNS (3);
8566 return true;
8569 return false;
8571 case MULT:
8572 /* This should have been handled by the CPU specific routines. */
8573 gcc_unreachable ();
8575 case TRUNCATE:
8576 if (arm_arch3m && mode == SImode
8577 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8578 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8579 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8580 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8581 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8582 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8584 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8585 return true;
8587 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8588 return false;
8590 case NEG:
8591 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8593 if (TARGET_HARD_FLOAT
8594 && (mode == SFmode
8595 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8597 *total = COSTS_N_INSNS (1);
8598 return false;
8600 *total = COSTS_N_INSNS (2);
8601 return false;
8604 /* Fall through */
8605 case NOT:
8606 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8607 if (mode == SImode && code == NOT)
8609 subcode = GET_CODE (XEXP (x, 0));
8610 if (subcode == ASHIFT || subcode == ASHIFTRT
8611 || subcode == LSHIFTRT
8612 || subcode == ROTATE || subcode == ROTATERT
8613 || (subcode == MULT
8614 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8616 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8617 /* Register shifts cost an extra cycle. */
8618 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8619 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8620 subcode, 1, speed);
8621 return true;
8625 return false;
8627 case IF_THEN_ELSE:
8628 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8630 *total = COSTS_N_INSNS (4);
8631 return true;
8634 operand = XEXP (x, 0);
8636 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8637 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8638 && REG_P (XEXP (operand, 0))
8639 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8640 *total += COSTS_N_INSNS (1);
8641 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8642 + rtx_cost (XEXP (x, 2), code, 2, speed));
8643 return true;
8645 case NE:
8646 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8648 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8649 return true;
8651 goto scc_insn;
8653 case GE:
8654 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8655 && mode == SImode && XEXP (x, 1) == const0_rtx)
8657 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8658 return true;
8660 goto scc_insn;
8662 case LT:
8663 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8664 && mode == SImode && XEXP (x, 1) == const0_rtx)
8666 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8667 return true;
8669 goto scc_insn;
8671 case EQ:
8672 case GT:
8673 case LE:
8674 case GEU:
8675 case LTU:
8676 case GTU:
8677 case LEU:
8678 case UNORDERED:
8679 case ORDERED:
8680 case UNEQ:
8681 case UNGE:
8682 case UNLT:
8683 case UNGT:
8684 case UNLE:
8685 scc_insn:
8686 /* SCC insns. If the comparison has already been
8687 performed, they cost 2 instructions. Otherwise they need
8688 an additional comparison before them. */
8689 *total = COSTS_N_INSNS (2);
8690 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8692 return true;
8695 /* Fall through */
8696 case COMPARE:
8697 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8699 *total = 0;
8700 return true;
8703 *total += COSTS_N_INSNS (1);
8704 if (CONST_INT_P (XEXP (x, 1))
8705 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8707 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8708 return true;
8711 subcode = GET_CODE (XEXP (x, 0));
8712 if (subcode == ASHIFT || subcode == ASHIFTRT
8713 || subcode == LSHIFTRT
8714 || subcode == ROTATE || subcode == ROTATERT)
8716 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8717 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8718 return true;
8721 if (subcode == MULT
8722 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8724 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8725 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8726 return true;
8729 return false;
8731 case UMIN:
8732 case UMAX:
8733 case SMIN:
8734 case SMAX:
8735 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8736 if (!CONST_INT_P (XEXP (x, 1))
8737 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8738 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8739 return true;
8741 case ABS:
8742 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8744 if (TARGET_HARD_FLOAT
8745 && (mode == SFmode
8746 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8748 *total = COSTS_N_INSNS (1);
8749 return false;
8751 *total = COSTS_N_INSNS (20);
8752 return false;
8754 *total = COSTS_N_INSNS (1);
8755 if (mode == DImode)
8756 *total += COSTS_N_INSNS (3);
8757 return false;
8759 case SIGN_EXTEND:
8760 case ZERO_EXTEND:
8761 *total = 0;
8762 if (GET_MODE_CLASS (mode) == MODE_INT)
8764 rtx op = XEXP (x, 0);
8765 machine_mode opmode = GET_MODE (op);
8767 if (mode == DImode)
8768 *total += COSTS_N_INSNS (1);
8770 if (opmode != SImode)
8772 if (MEM_P (op))
8774 /* If !arm_arch4, we use one of the extendhisi2_mem
8775 or movhi_bytes patterns for HImode. For a QImode
8776 sign extension, we first zero-extend from memory
8777 and then perform a shift sequence. */
8778 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8779 *total += COSTS_N_INSNS (2);
8781 else if (arm_arch6)
8782 *total += COSTS_N_INSNS (1);
8784 /* We don't have the necessary insn, so we need to perform some
8785 other operation. */
8786 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8787 /* An and with constant 255. */
8788 *total += COSTS_N_INSNS (1);
8789 else
8790 /* A shift sequence. Increase costs slightly to avoid
8791 combining two shifts into an extend operation. */
8792 *total += COSTS_N_INSNS (2) + 1;
8795 return false;
8798 switch (GET_MODE (XEXP (x, 0)))
8800 case V8QImode:
8801 case V4HImode:
8802 case V2SImode:
8803 case V4QImode:
8804 case V2HImode:
8805 *total = COSTS_N_INSNS (1);
8806 return false;
8808 default:
8809 gcc_unreachable ();
8811 gcc_unreachable ();
8813 case ZERO_EXTRACT:
8814 case SIGN_EXTRACT:
8815 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8816 return true;
8818 case CONST_INT:
8819 if (const_ok_for_arm (INTVAL (x))
8820 || const_ok_for_arm (~INTVAL (x)))
8821 *total = COSTS_N_INSNS (1);
8822 else
8823 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8824 INTVAL (x), NULL_RTX,
8825 NULL_RTX, 0, 0));
8826 return true;
8828 case CONST:
8829 case LABEL_REF:
8830 case SYMBOL_REF:
8831 *total = COSTS_N_INSNS (3);
8832 return true;
8834 case HIGH:
8835 *total = COSTS_N_INSNS (1);
8836 return true;
8838 case LO_SUM:
8839 *total = COSTS_N_INSNS (1);
8840 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8841 return true;
8843 case CONST_DOUBLE:
8844 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8845 && (mode == SFmode || !TARGET_VFP_SINGLE))
8846 *total = COSTS_N_INSNS (1);
8847 else
8848 *total = COSTS_N_INSNS (4);
8849 return true;
8851 case SET:
8852 /* The vec_extract patterns accept memory operands that require an
8853 address reload. Account for the cost of that reload to give the
8854 auto-inc-dec pass an incentive to try to replace them. */
8855 if (TARGET_NEON && MEM_P (SET_DEST (x))
8856 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8858 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8859 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8860 *total += COSTS_N_INSNS (1);
8861 return true;
8863 /* Likewise for the vec_set patterns. */
8864 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8865 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8866 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8868 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8869 *total = rtx_cost (mem, code, 0, speed);
8870 if (!neon_vector_mem_operand (mem, 2, true))
8871 *total += COSTS_N_INSNS (1);
8872 return true;
8874 return false;
8876 case UNSPEC:
8877 /* We cost this as high as our memory costs to allow this to
8878 be hoisted from loops. */
8879 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8881 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8883 return true;
8885 case CONST_VECTOR:
8886 if (TARGET_NEON
8887 && TARGET_HARD_FLOAT
8888 && outer == SET
8889 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8890 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8891 *total = COSTS_N_INSNS (1);
8892 else
8893 *total = COSTS_N_INSNS (4);
8894 return true;
8896 default:
8897 *total = COSTS_N_INSNS (4);
8898 return false;
8902 /* Estimates the size cost of thumb1 instructions.
8903 For now most of the code is copied from thumb1_rtx_costs. We need more
8904 fine-grained tuning when we have more related test cases. */
8905 static inline int
8906 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8908 machine_mode mode = GET_MODE (x);
8909 int words;
8911 switch (code)
8913 case ASHIFT:
8914 case ASHIFTRT:
8915 case LSHIFTRT:
8916 case ROTATERT:
8917 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8919 case PLUS:
8920 case MINUS:
8921 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8922 defined by RTL expansion, especially for the expansion of
8923 multiplication. */
8924 if ((GET_CODE (XEXP (x, 0)) == MULT
8925 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8926 || (GET_CODE (XEXP (x, 1)) == MULT
8927 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8928 return COSTS_N_INSNS (2);
8929 /* Deliberately fall through for normal RTX. */
8930 case COMPARE:
8931 case NEG:
8932 case NOT:
8933 return COSTS_N_INSNS (1);
8935 case MULT:
8936 if (CONST_INT_P (XEXP (x, 1)))
8938 /* Thumb1 mul instruction can't operate on const. We must load it
8939 into a register first. */
8940 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8941 return COSTS_N_INSNS (1) + const_size;
8943 return COSTS_N_INSNS (1);
8945 case SET:
8946 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8947 the mode. */
8948 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8949 return COSTS_N_INSNS (words)
8950 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8951 || satisfies_constraint_K (SET_SRC (x))
8952 /* thumb1_movdi_insn. */
8953 || ((words > 1) && MEM_P (SET_SRC (x))));
8955 case CONST_INT:
8956 if (outer == SET)
8958 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8959 return COSTS_N_INSNS (1);
8960 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8961 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8962 return COSTS_N_INSNS (2);
8963 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8964 if (thumb_shiftable_const (INTVAL (x)))
8965 return COSTS_N_INSNS (2);
8966 return COSTS_N_INSNS (3);
8968 else if ((outer == PLUS || outer == COMPARE)
8969 && INTVAL (x) < 256 && INTVAL (x) > -256)
8970 return 0;
8971 else if ((outer == IOR || outer == XOR || outer == AND)
8972 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8973 return COSTS_N_INSNS (1);
8974 else if (outer == AND)
8976 int i;
8977 /* This duplicates the tests in the andsi3 expander. */
8978 for (i = 9; i <= 31; i++)
8979 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8980 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8981 return COSTS_N_INSNS (2);
8983 else if (outer == ASHIFT || outer == ASHIFTRT
8984 || outer == LSHIFTRT)
8985 return 0;
8986 return COSTS_N_INSNS (2);
8988 case CONST:
8989 case CONST_DOUBLE:
8990 case LABEL_REF:
8991 case SYMBOL_REF:
8992 return COSTS_N_INSNS (3);
8994 case UDIV:
8995 case UMOD:
8996 case DIV:
8997 case MOD:
8998 return 100;
9000 case TRUNCATE:
9001 return 99;
9003 case AND:
9004 case XOR:
9005 case IOR:
9006 return COSTS_N_INSNS (1);
9008 case MEM:
9009 return (COSTS_N_INSNS (1)
9010 + COSTS_N_INSNS (1)
9011 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9012 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9013 ? COSTS_N_INSNS (1) : 0));
9015 case IF_THEN_ELSE:
9016 /* XXX a guess. */
9017 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9018 return 14;
9019 return 2;
9021 case ZERO_EXTEND:
9022 /* XXX still guessing. */
9023 switch (GET_MODE (XEXP (x, 0)))
9025 case QImode:
9026 return (1 + (mode == DImode ? 4 : 0)
9027 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9029 case HImode:
9030 return (4 + (mode == DImode ? 4 : 0)
9031 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9033 case SImode:
9034 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9036 default:
9037 return 99;
9040 default:
9041 return 99;
9045 /* RTX costs when optimizing for size. */
9046 static bool
9047 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9048 int *total)
9050 machine_mode mode = GET_MODE (x);
9051 if (TARGET_THUMB1)
9053 *total = thumb1_size_rtx_costs (x, code, outer_code);
9054 return true;
9057 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9058 switch (code)
9060 case MEM:
9061 /* A memory access costs 1 insn if the mode is small, or the address is
9062 a single register, otherwise it costs one insn per word. */
9063 if (REG_P (XEXP (x, 0)))
9064 *total = COSTS_N_INSNS (1);
9065 else if (flag_pic
9066 && GET_CODE (XEXP (x, 0)) == PLUS
9067 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9068 /* This will be split into two instructions.
9069 See arm.md:calculate_pic_address. */
9070 *total = COSTS_N_INSNS (2);
9071 else
9072 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9073 return true;
9075 case DIV:
9076 case MOD:
9077 case UDIV:
9078 case UMOD:
9079 /* Needs a libcall, so it costs about this. */
9080 *total = COSTS_N_INSNS (2);
9081 return false;
9083 case ROTATE:
9084 if (mode == SImode && REG_P (XEXP (x, 1)))
9086 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9087 return true;
9089 /* Fall through */
9090 case ROTATERT:
9091 case ASHIFT:
9092 case LSHIFTRT:
9093 case ASHIFTRT:
9094 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9096 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9097 return true;
9099 else if (mode == SImode)
9101 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9102 /* Slightly disparage register shifts, but not by much. */
9103 if (!CONST_INT_P (XEXP (x, 1)))
9104 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9105 return true;
9108 /* Needs a libcall. */
9109 *total = COSTS_N_INSNS (2);
9110 return false;
9112 case MINUS:
9113 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9114 && (mode == SFmode || !TARGET_VFP_SINGLE))
9116 *total = COSTS_N_INSNS (1);
9117 return false;
9120 if (mode == SImode)
9122 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9123 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9125 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9126 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9127 || subcode1 == ROTATE || subcode1 == ROTATERT
9128 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9129 || subcode1 == ASHIFTRT)
9131 /* It's just the cost of the two operands. */
9132 *total = 0;
9133 return false;
9136 *total = COSTS_N_INSNS (1);
9137 return false;
9140 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9141 return false;
9143 case PLUS:
9144 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9145 && (mode == SFmode || !TARGET_VFP_SINGLE))
9147 *total = COSTS_N_INSNS (1);
9148 return false;
9151 /* A shift as a part of ADD costs nothing. */
9152 if (GET_CODE (XEXP (x, 0)) == MULT
9153 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9155 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9156 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9157 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9158 return true;
9161 /* Fall through */
9162 case AND: case XOR: case IOR:
9163 if (mode == SImode)
9165 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9167 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9168 || subcode == LSHIFTRT || subcode == ASHIFTRT
9169 || (code == AND && subcode == NOT))
9171 /* It's just the cost of the two operands. */
9172 *total = 0;
9173 return false;
9177 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9178 return false;
9180 case MULT:
9181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9182 return false;
9184 case NEG:
9185 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9186 && (mode == SFmode || !TARGET_VFP_SINGLE))
9188 *total = COSTS_N_INSNS (1);
9189 return false;
9192 /* Fall through */
9193 case NOT:
9194 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9196 return false;
9198 case IF_THEN_ELSE:
9199 *total = 0;
9200 return false;
9202 case COMPARE:
9203 if (cc_register (XEXP (x, 0), VOIDmode))
9204 *total = 0;
9205 else
9206 *total = COSTS_N_INSNS (1);
9207 return false;
9209 case ABS:
9210 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9211 && (mode == SFmode || !TARGET_VFP_SINGLE))
9212 *total = COSTS_N_INSNS (1);
9213 else
9214 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9215 return false;
9217 case SIGN_EXTEND:
9218 case ZERO_EXTEND:
9219 return arm_rtx_costs_1 (x, outer_code, total, 0);
9221 case CONST_INT:
9222 if (const_ok_for_arm (INTVAL (x)))
9223 /* A multiplication by a constant requires another instruction
9224 to load the constant to a register. */
9225 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9226 ? 1 : 0);
9227 else if (const_ok_for_arm (~INTVAL (x)))
9228 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9229 else if (const_ok_for_arm (-INTVAL (x)))
9231 if (outer_code == COMPARE || outer_code == PLUS
9232 || outer_code == MINUS)
9233 *total = 0;
9234 else
9235 *total = COSTS_N_INSNS (1);
9237 else
9238 *total = COSTS_N_INSNS (2);
9239 return true;
9241 case CONST:
9242 case LABEL_REF:
9243 case SYMBOL_REF:
9244 *total = COSTS_N_INSNS (2);
9245 return true;
9247 case CONST_DOUBLE:
9248 *total = COSTS_N_INSNS (4);
9249 return true;
9251 case CONST_VECTOR:
9252 if (TARGET_NEON
9253 && TARGET_HARD_FLOAT
9254 && outer_code == SET
9255 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9256 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9257 *total = COSTS_N_INSNS (1);
9258 else
9259 *total = COSTS_N_INSNS (4);
9260 return true;
9262 case HIGH:
9263 case LO_SUM:
9264 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9265 cost of these slightly. */
9266 *total = COSTS_N_INSNS (1) + 1;
9267 return true;
9269 case SET:
9270 return false;
9272 default:
9273 if (mode != VOIDmode)
9274 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9275 else
9276 *total = COSTS_N_INSNS (4); /* Who knows? */
9277 return false;
9281 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9282 operand, then return the operand that is being shifted. If the shift
9283 is not by a constant, then set SHIFT_REG to point to the operand.
9284 Return NULL if OP is not a shifter operand. */
9285 static rtx
9286 shifter_op_p (rtx op, rtx *shift_reg)
9288 enum rtx_code code = GET_CODE (op);
9290 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9291 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9292 return XEXP (op, 0);
9293 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9294 return XEXP (op, 0);
9295 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9296 || code == ASHIFTRT)
9298 if (!CONST_INT_P (XEXP (op, 1)))
9299 *shift_reg = XEXP (op, 1);
9300 return XEXP (op, 0);
9303 return NULL;
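/* Usage examples (illustrative): for (mult (reg r1) (const_int 4)) the
   function returns r1 and leaves *SHIFT_REG untouched, since 4 is a power
   of two; for (ashift (reg r1) (reg r2)) it returns r1 and sets *SHIFT_REG
   to r2; for (mult (reg r1) (const_int 5)) it returns NULL because 5 is
   not a power of two.  */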
9306 static bool
9307 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9309 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9310 gcc_assert (GET_CODE (x) == UNSPEC);
9312 switch (XINT (x, 1))
9314 case UNSPEC_UNALIGNED_LOAD:
9315 /* We can only do unaligned loads into the integer unit, and we can't
9316 use LDM or LDRD. */
9317 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9318 if (speed_p)
9319 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9320 + extra_cost->ldst.load_unaligned);
9322 #ifdef NOT_YET
9323 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9324 ADDR_SPACE_GENERIC, speed_p);
9325 #endif
9326 return true;
9328 case UNSPEC_UNALIGNED_STORE:
9329 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9330 if (speed_p)
9331 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9332 + extra_cost->ldst.store_unaligned);
9334 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9335 #ifdef NOT_YET
9336 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9337 ADDR_SPACE_GENERIC, speed_p);
9338 #endif
9339 return true;
9341 case UNSPEC_VRINTZ:
9342 case UNSPEC_VRINTP:
9343 case UNSPEC_VRINTM:
9344 case UNSPEC_VRINTR:
9345 case UNSPEC_VRINTX:
9346 case UNSPEC_VRINTA:
9347 *cost = COSTS_N_INSNS (1);
9348 if (speed_p)
9349 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9351 return true;
9352 default:
9353 *cost = COSTS_N_INSNS (2);
9354 break;
9356 return false;
9359 /* Cost of a libcall. We assume one insn per argument, an amount for the
9360 call (one insn for -Os) and then one for processing the result. */
9361 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
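/* Illustrative expansion: LIBCALL_COST (2) is COSTS_N_INSNS (20) when
   optimizing for speed (two argument insns plus the assumed call and
   result overhead) and COSTS_N_INSNS (4) when optimizing for size.  */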
9363 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9364 do \
9366 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9367 if (shift_op != NULL \
9368 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9370 if (shift_reg) \
9372 if (speed_p) \
9373 *cost += extra_cost->alu.arith_shift_reg; \
9374 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9376 else if (speed_p) \
9377 *cost += extra_cost->alu.arith_shift; \
9379 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9380 + rtx_cost (XEXP (x, 1 - IDX), \
9381 OP, 1, speed_p)); \
9382 return true; \
9385 while (0);
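/* Illustrative use of the macro above (assuming arm_rtx_shift_left_p
   accepts the operand as a left shift): with X = (plus (ashift (reg r1)
   (const_int 2)) (reg r2)) and IDX = 0, shifter_op_p returns r1 and leaves
   shift_reg as NULL, so for a speed estimate the cost is bumped by
   extra_cost->alu.arith_shift and the costs of r1 and r2 are added.  */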
9387 /* RTX costs. Make an estimate of the cost of executing the operation
9388 X, which is contained within an operation with code OUTER_CODE.
9389 SPEED_P indicates whether the cost desired is the performance cost,
9390 or the size cost. The estimate is stored in COST and the return
9391 value is TRUE if the cost calculation is final, or FALSE if the
9392 caller should recurse through the operands of X to add additional
9393 costs.
9395 We currently make no attempt to model the size savings of Thumb-2
9396 16-bit instructions. At the normal points in compilation where
9397 this code is called we have no measure of whether the condition
9398 flags are live or not, and thus no realistic way to determine what
9399 the size will eventually be. */
9400 static bool
9401 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9402 const struct cpu_cost_table *extra_cost,
9403 int *cost, bool speed_p)
9405 machine_mode mode = GET_MODE (x);
9407 if (TARGET_THUMB1)
9409 if (speed_p)
9410 *cost = thumb1_rtx_costs (x, code, outer_code);
9411 else
9412 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9413 return true;
9416 switch (code)
9418 case SET:
9419 *cost = 0;
9420 /* SET RTXs don't have a mode so we get it from the destination. */
9421 mode = GET_MODE (SET_DEST (x));
9423 if (REG_P (SET_SRC (x))
9424 && REG_P (SET_DEST (x)))
9426 /* Assume that most copies can be done with a single insn,
9427 unless we don't have HW FP, in which case everything
9428 larger than word mode will require two insns. */
9429 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9430 && GET_MODE_SIZE (mode) > 4)
9431 || mode == DImode)
9432 ? 2 : 1);
9433 /* Conditional register moves can be encoded
9434 in 16 bits in Thumb mode. */
9435 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9436 *cost >>= 1;
9438 return true;
9441 if (CONST_INT_P (SET_SRC (x)))
9443 /* Handle CONST_INT here, since the value doesn't have a mode
9444 and we would otherwise be unable to work out the true cost. */
9445 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9446 outer_code = SET;
9447 /* Slightly lower the cost of setting a core reg to a constant.
9448 This helps break up chains and allows for better scheduling. */
9449 if (REG_P (SET_DEST (x))
9450 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9451 *cost -= 1;
9452 x = SET_SRC (x);
9453 /* Immediate moves with an immediate in the range [0, 255] can be
9454 encoded in 16 bits in Thumb mode. */
9455 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9456 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9457 *cost >>= 1;
9458 goto const_int_cost;
9461 return false;
9463 case MEM:
9464 /* A memory access costs 1 insn if the mode is small or the address is
9465 a single register; otherwise it costs one insn per word. */
9466 if (REG_P (XEXP (x, 0)))
9467 *cost = COSTS_N_INSNS (1);
9468 else if (flag_pic
9469 && GET_CODE (XEXP (x, 0)) == PLUS
9470 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9471 /* This will be split into two instructions.
9472 See arm.md:calculate_pic_address. */
9473 *cost = COSTS_N_INSNS (2);
9474 else
9475 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9477 /* For speed optimizations, add the costs of the address and
9478 accessing memory. */
9479 if (speed_p)
9480 #ifdef NOT_YET
9481 *cost += (extra_cost->ldst.load
9482 + arm_address_cost (XEXP (x, 0), mode,
9483 ADDR_SPACE_GENERIC, speed_p));
9484 #else
9485 *cost += extra_cost->ldst.load;
9486 #endif
9487 return true;
9489 case PARALLEL:
9491 /* Calculations of LDM costs are complex. We assume an initial cost
9492 (ldm_1st) which will load the number of registers mentioned in
9493 ldm_regs_per_insn_1st registers; then each additional
9494 ldm_regs_per_insn_subsequent registers cost one more insn. The
9495 formula for N regs is thus:
9497 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9498 + ldm_regs_per_insn_subsequent - 1)
9499 / ldm_regs_per_insn_subsequent).
9501 Additional costs may also be added for addressing. A similar
9502 formula is used for STM. */
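/* As an illustration, on a core where ldm_regs_per_insn_1st == 1 and
   ldm_regs_per_insn_subsequent == 2, a five-register LDM would cost
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 1, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */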
9504 bool is_ldm = load_multiple_operation (x, SImode);
9505 bool is_stm = store_multiple_operation (x, SImode);
9507 *cost = COSTS_N_INSNS (1);
9509 if (is_ldm || is_stm)
9511 if (speed_p)
9513 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9514 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9515 ? extra_cost->ldst.ldm_regs_per_insn_1st
9516 : extra_cost->ldst.stm_regs_per_insn_1st;
9517 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9518 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9519 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9521 *cost += regs_per_insn_1st
9522 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9523 + regs_per_insn_sub - 1)
9524 / regs_per_insn_sub);
9525 return true;
9529 return false;
9531 case DIV:
9532 case UDIV:
9533 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9534 && (mode == SFmode || !TARGET_VFP_SINGLE))
9535 *cost = COSTS_N_INSNS (speed_p
9536 ? extra_cost->fp[mode != SFmode].div : 1);
9537 else if (mode == SImode && TARGET_IDIV)
9538 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9539 else
9540 *cost = LIBCALL_COST (2);
9541 return false; /* All arguments must be in registers. */
9543 case MOD:
9544 case UMOD:
9545 *cost = LIBCALL_COST (2);
9546 return false; /* All arguments must be in registers. */
9548 case ROTATE:
9549 if (mode == SImode && REG_P (XEXP (x, 1)))
9551 *cost = (COSTS_N_INSNS (2)
9552 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9553 if (speed_p)
9554 *cost += extra_cost->alu.shift_reg;
9555 return true;
9557 /* Fall through */
9558 case ROTATERT:
9559 case ASHIFT:
9560 case LSHIFTRT:
9561 case ASHIFTRT:
9562 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9564 *cost = (COSTS_N_INSNS (3)
9565 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9566 if (speed_p)
9567 *cost += 2 * extra_cost->alu.shift;
9568 return true;
9570 else if (mode == SImode)
9572 *cost = (COSTS_N_INSNS (1)
9573 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9574 /* Slightly disparage register shifts at -Os, but not by much. */
9575 if (!CONST_INT_P (XEXP (x, 1)))
9576 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9577 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9578 return true;
9580 else if (GET_MODE_CLASS (mode) == MODE_INT
9581 && GET_MODE_SIZE (mode) < 4)
9583 if (code == ASHIFT)
9585 *cost = (COSTS_N_INSNS (1)
9586 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9587 /* Slightly disparage register shifts at -Os, but not by
9588 much. */
9589 if (!CONST_INT_P (XEXP (x, 1)))
9590 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9591 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9593 else if (code == LSHIFTRT || code == ASHIFTRT)
9595 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9597 /* Can use SBFX/UBFX. */
9598 *cost = COSTS_N_INSNS (1);
9599 if (speed_p)
9600 *cost += extra_cost->alu.bfx;
9601 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9603 else
9605 *cost = COSTS_N_INSNS (2);
9606 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9607 if (speed_p)
9609 if (CONST_INT_P (XEXP (x, 1)))
9610 *cost += 2 * extra_cost->alu.shift;
9611 else
9612 *cost += (extra_cost->alu.shift
9613 + extra_cost->alu.shift_reg);
9615 else
9616 /* Slightly disparage register shifts. */
9617 *cost += !CONST_INT_P (XEXP (x, 1));
9620 else /* Rotates. */
9622 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9623 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9624 if (speed_p)
9626 if (CONST_INT_P (XEXP (x, 1)))
9627 *cost += (2 * extra_cost->alu.shift
9628 + extra_cost->alu.log_shift);
9629 else
9630 *cost += (extra_cost->alu.shift
9631 + extra_cost->alu.shift_reg
9632 + extra_cost->alu.log_shift_reg);
9635 return true;
9638 *cost = LIBCALL_COST (2);
9639 return false;
9641 case BSWAP:
9642 if (arm_arch6)
9644 if (mode == SImode)
9646 *cost = COSTS_N_INSNS (1);
9647 if (speed_p)
9648 *cost += extra_cost->alu.rev;
9650 return false;
9653 else
9655 /* No rev instruction available. Look at arm_legacy_rev
9656 and thumb_legacy_rev for the form of RTL used then. */
9657 if (TARGET_THUMB)
9659 *cost = COSTS_N_INSNS (10);
9661 if (speed_p)
9663 *cost += 6 * extra_cost->alu.shift;
9664 *cost += 3 * extra_cost->alu.logical;
9667 else
9669 *cost = COSTS_N_INSNS (5);
9671 if (speed_p)
9673 *cost += 2 * extra_cost->alu.shift;
9674 *cost += extra_cost->alu.arith_shift;
9675 *cost += 2 * extra_cost->alu.logical;
9678 return true;
9680 return false;
9682 case MINUS:
9683 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9684 && (mode == SFmode || !TARGET_VFP_SINGLE))
9686 *cost = COSTS_N_INSNS (1);
9687 if (GET_CODE (XEXP (x, 0)) == MULT
9688 || GET_CODE (XEXP (x, 1)) == MULT)
9690 rtx mul_op0, mul_op1, sub_op;
9692 if (speed_p)
9693 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9695 if (GET_CODE (XEXP (x, 0)) == MULT)
9697 mul_op0 = XEXP (XEXP (x, 0), 0);
9698 mul_op1 = XEXP (XEXP (x, 0), 1);
9699 sub_op = XEXP (x, 1);
9701 else
9703 mul_op0 = XEXP (XEXP (x, 1), 0);
9704 mul_op1 = XEXP (XEXP (x, 1), 1);
9705 sub_op = XEXP (x, 0);
9708 /* The first operand of the multiply may be optionally
9709 negated. */
9710 if (GET_CODE (mul_op0) == NEG)
9711 mul_op0 = XEXP (mul_op0, 0);
9713 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9714 + rtx_cost (mul_op1, code, 0, speed_p)
9715 + rtx_cost (sub_op, code, 0, speed_p));
9717 return true;
9720 if (speed_p)
9721 *cost += extra_cost->fp[mode != SFmode].addsub;
9722 return false;
9725 if (mode == SImode)
9727 rtx shift_by_reg = NULL;
9728 rtx shift_op;
9729 rtx non_shift_op;
9731 *cost = COSTS_N_INSNS (1);
9733 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9734 if (shift_op == NULL)
9736 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9737 non_shift_op = XEXP (x, 0);
9739 else
9740 non_shift_op = XEXP (x, 1);
9742 if (shift_op != NULL)
9744 if (shift_by_reg != NULL)
9746 if (speed_p)
9747 *cost += extra_cost->alu.arith_shift_reg;
9748 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9750 else if (speed_p)
9751 *cost += extra_cost->alu.arith_shift;
9753 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9754 + rtx_cost (non_shift_op, code, 0, speed_p));
9755 return true;
9758 if (arm_arch_thumb2
9759 && GET_CODE (XEXP (x, 1)) == MULT)
9761 /* MLS. */
9762 if (speed_p)
9763 *cost += extra_cost->mult[0].add;
9764 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9765 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9766 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9767 return true;
9770 if (CONST_INT_P (XEXP (x, 0)))
9772 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9773 INTVAL (XEXP (x, 0)), NULL_RTX,
9774 NULL_RTX, 1, 0);
9775 *cost = COSTS_N_INSNS (insns);
9776 if (speed_p)
9777 *cost += insns * extra_cost->alu.arith;
9778 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9779 return true;
9782 return false;
9785 if (GET_MODE_CLASS (mode) == MODE_INT
9786 && GET_MODE_SIZE (mode) < 4)
9788 rtx shift_op, shift_reg;
9789 shift_reg = NULL;
9791 /* We check both sides of the MINUS for shifter operands since,
9792 unlike PLUS, it's not commutative. */
9794 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9795 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9797 /* Slightly disparage, as we might need to widen the result. */
9798 *cost = 1 + COSTS_N_INSNS (1);
9799 if (speed_p)
9800 *cost += extra_cost->alu.arith;
9802 if (CONST_INT_P (XEXP (x, 0)))
9804 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9805 return true;
9808 return false;
9811 if (mode == DImode)
9813 *cost = COSTS_N_INSNS (2);
9815 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9817 rtx op1 = XEXP (x, 1);
9819 if (speed_p)
9820 *cost += 2 * extra_cost->alu.arith;
9822 if (GET_CODE (op1) == ZERO_EXTEND)
9823 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9824 else
9825 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9826 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9827 0, speed_p);
9828 return true;
9830 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9832 if (speed_p)
9833 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9834 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9835 0, speed_p)
9836 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9837 return true;
9839 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9840 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9842 if (speed_p)
9843 *cost += (extra_cost->alu.arith
9844 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9845 ? extra_cost->alu.arith
9846 : extra_cost->alu.arith_shift));
9847 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9848 + rtx_cost (XEXP (XEXP (x, 1), 0),
9849 GET_CODE (XEXP (x, 1)), 0, speed_p));
9850 return true;
9853 if (speed_p)
9854 *cost += 2 * extra_cost->alu.arith;
9855 return false;
9858 /* Vector mode? */
9860 *cost = LIBCALL_COST (2);
9861 return false;
9863 case PLUS:
9864 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9865 && (mode == SFmode || !TARGET_VFP_SINGLE))
9867 *cost = COSTS_N_INSNS (1);
9868 if (GET_CODE (XEXP (x, 0)) == MULT)
9870 rtx mul_op0, mul_op1, add_op;
9872 if (speed_p)
9873 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9875 mul_op0 = XEXP (XEXP (x, 0), 0);
9876 mul_op1 = XEXP (XEXP (x, 0), 1);
9877 add_op = XEXP (x, 1);
9879 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9880 + rtx_cost (mul_op1, code, 0, speed_p)
9881 + rtx_cost (add_op, code, 0, speed_p));
9883 return true;
9886 if (speed_p)
9887 *cost += extra_cost->fp[mode != SFmode].addsub;
9888 return false;
9890 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9892 *cost = LIBCALL_COST (2);
9893 return false;
9896 /* Narrow modes can be synthesized in SImode, but the range
9897 of useful sub-operations is limited. Check for shift operations
9898 on one of the operands. Only left shifts can be used in the
9899 narrow modes. */
9900 if (GET_MODE_CLASS (mode) == MODE_INT
9901 && GET_MODE_SIZE (mode) < 4)
9903 rtx shift_op, shift_reg;
9904 shift_reg = NULL;
9906 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9908 if (CONST_INT_P (XEXP (x, 1)))
9910 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9911 INTVAL (XEXP (x, 1)), NULL_RTX,
9912 NULL_RTX, 1, 0);
9913 *cost = COSTS_N_INSNS (insns);
9914 if (speed_p)
9915 *cost += insns * extra_cost->alu.arith;
9916 /* Slightly penalize a narrow operation as the result may
9917 need widening. */
9918 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9919 return true;
9922 /* Slightly penalize a narrow operation as the result may
9923 need widening. */
9924 *cost = 1 + COSTS_N_INSNS (1);
9925 if (speed_p)
9926 *cost += extra_cost->alu.arith;
9928 return false;
9931 if (mode == SImode)
9933 rtx shift_op, shift_reg;
9935 *cost = COSTS_N_INSNS (1);
9936 if (TARGET_INT_SIMD
9937 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9938 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9940 /* UXTA[BH] or SXTA[BH]. */
9941 if (speed_p)
9942 *cost += extra_cost->alu.extend_arith;
9943 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9944 speed_p)
9945 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9946 return true;
9949 shift_reg = NULL;
9950 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9951 if (shift_op != NULL)
9953 if (shift_reg)
9955 if (speed_p)
9956 *cost += extra_cost->alu.arith_shift_reg;
9957 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9959 else if (speed_p)
9960 *cost += extra_cost->alu.arith_shift;
9962 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9963 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9964 return true;
9966 if (GET_CODE (XEXP (x, 0)) == MULT)
9968 rtx mul_op = XEXP (x, 0);
9970 *cost = COSTS_N_INSNS (1);
9972 if (TARGET_DSP_MULTIPLY
9973 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9974 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9975 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9976 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9977 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9978 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9979 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9980 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9981 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9982 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9983 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9984 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9985 == 16))))))
9987 /* SMLA[BT][BT]. */
9988 if (speed_p)
9989 *cost += extra_cost->mult[0].extend_add;
9990 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9991 SIGN_EXTEND, 0, speed_p)
9992 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9993 SIGN_EXTEND, 0, speed_p)
9994 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9995 return true;
9998 if (speed_p)
9999 *cost += extra_cost->mult[0].add;
10000 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10001 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10002 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10003 return true;
10005 if (CONST_INT_P (XEXP (x, 1)))
10007 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10008 INTVAL (XEXP (x, 1)), NULL_RTX,
10009 NULL_RTX, 1, 0);
10010 *cost = COSTS_N_INSNS (insns);
10011 if (speed_p)
10012 *cost += insns * extra_cost->alu.arith;
10013 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10014 return true;
10016 return false;
10019 if (mode == DImode)
10021 if (arm_arch3m
10022 && GET_CODE (XEXP (x, 0)) == MULT
10023 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10024 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10025 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10026 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10028 *cost = COSTS_N_INSNS (1);
10029 if (speed_p)
10030 *cost += extra_cost->mult[1].extend_add;
10031 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10032 ZERO_EXTEND, 0, speed_p)
10033 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10034 ZERO_EXTEND, 0, speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10039 *cost = COSTS_N_INSNS (2);
10041 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10042 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10044 if (speed_p)
10045 *cost += (extra_cost->alu.arith
10046 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10047 ? extra_cost->alu.arith
10048 : extra_cost->alu.arith_shift));
10050 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10051 speed_p)
10052 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10053 return true;
10056 if (speed_p)
10057 *cost += 2 * extra_cost->alu.arith;
10058 return false;
10061 /* Vector mode? */
10062 *cost = LIBCALL_COST (2);
10063 return false;
10064 case IOR:
10065 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10067 *cost = COSTS_N_INSNS (1);
10068 if (speed_p)
10069 *cost += extra_cost->alu.rev;
10071 return true;
10073 /* Fall through. */
10074 case AND: case XOR:
10075 if (mode == SImode)
10077 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10078 rtx op0 = XEXP (x, 0);
10079 rtx shift_op, shift_reg;
10081 *cost = COSTS_N_INSNS (1);
10083 if (subcode == NOT
10084 && (code == AND
10085 || (code == IOR && TARGET_THUMB2)))
10086 op0 = XEXP (op0, 0);
10088 shift_reg = NULL;
10089 shift_op = shifter_op_p (op0, &shift_reg);
10090 if (shift_op != NULL)
10092 if (shift_reg)
10094 if (speed_p)
10095 *cost += extra_cost->alu.log_shift_reg;
10096 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10098 else if (speed_p)
10099 *cost += extra_cost->alu.log_shift;
10101 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10102 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10103 return true;
10106 if (CONST_INT_P (XEXP (x, 1)))
10108 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10109 INTVAL (XEXP (x, 1)), NULL_RTX,
10110 NULL_RTX, 1, 0);
10112 *cost = COSTS_N_INSNS (insns);
10113 if (speed_p)
10114 *cost += insns * extra_cost->alu.logical;
10115 *cost += rtx_cost (op0, code, 0, speed_p);
10116 return true;
10119 if (speed_p)
10120 *cost += extra_cost->alu.logical;
10121 *cost += (rtx_cost (op0, code, 0, speed_p)
10122 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10123 return true;
10126 if (mode == DImode)
10128 rtx op0 = XEXP (x, 0);
10129 enum rtx_code subcode = GET_CODE (op0);
10131 *cost = COSTS_N_INSNS (2);
10133 if (subcode == NOT
10134 && (code == AND
10135 || (code == IOR && TARGET_THUMB2)))
10136 op0 = XEXP (op0, 0);
10138 if (GET_CODE (op0) == ZERO_EXTEND)
10140 if (speed_p)
10141 *cost += 2 * extra_cost->alu.logical;
10143 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10144 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10145 return true;
10147 else if (GET_CODE (op0) == SIGN_EXTEND)
10149 if (speed_p)
10150 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10152 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10153 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10154 return true;
10157 if (speed_p)
10158 *cost += 2 * extra_cost->alu.logical;
10160 return true;
10162 /* Vector mode? */
10164 *cost = LIBCALL_COST (2);
10165 return false;
10167 case MULT:
10168 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10169 && (mode == SFmode || !TARGET_VFP_SINGLE))
10171 rtx op0 = XEXP (x, 0);
10173 *cost = COSTS_N_INSNS (1);
10175 if (GET_CODE (op0) == NEG)
10176 op0 = XEXP (op0, 0);
10178 if (speed_p)
10179 *cost += extra_cost->fp[mode != SFmode].mult;
10181 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10182 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10183 return true;
10185 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10187 *cost = LIBCALL_COST (2);
10188 return false;
10191 if (mode == SImode)
10193 *cost = COSTS_N_INSNS (1);
10194 if (TARGET_DSP_MULTIPLY
10195 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10196 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10197 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10198 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10199 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10200 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10201 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10202 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10203 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10204 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10206 && (INTVAL (XEXP (XEXP (x, 1), 1))
10207 == 16))))))
10209 /* SMUL[TB][TB]. */
10210 if (speed_p)
10211 *cost += extra_cost->mult[0].extend;
10212 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10213 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10214 return true;
10216 if (speed_p)
10217 *cost += extra_cost->mult[0].simple;
10218 return false;
10221 if (mode == DImode)
10223 if (arm_arch3m
10224 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10225 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10226 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10227 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10229 *cost = COSTS_N_INSNS (1);
10230 if (speed_p)
10231 *cost += extra_cost->mult[1].extend;
10232 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10233 ZERO_EXTEND, 0, speed_p)
10234 + rtx_cost (XEXP (XEXP (x, 1), 0),
10235 ZERO_EXTEND, 0, speed_p));
10236 return true;
10239 *cost = LIBCALL_COST (2);
10240 return false;
10243 /* Vector mode? */
10244 *cost = LIBCALL_COST (2);
10245 return false;
10247 case NEG:
10248 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10249 && (mode == SFmode || !TARGET_VFP_SINGLE))
10251 *cost = COSTS_N_INSNS (1);
10252 if (speed_p)
10253 *cost += extra_cost->fp[mode != SFmode].neg;
10255 return false;
10257 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10259 *cost = LIBCALL_COST (1);
10260 return false;
10263 if (mode == SImode)
10265 if (GET_CODE (XEXP (x, 0)) == ABS)
10267 *cost = COSTS_N_INSNS (2);
10268 /* Assume the non-flag-changing variant. */
10269 if (speed_p)
10270 *cost += (extra_cost->alu.log_shift
10271 + extra_cost->alu.arith_shift);
10272 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10273 return true;
10276 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10277 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10279 *cost = COSTS_N_INSNS (2);
10280 /* No extra cost for MOV imm and MVN imm. */
10281 /* If the comparison op is using the flags, there's no further
10282 cost, otherwise we need to add the cost of the comparison. */
10283 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10284 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10285 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10287 *cost += (COSTS_N_INSNS (1)
10288 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10289 speed_p)
10290 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10291 speed_p));
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10295 return true;
10297 *cost = COSTS_N_INSNS (1);
10298 if (speed_p)
10299 *cost += extra_cost->alu.arith;
10300 return false;
10303 if (GET_MODE_CLASS (mode) == MODE_INT
10304 && GET_MODE_SIZE (mode) < 4)
10306 /* Slightly disparage, as we might need an extend operation. */
10307 *cost = 1 + COSTS_N_INSNS (1);
10308 if (speed_p)
10309 *cost += extra_cost->alu.arith;
10310 return false;
10313 if (mode == DImode)
10315 *cost = COSTS_N_INSNS (2);
10316 if (speed_p)
10317 *cost += 2 * extra_cost->alu.arith;
10318 return false;
10321 /* Vector mode? */
10322 *cost = LIBCALL_COST (1);
10323 return false;
10325 case NOT:
10326 if (mode == SImode)
10328 rtx shift_op;
10329 rtx shift_reg = NULL;
10331 *cost = COSTS_N_INSNS (1);
10332 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10334 if (shift_op)
10336 if (shift_reg != NULL)
10338 if (speed_p)
10339 *cost += extra_cost->alu.log_shift_reg;
10340 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10342 else if (speed_p)
10343 *cost += extra_cost->alu.log_shift;
10344 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10345 return true;
10348 if (speed_p)
10349 *cost += extra_cost->alu.logical;
10350 return false;
10352 if (mode == DImode)
10354 *cost = COSTS_N_INSNS (2);
10355 return false;
10358 /* Vector mode? */
10360 *cost += LIBCALL_COST (1);
10361 return false;
10363 case IF_THEN_ELSE:
10365 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10367 *cost = COSTS_N_INSNS (4);
10368 return true;
10370 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10371 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10373 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10374 /* Assume that if one arm of the if_then_else is a register,
10375 it will be tied with the result, eliminating the
10376 conditional insn. */
10377 if (REG_P (XEXP (x, 1)))
10378 *cost += op2cost;
10379 else if (REG_P (XEXP (x, 2)))
10380 *cost += op1cost;
10381 else
10383 if (speed_p)
10385 if (extra_cost->alu.non_exec_costs_exec)
10386 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10387 else
10388 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10390 else
10391 *cost += op1cost + op2cost;
10394 return true;
10396 case COMPARE:
10397 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10398 *cost = 0;
10399 else
10401 machine_mode op0mode;
10402 /* We'll mostly assume that the cost of a compare is the cost of the
10403 LHS. However, there are some notable exceptions. */
10405 /* Floating point compares are never done as side-effects. */
10406 op0mode = GET_MODE (XEXP (x, 0));
10407 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10408 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10410 *cost = COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->fp[op0mode != SFmode].compare;
10414 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10416 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10417 return true;
10420 return false;
10422 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10424 *cost = LIBCALL_COST (2);
10425 return false;
10428 /* DImode compares normally take two insns. */
10429 if (op0mode == DImode)
10431 *cost = COSTS_N_INSNS (2);
10432 if (speed_p)
10433 *cost += 2 * extra_cost->alu.arith;
10434 return false;
10437 if (op0mode == SImode)
10439 rtx shift_op;
10440 rtx shift_reg;
10442 if (XEXP (x, 1) == const0_rtx
10443 && !(REG_P (XEXP (x, 0))
10444 || (GET_CODE (XEXP (x, 0)) == SUBREG
10445 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10447 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10449 /* Multiply operations that set the flags are often
10450 significantly more expensive. */
10451 if (speed_p
10452 && GET_CODE (XEXP (x, 0)) == MULT
10453 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10454 *cost += extra_cost->mult[0].flag_setting;
10456 if (speed_p
10457 && GET_CODE (XEXP (x, 0)) == PLUS
10458 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10459 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10460 0), 1), mode))
10461 *cost += extra_cost->mult[0].flag_setting;
10462 return true;
10465 shift_reg = NULL;
10466 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10467 if (shift_op != NULL)
10469 *cost = COSTS_N_INSNS (1);
10470 if (shift_reg != NULL)
10472 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10473 if (speed_p)
10474 *cost += extra_cost->alu.arith_shift_reg;
10476 else if (speed_p)
10477 *cost += extra_cost->alu.arith_shift;
10478 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10479 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10480 return true;
10483 *cost = COSTS_N_INSNS (1);
10484 if (speed_p)
10485 *cost += extra_cost->alu.arith;
10486 if (CONST_INT_P (XEXP (x, 1))
10487 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10489 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10490 return true;
10492 return false;
10495 /* Vector mode? */
10497 *cost = LIBCALL_COST (2);
10498 return false;
10500 return true;
10502 case EQ:
10503 case NE:
10504 case LT:
10505 case LE:
10506 case GT:
10507 case GE:
10508 case LTU:
10509 case LEU:
10510 case GEU:
10511 case GTU:
10512 case ORDERED:
10513 case UNORDERED:
10514 case UNEQ:
10515 case UNLE:
10516 case UNLT:
10517 case UNGE:
10518 case UNGT:
10519 case LTGT:
10520 if (outer_code == SET)
10522 /* Is it a store-flag operation? */
10523 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10524 && XEXP (x, 1) == const0_rtx)
10526 /* Thumb also needs an IT insn. */
10527 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10528 return true;
10530 if (XEXP (x, 1) == const0_rtx)
10532 switch (code)
10534 case LT:
10535 /* LSR Rd, Rn, #31. */
10536 *cost = COSTS_N_INSNS (1);
10537 if (speed_p)
10538 *cost += extra_cost->alu.shift;
10539 break;
10541 case EQ:
10542 /* RSBS T1, Rn, #0
10543 ADC Rd, Rn, T1. */
10545 case NE:
10546 /* SUBS T1, Rn, #1
10547 SBC Rd, Rn, T1. */
10548 *cost = COSTS_N_INSNS (2);
10549 break;
10551 case LE:
10552 /* RSBS T1, Rn, Rn, LSR #31
10553 ADC Rd, Rn, T1. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += extra_cost->alu.arith_shift;
10557 break;
10559 case GT:
10560 /* RSB Rd, Rn, Rn, ASR #1
10561 LSR Rd, Rd, #31. */
10562 *cost = COSTS_N_INSNS (2);
10563 if (speed_p)
10564 *cost += (extra_cost->alu.arith_shift
10565 + extra_cost->alu.shift);
10566 break;
10568 case GE:
10569 /* ASR Rd, Rn, #31
10570 ADD Rd, Rn, #1. */
10571 *cost = COSTS_N_INSNS (2);
10572 if (speed_p)
10573 *cost += extra_cost->alu.shift;
10574 break;
10576 default:
10577 /* Remaining cases are either meaningless or would take
10578 three insns anyway. */
10579 *cost = COSTS_N_INSNS (3);
10580 break;
10582 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10583 return true;
10585 else
10587 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10588 if (CONST_INT_P (XEXP (x, 1))
10589 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10591 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10592 return true;
10595 return false;
10598 /* Not directly inside a set. If it involves the condition code
10599 register it must be the condition for a branch, cond_exec or
10600 I_T_E operation. Since the comparison is performed elsewhere
10601 this is just the control part which has no additional
10602 cost. */
10603 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10604 && XEXP (x, 1) == const0_rtx)
10606 *cost = 0;
10607 return true;
10609 return false;
10611 case ABS:
10612 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10613 && (mode == SFmode || !TARGET_VFP_SINGLE))
10615 *cost = COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->fp[mode != SFmode].neg;
10619 return false;
10621 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10623 *cost = LIBCALL_COST (1);
10624 return false;
10627 if (mode == SImode)
10629 *cost = COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10632 return false;
10634 /* Vector mode? */
10635 *cost = LIBCALL_COST (1);
10636 return false;
10638 case SIGN_EXTEND:
10639 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10640 && MEM_P (XEXP (x, 0)))
10642 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10644 if (mode == DImode)
10645 *cost += COSTS_N_INSNS (1);
10647 if (!speed_p)
10648 return true;
10650 if (GET_MODE (XEXP (x, 0)) == SImode)
10651 *cost += extra_cost->ldst.load;
10652 else
10653 *cost += extra_cost->ldst.load_sign_extend;
10655 if (mode == DImode)
10656 *cost += extra_cost->alu.shift;
10658 return true;
10661 /* Widening from less than 32-bits requires an extend operation. */
10662 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10664 /* We have SXTB/SXTH. */
10665 *cost = COSTS_N_INSNS (1);
10666 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10667 if (speed_p)
10668 *cost += extra_cost->alu.extend;
10670 else if (GET_MODE (XEXP (x, 0)) != SImode)
10672 /* Needs two shifts. */
10673 *cost = COSTS_N_INSNS (2);
10674 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10675 if (speed_p)
10676 *cost += 2 * extra_cost->alu.shift;
10679 /* Widening beyond 32-bits requires one more insn. */
10680 if (mode == DImode)
10682 *cost += COSTS_N_INSNS (1);
10683 if (speed_p)
10684 *cost += extra_cost->alu.shift;
10687 return true;
10689 case ZERO_EXTEND:
10690 if ((arm_arch4
10691 || GET_MODE (XEXP (x, 0)) == SImode
10692 || GET_MODE (XEXP (x, 0)) == QImode)
10693 && MEM_P (XEXP (x, 0)))
10695 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10697 if (mode == DImode)
10698 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10700 return true;
10703 /* Widening from less than 32-bits requires an extend operation. */
10704 if (GET_MODE (XEXP (x, 0)) == QImode)
10706 /* UXTB can be a shorter instruction in Thumb2, but it might
10707 be slower than the AND Rd, Rn, #255 alternative. When
10708 optimizing for speed it should never be slower to use
10709 AND, and we don't really model 16-bit vs 32-bit insns
10710 here. */
10711 *cost = COSTS_N_INSNS (1);
10712 if (speed_p)
10713 *cost += extra_cost->alu.logical;
10715 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10717 /* We have UXTB/UXTH. */
10718 *cost = COSTS_N_INSNS (1);
10719 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10720 if (speed_p)
10721 *cost += extra_cost->alu.extend;
10723 else if (GET_MODE (XEXP (x, 0)) != SImode)
10725 /* Needs two shifts. It's marginally preferable to use
10726 shifts rather than two BIC instructions as the second
10727 shift may merge with a subsequent insn as a shifter
10728 op. */
10729 *cost = COSTS_N_INSNS (2);
10730 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10731 if (speed_p)
10732 *cost += 2 * extra_cost->alu.shift;
10734 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10735 *cost = COSTS_N_INSNS (1);
10737 /* Widening beyond 32-bits requires one more insn. */
10738 if (mode == DImode)
10740 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10743 return true;
10745 case CONST_INT:
10746 *cost = 0;
10747 /* CONST_INT has no mode, so we cannot tell for sure how many
10748 insns are really going to be needed. The best we can do is
10749 look at the value passed. If it fits in SImode, then assume
10750 that's the mode it will be used for. Otherwise assume it
10751 will be used in DImode. */
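/* For example, 0x12345678 truncates to itself in SImode and is
   costed as a single 32-bit constant, whereas a value such as
   ((HOST_WIDE_INT) 1 << 32), which does not, is costed below as two
   independent 32-bit halves.  */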
10752 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10753 mode = SImode;
10754 else
10755 mode = DImode;
10757 /* Avoid blowing up in arm_gen_constant (). */
10758 if (!(outer_code == PLUS
10759 || outer_code == AND
10760 || outer_code == IOR
10761 || outer_code == XOR
10762 || outer_code == MINUS))
10763 outer_code = SET;
10765 const_int_cost:
10766 if (mode == SImode)
10768 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10769 INTVAL (x), NULL, NULL,
10770 0, 0));
10771 /* Extra costs? */
10773 else
10775 *cost += COSTS_N_INSNS (arm_gen_constant
10776 (outer_code, SImode, NULL,
10777 trunc_int_for_mode (INTVAL (x), SImode),
10778 NULL, NULL, 0, 0)
10779 + arm_gen_constant (outer_code, SImode, NULL,
10780 INTVAL (x) >> 32, NULL,
10781 NULL, 0, 0));
10782 /* Extra costs? */
10785 return true;
10787 case CONST:
10788 case LABEL_REF:
10789 case SYMBOL_REF:
10790 if (speed_p)
10792 if (arm_arch_thumb2 && !flag_pic)
10793 *cost = COSTS_N_INSNS (2);
10794 else
10795 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10797 else
10798 *cost = COSTS_N_INSNS (2);
10800 if (flag_pic)
10802 *cost += COSTS_N_INSNS (1);
10803 if (speed_p)
10804 *cost += extra_cost->alu.arith;
10807 return true;
10809 case CONST_FIXED:
10810 *cost = COSTS_N_INSNS (4);
10811 /* Fixme. */
10812 return true;
10814 case CONST_DOUBLE:
10815 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10816 && (mode == SFmode || !TARGET_VFP_SINGLE))
10818 if (vfp3_const_double_rtx (x))
10820 *cost = COSTS_N_INSNS (1);
10821 if (speed_p)
10822 *cost += extra_cost->fp[mode == DFmode].fpconst;
10823 return true;
10826 if (speed_p)
10828 *cost = COSTS_N_INSNS (1);
10829 if (mode == DFmode)
10830 *cost += extra_cost->ldst.loadd;
10831 else
10832 *cost += extra_cost->ldst.loadf;
10834 else
10835 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10837 return true;
10839 *cost = COSTS_N_INSNS (4);
10840 return true;
10842 case CONST_VECTOR:
10843 /* Fixme. */
10844 if (TARGET_NEON
10845 && TARGET_HARD_FLOAT
10846 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10847 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10848 *cost = COSTS_N_INSNS (1);
10849 else
10850 *cost = COSTS_N_INSNS (4);
10851 return true;
10853 case HIGH:
10854 case LO_SUM:
10855 *cost = COSTS_N_INSNS (1);
10856 /* When optimizing for size, we prefer constant pool entries to
10857 MOVW/MOVT pairs, so bump the cost of these slightly. */
10858 if (!speed_p)
10859 *cost += 1;
10860 return true;
10862 case CLZ:
10863 *cost = COSTS_N_INSNS (1);
10864 if (speed_p)
10865 *cost += extra_cost->alu.clz;
10866 return false;
10868 case SMIN:
10869 if (XEXP (x, 1) == const0_rtx)
10871 *cost = COSTS_N_INSNS (1);
10872 if (speed_p)
10873 *cost += extra_cost->alu.log_shift;
10874 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10875 return true;
10877 /* Fall through. */
10878 case SMAX:
10879 case UMIN:
10880 case UMAX:
10881 *cost = COSTS_N_INSNS (2);
10882 return false;
10884 case TRUNCATE:
10885 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10886 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10887 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10888 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10889 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10890 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10891 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10892 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10893 == ZERO_EXTEND))))
10895 *cost = COSTS_N_INSNS (1);
10896 if (speed_p)
10897 *cost += extra_cost->mult[1].extend;
10898 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10899 speed_p)
10900 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10901 0, speed_p));
10902 return true;
10904 *cost = LIBCALL_COST (1);
10905 return false;
10907 case UNSPEC:
10908 return arm_unspec_cost (x, outer_code, speed_p, cost);
10910 case PC:
10911 /* Reading the PC is like reading any other register. Writing it
10912 is more expensive, but we take that into account elsewhere. */
10913 *cost = 0;
10914 return true;
10916 case ZERO_EXTRACT:
10917 /* TODO: Simple zero_extract of bottom bits using AND. */
10918 /* Fall through. */
10919 case SIGN_EXTRACT:
10920 if (arm_arch6
10921 && mode == SImode
10922 && CONST_INT_P (XEXP (x, 1))
10923 && CONST_INT_P (XEXP (x, 2)))
10925 *cost = COSTS_N_INSNS (1);
10926 if (speed_p)
10927 *cost += extra_cost->alu.bfx;
10928 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10929 return true;
10931 /* Without UBFX/SBFX, need to resort to shift operations. */
10932 *cost = COSTS_N_INSNS (2);
10933 if (speed_p)
10934 *cost += 2 * extra_cost->alu.shift;
10935 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10936 return true;
10938 case FLOAT_EXTEND:
10939 if (TARGET_HARD_FLOAT)
10941 *cost = COSTS_N_INSNS (1);
10942 if (speed_p)
10943 *cost += extra_cost->fp[mode == DFmode].widen;
10944 if (!TARGET_FPU_ARMV8
10945 && GET_MODE (XEXP (x, 0)) == HFmode)
10947 /* Pre v8, widening HF->DF is a two-step process, first
10948 widening to SFmode. */
10949 *cost += COSTS_N_INSNS (1);
10950 if (speed_p)
10951 *cost += extra_cost->fp[0].widen;
10953 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10954 return true;
10957 *cost = LIBCALL_COST (1);
10958 return false;
10960 case FLOAT_TRUNCATE:
10961 if (TARGET_HARD_FLOAT)
10963 *cost = COSTS_N_INSNS (1);
10964 if (speed_p)
10965 *cost += extra_cost->fp[mode == DFmode].narrow;
10966 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10967 return true;
10968 /* Vector modes? */
10970 *cost = LIBCALL_COST (1);
10971 return false;
10973 case FMA:
10974 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10976 rtx op0 = XEXP (x, 0);
10977 rtx op1 = XEXP (x, 1);
10978 rtx op2 = XEXP (x, 2);
10980 *cost = COSTS_N_INSNS (1);
10982 /* vfms or vfnma. */
10983 if (GET_CODE (op0) == NEG)
10984 op0 = XEXP (op0, 0);
10986 /* vfnms or vfnma. */
10987 if (GET_CODE (op2) == NEG)
10988 op2 = XEXP (op2, 0);
10990 *cost += rtx_cost (op0, FMA, 0, speed_p);
10991 *cost += rtx_cost (op1, FMA, 1, speed_p);
10992 *cost += rtx_cost (op2, FMA, 2, speed_p);
10994 if (speed_p)
10995 *cost += extra_cost->fp[mode == DFmode].fma;
10997 return true;
11000 *cost = LIBCALL_COST (3);
11001 return false;
11003 case FIX:
11004 case UNSIGNED_FIX:
11005 if (TARGET_HARD_FLOAT)
11007 if (GET_MODE_CLASS (mode) == MODE_INT)
11009 *cost = COSTS_N_INSNS (1);
11010 if (speed_p)
11011 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11012 /* Strip off the 'cost' of rounding towards zero. */
11013 if (GET_CODE (XEXP (x, 0)) == FIX)
11014 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11015 else
11016 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11017 /* ??? Increase the cost to deal with transferring from
11018 FP -> CORE registers? */
11019 return true;
11021 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11022 && TARGET_FPU_ARMV8)
11024 *cost = COSTS_N_INSNS (1);
11025 if (speed_p)
11026 *cost += extra_cost->fp[mode == DFmode].roundint;
11027 return false;
11029 /* Vector costs? */
11031 *cost = LIBCALL_COST (1);
11032 return false;
11034 case FLOAT:
11035 case UNSIGNED_FLOAT:
11036 if (TARGET_HARD_FLOAT)
11038 /* ??? Increase the cost to deal with transferring from CORE
11039 -> FP registers? */
11040 *cost = COSTS_N_INSNS (1);
11041 if (speed_p)
11042 *cost += extra_cost->fp[mode == DFmode].fromint;
11043 return false;
11045 *cost = LIBCALL_COST (1);
11046 return false;
11048 case CALL:
11049 *cost = COSTS_N_INSNS (1);
11050 return true;
11052 case ASM_OPERANDS:
11054 /* Just a guess: count the instructions in the asm template
11055 plus one insn per input operand. Always a minimum of COSTS_N_INSNS (1),
11056 though (see PR60663). */
11057 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11058 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11060 *cost = COSTS_N_INSNS (asm_length + num_operands);
11061 return true;
11063 default:
11064 if (mode != VOIDmode)
11065 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11066 else
11067 *cost = COSTS_N_INSNS (4); /* Who knows? */
11068 return false;
11072 #undef HANDLE_NARROW_SHIFT_ARITH
11074 /* Top-level RTX cost function: dispatch between the old per-core cost functions and the new table-driven costs. */
11075 static bool
11076 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11077 int *total, bool speed)
11079 bool result;
11081 if (TARGET_OLD_RTX_COSTS
11082 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11084 /* Old way. (Deprecated.) */
11085 if (!speed)
11086 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11087 (enum rtx_code) outer_code, total);
11088 else
11089 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11090 (enum rtx_code) outer_code, total,
11091 speed);
11093 else
11095 /* New way. */
11096 if (current_tune->insn_extra_cost)
11097 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11098 (enum rtx_code) outer_code,
11099 current_tune->insn_extra_cost,
11100 total, speed);
11101 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11102 && current_tune->insn_extra_cost == NULL */
11103 else
11104 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11105 (enum rtx_code) outer_code,
11106 &generic_extra_costs, total, speed);
11109 if (dump_file && (dump_flags & TDF_DETAILS))
11111 print_rtl_single (dump_file, x);
11112 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11113 *total, result ? "final" : "partial");
11115 return result;
11118 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11119 supported on any "slowmul" cores, so it can be ignored. */
11121 static bool
11122 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11123 int *total, bool speed)
11125 machine_mode mode = GET_MODE (x);
11127 if (TARGET_THUMB)
11129 *total = thumb1_rtx_costs (x, code, outer_code);
11130 return true;
11133 switch (code)
11135 case MULT:
11136 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11137 || mode == DImode)
11139 *total = COSTS_N_INSNS (20);
11140 return false;
11143 if (CONST_INT_P (XEXP (x, 1)))
11145 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11146 & (unsigned HOST_WIDE_INT) 0xffffffff);
11147 int cost, const_ok = const_ok_for_arm (i);
11148 int j, booth_unit_size;
11150 /* Tune as appropriate. */
11151 cost = const_ok ? 4 : 8;
11152 booth_unit_size = 2;
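/* Illustrative example: for a constant such as 0xff (valid as an ARM
   immediate) the base cost is 4, and the Booth loop below consumes
   two bits per iteration, so four iterations bring the total to
   COSTS_N_INSNS (8) before the cost of operand 0 is added.  */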
11153 for (j = 0; i && j < 32; j += booth_unit_size)
11155 i >>= booth_unit_size;
11156 cost++;
11159 *total = COSTS_N_INSNS (cost);
11160 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11161 return true;
11164 *total = COSTS_N_INSNS (20);
11165 return false;
11167 default:
11168 return arm_rtx_costs_1 (x, outer_code, total, speed);
11173 /* RTX cost for cores with a fast multiply unit (M variants). */
11175 static bool
11176 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11177 int *total, bool speed)
11179 machine_mode mode = GET_MODE (x);
11181 if (TARGET_THUMB1)
11183 *total = thumb1_rtx_costs (x, code, outer_code);
11184 return true;
11187 /* ??? Should Thumb-2 use different costs? */
11188 switch (code)
11190 case MULT:
11191 /* There is no point basing this on the tuning, since it is always the
11192 fast variant if it exists at all. */
11193 if (mode == DImode
11194 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11195 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11196 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11198 *total = COSTS_N_INSNS (2);
11199 return false;
11203 if (mode == DImode)
11205 *total = COSTS_N_INSNS (5);
11206 return false;
11209 if (CONST_INT_P (XEXP (x, 1)))
11211 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11212 & (unsigned HOST_WIDE_INT) 0xffffffff);
11213 int cost, const_ok = const_ok_for_arm (i);
11214 int j, booth_unit_size;
11216 /* Tune as appropriate. */
11217 cost = const_ok ? 4 : 8;
11218 booth_unit_size = 8;
11219 for (j = 0; i && j < 32; j += booth_unit_size)
11221 i >>= booth_unit_size;
11222 cost++;
11225 *total = COSTS_N_INSNS (cost);
11226 return false;
11229 if (mode == SImode)
11231 *total = COSTS_N_INSNS (4);
11232 return false;
11235 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11237 if (TARGET_HARD_FLOAT
11238 && (mode == SFmode
11239 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11241 *total = COSTS_N_INSNS (1);
11242 return false;
11247 /* Requires a lib call. */
11247 *total = COSTS_N_INSNS (20);
11248 return false;
11250 default:
11251 return arm_rtx_costs_1 (x, outer_code, total, speed);
11256 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11257 so it can be ignored. */
11259 static bool
11260 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11261 int *total, bool speed)
11263 machine_mode mode = GET_MODE (x);
11265 if (TARGET_THUMB)
11267 *total = thumb1_rtx_costs (x, code, outer_code);
11268 return true;
11271 switch (code)
11273 case COMPARE:
11274 if (GET_CODE (XEXP (x, 0)) != MULT)
11275 return arm_rtx_costs_1 (x, outer_code, total, speed);
11277 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11278 will stall until the multiplication is complete. */
11279 *total = COSTS_N_INSNS (3);
11280 return false;
11282 case MULT:
11283 /* There is no point basing this on the tuning, since it is always the
11284 fast variant if it exists at all. */
11285 if (mode == DImode
11286 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11287 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11288 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11290 *total = COSTS_N_INSNS (2);
11291 return false;
11295 if (mode == DImode)
11297 *total = COSTS_N_INSNS (5);
11298 return false;
11301 if (CONST_INT_P (XEXP (x, 1)))
11303 /* If operand 1 is a constant we can more accurately
11304 calculate the cost of the multiply. The multiplier can
11305 retire 15 bits on the first cycle and a further 12 on the
11306 second. We do, of course, have to load the constant into
11307 a register first. */
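/* Illustrative example: a multiplier of 0x1234 leaves both masks
   below clear, so the cost stays at COSTS_N_INSNS (1); 0x12345678
   sets bits in both the 0xffff8000 and the 0xf8000000 masks, giving
   COSTS_N_INSNS (3).  Negative constants are first inverted so that
   the same bounds apply.  */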
11308 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11309 /* There's a general overhead of one cycle. */
11310 int cost = 1;
11311 unsigned HOST_WIDE_INT masked_const;
11313 if (i & 0x80000000)
11314 i = ~i;
11316 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11318 masked_const = i & 0xffff8000;
11319 if (masked_const != 0)
11321 cost++;
11322 masked_const = i & 0xf8000000;
11323 if (masked_const != 0)
11324 cost++;
11326 *total = COSTS_N_INSNS (cost);
11327 return false;
11330 if (mode == SImode)
11332 *total = COSTS_N_INSNS (3);
11333 return false;
11336 /* Requires a lib call. */
11337 *total = COSTS_N_INSNS (20);
11338 return false;
11340 default:
11341 return arm_rtx_costs_1 (x, outer_code, total, speed);
11346 /* RTX costs for 9e (and later) cores. */
11348 static bool
11349 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11350 int *total, bool speed)
11352 machine_mode mode = GET_MODE (x);
11354 if (TARGET_THUMB1)
11356 switch (code)
11358 case MULT:
11359 *total = COSTS_N_INSNS (3);
11360 return true;
11362 default:
11363 *total = thumb1_rtx_costs (x, code, outer_code);
11364 return true;
11368 switch (code)
11370 case MULT:
11371 /* There is no point basing this on the tuning, since it is always the
11372 fast variant if it exists at all. */
11373 if (mode == DImode
11374 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11375 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11376 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11378 *total = COSTS_N_INSNS (2);
11379 return false;
11383 if (mode == DImode)
11385 *total = COSTS_N_INSNS (5);
11386 return false;
11389 if (mode == SImode)
11391 *total = COSTS_N_INSNS (2);
11392 return false;
11395 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11397 if (TARGET_HARD_FLOAT
11398 && (mode == SFmode
11399 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11401 *total = COSTS_N_INSNS (1);
11402 return false;
11406 *total = COSTS_N_INSNS (20);
11407 return false;
11409 default:
11410 return arm_rtx_costs_1 (x, outer_code, total, speed);
11413 /* All address computations that can be done are free, but rtx cost returns
11414 the same for practically all of them. So we weight the different types
11415 of address here in the order (most pref first):
11416 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11417 static inline int
11418 arm_arm_address_cost (rtx x)
11420 enum rtx_code c = GET_CODE (x);
11422 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11423 return 0;
11424 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11425 return 10;
11427 if (c == PLUS)
11429 if (CONST_INT_P (XEXP (x, 1)))
11430 return 2;
11432 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11433 return 3;
11435 return 4;
11438 return 6;
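/* For instance: a pre/post-increment address costs 0, [rN, #imm]
   costs 2, a shifted-index address such as [rN, rM, LSL #2] costs 3,
   a bare register costs 6 and a label or symbol reference costs 10.  */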
11441 static inline int
11442 arm_thumb_address_cost (rtx x)
11444 enum rtx_code c = GET_CODE (x);
11446 if (c == REG)
11447 return 1;
11448 if (c == PLUS
11449 && REG_P (XEXP (x, 0))
11450 && CONST_INT_P (XEXP (x, 1)))
11451 return 1;
11453 return 2;
11456 static int
11457 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11458 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11460 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11463 /* Adjust cost hook for XScale. */
11464 static bool
11465 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11467 /* Some true dependencies can have a higher cost depending
11468 on precisely how certain input operands are used. */
11469 if (REG_NOTE_KIND(link) == 0
11470 && recog_memoized (insn) >= 0
11471 && recog_memoized (dep) >= 0)
11473 int shift_opnum = get_attr_shift (insn);
11474 enum attr_type attr_type = get_attr_type (dep);
11476 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11477 operand for INSN. If we have a shifted input operand and the
11478 instruction we depend on is another ALU instruction, then we may
11479 have to account for an additional stall. */
11480 if (shift_opnum != 0
11481 && (attr_type == TYPE_ALU_SHIFT_IMM
11482 || attr_type == TYPE_ALUS_SHIFT_IMM
11483 || attr_type == TYPE_LOGIC_SHIFT_IMM
11484 || attr_type == TYPE_LOGICS_SHIFT_IMM
11485 || attr_type == TYPE_ALU_SHIFT_REG
11486 || attr_type == TYPE_ALUS_SHIFT_REG
11487 || attr_type == TYPE_LOGIC_SHIFT_REG
11488 || attr_type == TYPE_LOGICS_SHIFT_REG
11489 || attr_type == TYPE_MOV_SHIFT
11490 || attr_type == TYPE_MVN_SHIFT
11491 || attr_type == TYPE_MOV_SHIFT_REG
11492 || attr_type == TYPE_MVN_SHIFT_REG))
11494 rtx shifted_operand;
11495 int opno;
11497 /* Get the shifted operand. */
11498 extract_insn (insn);
11499 shifted_operand = recog_data.operand[shift_opnum];
11501 /* Iterate over all the operands in DEP. If we write an operand
11502 that overlaps with SHIFTED_OPERAND, then we have to increase the
11503 cost of this dependency. */
11504 extract_insn (dep);
11505 preprocess_constraints (dep);
11506 for (opno = 0; opno < recog_data.n_operands; opno++)
11508 /* We can ignore strict inputs. */
11509 if (recog_data.operand_type[opno] == OP_IN)
11510 continue;
11512 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11513 shifted_operand))
11515 *cost = 2;
11516 return false;
11521 return true;
11524 /* Adjust cost hook for Cortex A9. */
11525 static bool
11526 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11528 switch (REG_NOTE_KIND (link))
11530 case REG_DEP_ANTI:
11531 *cost = 0;
11532 return false;
11534 case REG_DEP_TRUE:
11535 case REG_DEP_OUTPUT:
11536 if (recog_memoized (insn) >= 0
11537 && recog_memoized (dep) >= 0)
11539 if (GET_CODE (PATTERN (insn)) == SET)
11541 if (GET_MODE_CLASS
11542 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11543 || GET_MODE_CLASS
11544 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11546 enum attr_type attr_type_insn = get_attr_type (insn);
11547 enum attr_type attr_type_dep = get_attr_type (dep);
11549 /* By default all dependencies of the form
11550 s0 = s0 <op> s1
11551 s0 = s0 <op> s2
11552 have an extra latency of 1 cycle because
11553 of the input and output dependency in this
11554 case. However, this gets modeled as a true
11555 dependency and hence all these checks. */
11556 if (REG_P (SET_DEST (PATTERN (insn)))
11557 && REG_P (SET_DEST (PATTERN (dep)))
11558 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11559 SET_DEST (PATTERN (dep))))
11561 /* FMACS is a special case where the dependent
11562 instruction can be issued 3 cycles before
11563 the normal latency in case of an output
11564 dependency. */
11565 if ((attr_type_insn == TYPE_FMACS
11566 || attr_type_insn == TYPE_FMACD)
11567 && (attr_type_dep == TYPE_FMACS
11568 || attr_type_dep == TYPE_FMACD))
11570 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11571 *cost = insn_default_latency (dep) - 3;
11572 else
11573 *cost = insn_default_latency (dep);
11574 return false;
11576 else
11578 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11579 *cost = insn_default_latency (dep) + 1;
11580 else
11581 *cost = insn_default_latency (dep);
11583 return false;
11588 break;
11590 default:
11591 gcc_unreachable ();
11594 return true;
11597 /* Adjust cost hook for FA726TE. */
11598 static bool
11599 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11601 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11602 by a predicated one) has a penalty of 3. */
11603 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11604 && recog_memoized (insn) >= 0
11605 && recog_memoized (dep) >= 0
11606 && get_attr_conds (dep) == CONDS_SET)
11608 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11609 if (get_attr_conds (insn) == CONDS_USE
11610 && get_attr_type (insn) != TYPE_BRANCH)
11612 *cost = 3;
11613 return false;
11616 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11617 || get_attr_conds (insn) == CONDS_USE)
11619 *cost = 0;
11620 return false;
11624 return true;
11627 /* Implement TARGET_REGISTER_MOVE_COST.
11629 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11630 such a move is typically more expensive than a single memory access. We set
11631 the cost to less than two memory accesses so that floating
11632 point to integer conversion does not go through memory. */
11635 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11636 reg_class_t from, reg_class_t to)
11638 if (TARGET_32BIT)
11640 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11641 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11642 return 15;
11643 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11644 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11645 return 4;
11646 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11647 return 20;
11648 else
11649 return 2;
11651 else
11653 if (from == HI_REGS || to == HI_REGS)
11654 return 4;
11655 else
11656 return 2;
11660 /* Implement TARGET_MEMORY_MOVE_COST. */
11663 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11664 bool in ATTRIBUTE_UNUSED)
11666 if (TARGET_32BIT)
11667 return 10;
11668 else
11670 if (GET_MODE_SIZE (mode) < 4)
11671 return 8;
11672 else
11673 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
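/* For example: 32-bit targets always return 10; on Thumb-1 an SImode
   value costs (2 * 4) * 1 == 8 in LO_REGS and 16 elsewhere, while
   anything narrower than a word costs 8 regardless of class.  */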
11677 /* Vectorizer cost model implementation. */
11679 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11680 static int
11681 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11682 tree vectype,
11683 int misalign ATTRIBUTE_UNUSED)
11685 unsigned elements;
11687 switch (type_of_cost)
11689 case scalar_stmt:
11690 return current_tune->vec_costs->scalar_stmt_cost;
11692 case scalar_load:
11693 return current_tune->vec_costs->scalar_load_cost;
11695 case scalar_store:
11696 return current_tune->vec_costs->scalar_store_cost;
11698 case vector_stmt:
11699 return current_tune->vec_costs->vec_stmt_cost;
11701 case vector_load:
11702 return current_tune->vec_costs->vec_align_load_cost;
11704 case vector_store:
11705 return current_tune->vec_costs->vec_store_cost;
11707 case vec_to_scalar:
11708 return current_tune->vec_costs->vec_to_scalar_cost;
11710 case scalar_to_vec:
11711 return current_tune->vec_costs->scalar_to_vec_cost;
11713 case unaligned_load:
11714 return current_tune->vec_costs->vec_unalign_load_cost;
11716 case unaligned_store:
11717 return current_tune->vec_costs->vec_unalign_store_cost;
11719 case cond_branch_taken:
11720 return current_tune->vec_costs->cond_taken_branch_cost;
11722 case cond_branch_not_taken:
11723 return current_tune->vec_costs->cond_not_taken_branch_cost;
11725 case vec_perm:
11726 case vec_promote_demote:
11727 return current_tune->vec_costs->vec_stmt_cost;
11729 case vec_construct:
11730 elements = TYPE_VECTOR_SUBPARTS (vectype);
11731 return elements / 2 + 1;
11733 default:
11734 gcc_unreachable ();
11738 /* Implement targetm.vectorize.add_stmt_cost. */
11740 static unsigned
11741 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11742 struct _stmt_vec_info *stmt_info, int misalign,
11743 enum vect_cost_model_location where)
11745 unsigned *cost = (unsigned *) data;
11746 unsigned retval = 0;
11748 if (flag_vect_cost_model)
11750 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11751 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11753 /* Statements in an inner loop relative to the loop being
11754 vectorized are weighted more heavily. The value here is
11755 arbitrary and could potentially be improved with analysis. */
11756 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11757 count *= 50; /* FIXME. */
11759 retval = (unsigned) (count * stmt_cost);
11760 cost[where] += retval;
11763 return retval;
11766 /* Return true if and only if this insn can dual-issue only as older. */
11767 static bool
11768 cortexa7_older_only (rtx_insn *insn)
11770 if (recog_memoized (insn) < 0)
11771 return false;
11773 switch (get_attr_type (insn))
11775 case TYPE_ALU_DSP_REG:
11776 case TYPE_ALU_SREG:
11777 case TYPE_ALUS_SREG:
11778 case TYPE_LOGIC_REG:
11779 case TYPE_LOGICS_REG:
11780 case TYPE_ADC_REG:
11781 case TYPE_ADCS_REG:
11782 case TYPE_ADR:
11783 case TYPE_BFM:
11784 case TYPE_REV:
11785 case TYPE_MVN_REG:
11786 case TYPE_SHIFT_IMM:
11787 case TYPE_SHIFT_REG:
11788 case TYPE_LOAD_BYTE:
11789 case TYPE_LOAD1:
11790 case TYPE_STORE1:
11791 case TYPE_FFARITHS:
11792 case TYPE_FADDS:
11793 case TYPE_FFARITHD:
11794 case TYPE_FADDD:
11795 case TYPE_FMOV:
11796 case TYPE_F_CVT:
11797 case TYPE_FCMPS:
11798 case TYPE_FCMPD:
11799 case TYPE_FCONSTS:
11800 case TYPE_FCONSTD:
11801 case TYPE_FMULS:
11802 case TYPE_FMACS:
11803 case TYPE_FMULD:
11804 case TYPE_FMACD:
11805 case TYPE_FDIVS:
11806 case TYPE_FDIVD:
11807 case TYPE_F_MRC:
11808 case TYPE_F_MRRC:
11809 case TYPE_F_FLAG:
11810 case TYPE_F_LOADS:
11811 case TYPE_F_STORES:
11812 return true;
11813 default:
11814 return false;
11818 /* Return true if and only if this insn can dual-issue as younger. */
11819 static bool
11820 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11822 if (recog_memoized (insn) < 0)
11824 if (verbose > 5)
11825 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11826 return false;
11829 switch (get_attr_type (insn))
11831 case TYPE_ALU_IMM:
11832 case TYPE_ALUS_IMM:
11833 case TYPE_LOGIC_IMM:
11834 case TYPE_LOGICS_IMM:
11835 case TYPE_EXTEND:
11836 case TYPE_MVN_IMM:
11837 case TYPE_MOV_IMM:
11838 case TYPE_MOV_REG:
11839 case TYPE_MOV_SHIFT:
11840 case TYPE_MOV_SHIFT_REG:
11841 case TYPE_BRANCH:
11842 case TYPE_CALL:
11843 return true;
11844 default:
11845 return false;
11850 /* Look for an instruction that can dual issue only as an older
11851 instruction, and move it in front of any instructions that can
11852 dual-issue as younger, while preserving the relative order of all
11853 other instructions in the ready list. This is a heuristic to help
11854 dual-issue in later cycles, by postponing issue of more flexible
11855 instructions. This heuristic may affect dual issue opportunities
11856 in the current cycle. */
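/* Illustration (added, not from the original sources): with a three-entry
   ready list {C, B, A}, where A sits at the head (index *n_readyp - 1) and
   can issue as younger while B can only issue as older, the scan below finds
   first_younger at A and first_older_only at B; the rotation then yields
   {C, A, B}, so the older-only insn B is issued ahead of the more flexible
   insn A.  */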
11857 static void
11858 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11859 int *n_readyp, int clock)
11861 int i;
11862 int first_older_only = -1, first_younger = -1;
11864 if (verbose > 5)
11865 fprintf (file,
11866 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11867 clock,
11868 *n_readyp);
11870 /* Traverse the ready list from the head (the instruction to issue
11871 first), looking for the first instruction that can issue as
11872 younger and the first instruction that can dual-issue only as
11873 older. */
11874 for (i = *n_readyp - 1; i >= 0; i--)
11876 rtx_insn *insn = ready[i];
11877 if (cortexa7_older_only (insn))
11879 first_older_only = i;
11880 if (verbose > 5)
11881 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11882 break;
11884 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11885 first_younger = i;
11888 /* Nothing to reorder because either no younger insn was found or an insn
11889 that can dual-issue only as older appears before any insn that
11890 can dual-issue as younger. */
11891 if (first_younger == -1)
11893 if (verbose > 5)
11894 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11895 return;
11898 /* Nothing to reorder because no older-only insn in the ready list. */
11899 if (first_older_only == -1)
11901 if (verbose > 5)
11902 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11903 return;
11906 /* Move first_older_only insn before first_younger. */
11907 if (verbose > 5)
11908 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11909 INSN_UID(ready [first_older_only]),
11910 INSN_UID(ready [first_younger]));
11911 rtx_insn *first_older_only_insn = ready [first_older_only];
11912 for (i = first_older_only; i < first_younger; i++)
11914 ready[i] = ready[i+1];
11917 ready[i] = first_older_only_insn;
11918 return;
11921 /* Implement TARGET_SCHED_REORDER. */
11922 static int
11923 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11924 int clock)
11926 switch (arm_tune)
11928 case cortexa7:
11929 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11930 break;
11931 default:
11932 /* Do nothing for other cores. */
11933 break;
11936 return arm_issue_rate ();
11939 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11940 It corrects the value of COST based on the relationship between
11941 INSN and DEP through the dependence LINK. It returns the new
11942 value. There is a per-core adjust_cost hook to adjust scheduler costs
11943 and the per-core hook can choose to completely override the generic
11944 adjust_cost function. Only put bits of code into arm_adjust_cost that
11945 are common across all cores. */
11946 static int
11947 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11949 rtx i_pat, d_pat;
11951 /* When generating Thumb-1 code, we want to place flag-setting operations
11952 close to a conditional branch which depends on them, so that we can
11953 omit the comparison. */
11954 if (TARGET_THUMB1
11955 && REG_NOTE_KIND (link) == 0
11956 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11957 && recog_memoized (dep) >= 0
11958 && get_attr_conds (dep) == CONDS_SET)
11959 return 0;
11961 if (current_tune->sched_adjust_cost != NULL)
11963 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11964 return cost;
11967 /* XXX Is this strictly true? */
11968 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11969 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11970 return 0;
11972 /* Call insns don't incur a stall, even if they follow a load. */
11973 if (REG_NOTE_KIND (link) == 0
11974 && CALL_P (insn))
11975 return 1;
11977 if ((i_pat = single_set (insn)) != NULL
11978 && MEM_P (SET_SRC (i_pat))
11979 && (d_pat = single_set (dep)) != NULL
11980 && MEM_P (SET_DEST (d_pat)))
11982 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11983 /* This is a load after a store; there is no conflict if the load reads
11984 from a cached area. Assume that loads from the stack and from the
11985 constant pool are cached, and that others will miss. This is a
11986 hack. */
11988 if ((GET_CODE (src_mem) == SYMBOL_REF
11989 && CONSTANT_POOL_ADDRESS_P (src_mem))
11990 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11991 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11992 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11993 return 1;
11996 return cost;
12000 arm_max_conditional_execute (void)
12002 return max_insns_skipped;
12005 static int
12006 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12008 if (TARGET_32BIT)
12009 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12010 else
12011 return (optimize > 0) ? 2 : 0;
12014 static int
12015 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12017 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12020 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12021 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12022 sequences of non-executed instructions in IT blocks probably take the same
12023 amount of time as executed instructions (and the IT instruction itself takes
12024 space in icache). This function was experimentally determined to give good
12025 results on a popular embedded benchmark. */
12027 static int
12028 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12030 return (TARGET_32BIT && speed_p) ? 1
12031 : arm_default_branch_cost (speed_p, predictable_p);
12034 static bool fp_consts_inited = false;
12036 static REAL_VALUE_TYPE value_fp0;
12038 static void
12039 init_fp_table (void)
12041 REAL_VALUE_TYPE r;
12043 r = REAL_VALUE_ATOF ("0", DFmode);
12044 value_fp0 = r;
12045 fp_consts_inited = true;
12048 /* Return TRUE if rtx X is a valid immediate FP constant. */
12050 arm_const_double_rtx (rtx x)
12052 REAL_VALUE_TYPE r;
12054 if (!fp_consts_inited)
12055 init_fp_table ();
12057 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12058 if (REAL_VALUE_MINUS_ZERO (r))
12059 return 0;
12061 if (REAL_VALUES_EQUAL (r, value_fp0))
12062 return 1;
12064 return 0;
12067 /* VFPv3 has a fairly wide range of representable immediates, formed from
12068 "quarter-precision" floating-point values. These can be evaluated using this
12069 formula (with ^ for exponentiation):
12071 (-1)^s * n * 2^(-r)
12073 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12074 16 <= n <= 31 and 0 <= r <= 7.
12076 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12078 - A (most-significant) is the sign bit.
12079 - BCD are the exponent (encoded as r XOR 3).
12080 - EFGH are the mantissa (encoded as n - 16).  */
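/* Worked example (added for illustration, not part of the original comment):
   1.0 = 16 * 2^(-4), i.e. s = 0, n = 16, r = 4, which encodes as A = 0,
   BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000, giving 0x70; likewise
   0.5 = 16 * 2^(-5) encodes as 0x60.  A minimal sketch of the packing step,
   mirroring the return statement of vfp3_const_double_index below (the
   helper name is hypothetical and the range checks are assumed done by the
   caller):

     static int
     vfp3_pack_example (int s, int n, int r)
     {
       return (s << 7) | ((r ^ 3) << 4) | (n - 16);
     }
*/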
12083 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12084 fconst[sd] instruction, or -1 if X isn't suitable. */
12085 static int
12086 vfp3_const_double_index (rtx x)
12088 REAL_VALUE_TYPE r, m;
12089 int sign, exponent;
12090 unsigned HOST_WIDE_INT mantissa, mant_hi;
12091 unsigned HOST_WIDE_INT mask;
12092 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12093 bool fail;
12095 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12096 return -1;
12098 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12100 /* We can't represent these things, so detect them first. */
12101 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12102 return -1;
12104 /* Extract sign, exponent and mantissa. */
12105 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12106 r = real_value_abs (&r);
12107 exponent = REAL_EXP (&r);
12108 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12109 highest (sign) bit, with a fixed binary point at bit point_pos.
12110 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12111 bits for the mantissa, this may fail (low bits would be lost). */
12112 real_ldexp (&m, &r, point_pos - exponent);
12113 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12114 mantissa = w.elt (0);
12115 mant_hi = w.elt (1);
12117 /* If there are bits set in the low part of the mantissa, we can't
12118 represent this value. */
12119 if (mantissa != 0)
12120 return -1;
12122 /* Now make it so that mantissa contains the most-significant bits, and move
12123 the point_pos to indicate that the least-significant bits have been
12124 discarded. */
12125 point_pos -= HOST_BITS_PER_WIDE_INT;
12126 mantissa = mant_hi;
12128 /* We can permit four significant bits of mantissa only, plus a high bit
12129 which is always 1. */
12130 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12131 if ((mantissa & mask) != 0)
12132 return -1;
12134 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12135 mantissa >>= point_pos - 5;
12137 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12138 floating-point immediate zero with Neon using an integer-zero load, but
12139 that case is handled elsewhere.) */
12140 if (mantissa == 0)
12141 return -1;
12143 gcc_assert (mantissa >= 16 && mantissa <= 31);
12145 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12146 normalized significands are in the range [1, 2). (Our mantissa is shifted
12147 left 4 places at this point relative to normalized IEEE754 values). GCC
12148 internally uses [0.5, 1) (see real.c), so the exponent returned from
12149 REAL_EXP must be altered. */
12150 exponent = 5 - exponent;
12152 if (exponent < 0 || exponent > 7)
12153 return -1;
12155 /* Sign, mantissa and exponent are now in the correct form to plug into the
12156 formula described in the comment above. */
12157 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12160 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12162 vfp3_const_double_rtx (rtx x)
12164 if (!TARGET_VFP3)
12165 return 0;
12167 return vfp3_const_double_index (x) != -1;
12170 /* Recognize immediates which can be used in various Neon instructions. Legal
12171 immediates are described by the following table (for VMVN variants, the
12172 bitwise inverse of the constant shown is recognized. In either case, VMOV
12173 is output and the correct instruction to use for a given constant is chosen
12174 by the assembler). The constant shown is replicated across all elements of
12175 the destination vector.
12177 insn elems variant constant (binary)
12178 ---- ----- ------- -----------------
12179 vmov i32 0 00000000 00000000 00000000 abcdefgh
12180 vmov i32 1 00000000 00000000 abcdefgh 00000000
12181 vmov i32 2 00000000 abcdefgh 00000000 00000000
12182 vmov i32 3 abcdefgh 00000000 00000000 00000000
12183 vmov i16 4 00000000 abcdefgh
12184 vmov i16 5 abcdefgh 00000000
12185 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12186 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12187 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12188 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12189 vmvn i16 10 00000000 abcdefgh
12190 vmvn i16 11 abcdefgh 00000000
12191 vmov i32 12 00000000 00000000 abcdefgh 11111111
12192 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12193 vmov i32 14 00000000 abcdefgh 11111111 11111111
12194 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12195 vmov i8 16 abcdefgh
12196 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12197 eeeeeeee ffffffff gggggggg hhhhhhhh
12198 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12199 vmov f32 19 00000000 00000000 00000000 00000000
12201 For case 18, B = !b. Representable values are exactly those accepted by
12202 vfp3_const_double_index, but are output as floating-point numbers rather
12203 than indices.
12205 For case 19, we will change it to vmov.i32 when assembling.
12207 Variants 0-5 (inclusive) may also be used as immediates for the second
12208 operand of VORR/VBIC instructions.
12210 The INVERSE argument causes the bitwise inverse of the given operand to be
12211 recognized instead (used for recognizing legal immediates for the VAND/VORN
12212 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12213 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12214 output, rather than the real insns vbic/vorr).
12216 INVERSE makes no difference to the recognition of float vectors.
12218 The return value is the variant of immediate as shown in the above table, or
12219 -1 if the given value doesn't match any of the listed patterns.  */
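/* Worked example (added for illustration, not in the original sources): with
   INVERSE zero, a V4SImode CONST_VECTOR whose four elements are all
   0x00005500 splats to the byte pattern 00, 55, 00, 00 repeated, which
   matches variant 1 in the table above; the function returns 1 with
   *elementwidth = 32 and *modconst = (const_int 0x5500).  */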
12221 static int
12222 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12223 rtx *modconst, int *elementwidth)
12225 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12226 matches = 1; \
12227 for (i = 0; i < idx; i += (STRIDE)) \
12228 if (!(TEST)) \
12229 matches = 0; \
12230 if (matches) \
12232 immtype = (CLASS); \
12233 elsize = (ELSIZE); \
12234 break; \
12237 unsigned int i, elsize = 0, idx = 0, n_elts;
12238 unsigned int innersize;
12239 unsigned char bytes[16];
12240 int immtype = -1, matches;
12241 unsigned int invmask = inverse ? 0xff : 0;
12242 bool vector = GET_CODE (op) == CONST_VECTOR;
12244 if (vector)
12246 n_elts = CONST_VECTOR_NUNITS (op);
12247 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12249 else
12251 n_elts = 1;
12252 if (mode == VOIDmode)
12253 mode = DImode;
12254 innersize = GET_MODE_SIZE (mode);
12257 /* Vectors of float constants. */
12258 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12260 rtx el0 = CONST_VECTOR_ELT (op, 0);
12261 REAL_VALUE_TYPE r0;
12263 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12264 return -1;
12266 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12268 for (i = 1; i < n_elts; i++)
12270 rtx elt = CONST_VECTOR_ELT (op, i);
12271 REAL_VALUE_TYPE re;
12273 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12275 if (!REAL_VALUES_EQUAL (r0, re))
12276 return -1;
12279 if (modconst)
12280 *modconst = CONST_VECTOR_ELT (op, 0);
12282 if (elementwidth)
12283 *elementwidth = 0;
12285 if (el0 == CONST0_RTX (GET_MODE (el0)))
12286 return 19;
12287 else
12288 return 18;
12291 /* Splat vector constant out into a byte vector. */
12292 for (i = 0; i < n_elts; i++)
12294 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12295 unsigned HOST_WIDE_INT elpart;
12296 unsigned int part, parts;
12298 if (CONST_INT_P (el))
12300 elpart = INTVAL (el);
12301 parts = 1;
12303 else if (CONST_DOUBLE_P (el))
12305 elpart = CONST_DOUBLE_LOW (el);
12306 parts = 2;
12308 else
12309 gcc_unreachable ();
12311 for (part = 0; part < parts; part++)
12313 unsigned int byte;
12314 for (byte = 0; byte < innersize; byte++)
12316 bytes[idx++] = (elpart & 0xff) ^ invmask;
12317 elpart >>= BITS_PER_UNIT;
12319 if (CONST_DOUBLE_P (el))
12320 elpart = CONST_DOUBLE_HIGH (el);
12324 /* Sanity check. */
12325 gcc_assert (idx == GET_MODE_SIZE (mode));
12329 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12330 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12332 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12333 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12335 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12336 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12338 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12339 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12341 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12343 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12345 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12346 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12348 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12351 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12352 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12354 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12355 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12357 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12359 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12361 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12362 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12364 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12365 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12367 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12368 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12370 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12371 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12373 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12375 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12376 && bytes[i] == bytes[(i + 8) % idx]);
12378 while (0);
12380 if (immtype == -1)
12381 return -1;
12383 if (elementwidth)
12384 *elementwidth = elsize;
12386 if (modconst)
12388 unsigned HOST_WIDE_INT imm = 0;
12390 /* Un-invert bytes of recognized vector, if necessary. */
12391 if (invmask != 0)
12392 for (i = 0; i < idx; i++)
12393 bytes[i] ^= invmask;
12395 if (immtype == 17)
12397 /* FIXME: Broken on 32-bit H_W_I hosts. */
12398 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12400 for (i = 0; i < 8; i++)
12401 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12402 << (i * BITS_PER_UNIT);
12404 *modconst = GEN_INT (imm);
12406 else
12408 unsigned HOST_WIDE_INT imm = 0;
12410 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12411 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12413 *modconst = GEN_INT (imm);
12417 return immtype;
12418 #undef CHECK
12421 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12422 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12423 float elements), and a modified constant (whatever should be output for a
12424 VMOV) in *MODCONST. */
12427 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12428 rtx *modconst, int *elementwidth)
12430 rtx tmpconst;
12431 int tmpwidth;
12432 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12434 if (retval == -1)
12435 return 0;
12437 if (modconst)
12438 *modconst = tmpconst;
12440 if (elementwidth)
12441 *elementwidth = tmpwidth;
12443 return 1;
12446 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12447 the immediate is valid, write a constant suitable for using as an operand
12448 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12449 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12452 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12453 rtx *modconst, int *elementwidth)
12455 rtx tmpconst;
12456 int tmpwidth;
12457 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12459 if (retval < 0 || retval > 5)
12460 return 0;
12462 if (modconst)
12463 *modconst = tmpconst;
12465 if (elementwidth)
12466 *elementwidth = tmpwidth;
12468 return 1;
12471 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12472 the immediate is valid, write a constant suitable for using as an operand
12473 to VSHR/VSHL to *MODCONST and the corresponding element width to
12474 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left shift or a right shift,
12475 because they have different limitations. */
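/* For example (illustration only): with V8QImode (8-bit elements) a VSHL
   immediate must be in the range 0..7 and a VSHR immediate in 1..8; an
   out-of-range count, or a vector whose shift counts are not all equal,
   makes this function return 0.  */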
12478 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12479 rtx *modconst, int *elementwidth,
12480 bool isleftshift)
12482 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12483 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12484 unsigned HOST_WIDE_INT last_elt = 0;
12485 unsigned HOST_WIDE_INT maxshift;
12487 /* All elements of the vector constant must be identical; extract the value. */
12488 for (i = 0; i < n_elts; i++)
12490 rtx el = CONST_VECTOR_ELT (op, i);
12491 unsigned HOST_WIDE_INT elpart;
12493 if (CONST_INT_P (el))
12494 elpart = INTVAL (el);
12495 else if (CONST_DOUBLE_P (el))
12496 return 0;
12497 else
12498 gcc_unreachable ();
12500 if (i != 0 && elpart != last_elt)
12501 return 0;
12503 last_elt = elpart;
12506 /* Shift less than element size. */
12507 maxshift = innersize * 8;
12509 if (isleftshift)
12511 /* Left shift immediate value can be from 0 to <size>-1. */
12512 if (last_elt >= maxshift)
12513 return 0;
12515 else
12517 /* Right shift immediate value can be from 1 to <size>. */
12518 if (last_elt == 0 || last_elt > maxshift)
12519 return 0;
12522 if (elementwidth)
12523 *elementwidth = innersize * 8;
12525 if (modconst)
12526 *modconst = CONST_VECTOR_ELT (op, 0);
12528 return 1;
12531 /* Return a string suitable for output of Neon immediate logic operation
12532 MNEM. */
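/* For instance (illustrative, derived from the sprintf formats below):
   MNEM = "vorr", a valid 32-bit immediate and QUAD = 1 produce the template
   "vorr.i32\t%q0, %2"; with QUAD = 0 the D-register form
   "vorr.i32\t%P0, %2" is produced instead.  */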
12534 char *
12535 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12536 int inverse, int quad)
12538 int width, is_valid;
12539 static char templ[40];
12541 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12543 gcc_assert (is_valid != 0);
12545 if (quad)
12546 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12547 else
12548 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12550 return templ;
12553 /* Return a string suitable for output of Neon immediate shift operation
12554 (VSHR or VSHL) MNEM. */
12556 char *
12557 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12558 machine_mode mode, int quad,
12559 bool isleftshift)
12561 int width, is_valid;
12562 static char templ[40];
12564 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12565 gcc_assert (is_valid != 0);
12567 if (quad)
12568 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12569 else
12570 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12572 return templ;
12575 /* Output a sequence of pairwise operations to implement a reduction.
12576 NOTE: We do "too much work" here, because pairwise operations work on two
12577 registers-worth of operands in one go. Unfortunately, I don't think we can
12578 exploit those extra calculations to do the full operation in fewer steps.
12579 Although all vector elements of the result but the first are ignored, we
12580 actually calculate the same result in each of the elements. An alternative
12581 such as initially loading a vector with zero to use as each of the second
12582 operands would use up an additional register and take an extra instruction,
12583 for no particular gain. */
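/* Sketch of the emitted sequence (added for illustration): for a
   four-element mode, PARTS is 4, so the loop below runs with i = 2 and then
   i = 1, emitting two pairwise REDUC operations; the second one writes OP0,
   all of whose elements (in particular element 0, the one that is used)
   hold the reduced value.  */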
12585 void
12586 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12587 rtx (*reduc) (rtx, rtx, rtx))
12589 machine_mode inner = GET_MODE_INNER (mode);
12590 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12591 rtx tmpsum = op1;
12593 for (i = parts / 2; i >= 1; i /= 2)
12595 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12596 emit_insn (reduc (dest, tmpsum, tmpsum));
12597 tmpsum = dest;
12601 /* If VALS is a vector constant that can be loaded into a register
12602 using VDUP, generate instructions to do so and return an RTX to
12603 assign to the register. Otherwise return NULL_RTX. */
12605 static rtx
12606 neon_vdup_constant (rtx vals)
12608 machine_mode mode = GET_MODE (vals);
12609 machine_mode inner_mode = GET_MODE_INNER (mode);
12610 int n_elts = GET_MODE_NUNITS (mode);
12611 bool all_same = true;
12612 rtx x;
12613 int i;
12615 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12616 return NULL_RTX;
12618 for (i = 0; i < n_elts; ++i)
12620 x = XVECEXP (vals, 0, i);
12621 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12622 all_same = false;
12625 if (!all_same)
12626 /* The elements are not all the same. We could handle repeating
12627 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12628 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12629 vdup.i16). */
12630 return NULL_RTX;
12632 /* We can load this constant by using VDUP and a constant in a
12633 single ARM register. This will be cheaper than a vector
12634 load. */
12636 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12637 return gen_rtx_VEC_DUPLICATE (mode, x);
12640 /* Generate code to load VALS, which is a PARALLEL containing only
12641 constants (for vec_init) or CONST_VECTOR, efficiently into a
12642 register. Returns an RTX to copy into the register, or NULL_RTX
12643 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12646 neon_make_constant (rtx vals)
12648 machine_mode mode = GET_MODE (vals);
12649 rtx target;
12650 rtx const_vec = NULL_RTX;
12651 int n_elts = GET_MODE_NUNITS (mode);
12652 int n_const = 0;
12653 int i;
12655 if (GET_CODE (vals) == CONST_VECTOR)
12656 const_vec = vals;
12657 else if (GET_CODE (vals) == PARALLEL)
12659 /* A CONST_VECTOR must contain only CONST_INTs and
12660 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12661 Only store valid constants in a CONST_VECTOR. */
12662 for (i = 0; i < n_elts; ++i)
12664 rtx x = XVECEXP (vals, 0, i);
12665 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12666 n_const++;
12668 if (n_const == n_elts)
12669 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12671 else
12672 gcc_unreachable ();
12674 if (const_vec != NULL
12675 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12676 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12677 return const_vec;
12678 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12679 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12680 pipeline cycle; creating the constant takes one or two ARM
12681 pipeline cycles. */
12682 return target;
12683 else if (const_vec != NULL_RTX)
12684 /* Load from constant pool. On Cortex-A8 this takes two cycles
12685 (for either double or quad vectors). We can not take advantage
12686 of single-cycle VLD1 because we need a PC-relative addressing
12687 mode. */
12688 return const_vec;
12689 else
12690 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12691 We can not construct an initializer. */
12692 return NULL_RTX;
12695 /* Initialize vector TARGET to VALS. */
12697 void
12698 neon_expand_vector_init (rtx target, rtx vals)
12700 machine_mode mode = GET_MODE (target);
12701 machine_mode inner_mode = GET_MODE_INNER (mode);
12702 int n_elts = GET_MODE_NUNITS (mode);
12703 int n_var = 0, one_var = -1;
12704 bool all_same = true;
12705 rtx x, mem;
12706 int i;
12708 for (i = 0; i < n_elts; ++i)
12710 x = XVECEXP (vals, 0, i);
12711 if (!CONSTANT_P (x))
12712 ++n_var, one_var = i;
12714 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12715 all_same = false;
12718 if (n_var == 0)
12720 rtx constant = neon_make_constant (vals);
12721 if (constant != NULL_RTX)
12723 emit_move_insn (target, constant);
12724 return;
12728 /* Splat a single non-constant element if we can. */
12729 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12731 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12732 emit_insn (gen_rtx_SET (VOIDmode, target,
12733 gen_rtx_VEC_DUPLICATE (mode, x)));
12734 return;
12737 /* One field is non-constant. Load constant then overwrite varying
12738 field. This is more efficient than using the stack. */
12739 if (n_var == 1)
12741 rtx copy = copy_rtx (vals);
12742 rtx index = GEN_INT (one_var);
12744 /* Load constant part of vector, substitute neighboring value for
12745 varying element. */
12746 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12747 neon_expand_vector_init (target, copy);
12749 /* Insert variable. */
12750 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12751 switch (mode)
12753 case V8QImode:
12754 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12755 break;
12756 case V16QImode:
12757 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12758 break;
12759 case V4HImode:
12760 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12761 break;
12762 case V8HImode:
12763 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12764 break;
12765 case V2SImode:
12766 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12767 break;
12768 case V4SImode:
12769 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12770 break;
12771 case V2SFmode:
12772 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12773 break;
12774 case V4SFmode:
12775 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12776 break;
12777 case V2DImode:
12778 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12779 break;
12780 default:
12781 gcc_unreachable ();
12783 return;
12786 /* Construct the vector in memory one field at a time
12787 and load the whole vector. */
12788 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12789 for (i = 0; i < n_elts; i++)
12790 emit_move_insn (adjust_address_nv (mem, inner_mode,
12791 i * GET_MODE_SIZE (inner_mode)),
12792 XVECEXP (vals, 0, i));
12793 emit_move_insn (target, mem);
12796 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12797 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12798 reported source locations are bogus. */
12800 static void
12801 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12802 const char *err)
12804 HOST_WIDE_INT lane;
12806 gcc_assert (CONST_INT_P (operand));
12808 lane = INTVAL (operand);
12810 if (lane < low || lane >= high)
12811 error (err);
12814 /* Bounds-check lanes. */
12816 void
12817 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12819 bounds_check (operand, low, high, "lane out of range");
12822 /* Bounds-check constants. */
12824 void
12825 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12827 bounds_check (operand, low, high, "constant out of range");
12830 HOST_WIDE_INT
12831 neon_element_bits (machine_mode mode)
12833 if (mode == DImode)
12834 return GET_MODE_BITSIZE (mode);
12835 else
12836 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12840 /* Predicates for `match_operand' and `match_operator'. */
12842 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12843 WB is true if full writeback address modes are allowed and is false
12844 if limited writeback address modes (POST_INC and PRE_DEC) are
12845 allowed. */
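/* For example (illustration only, register names chosen for readability):
   (mem (reg r4)) and (mem (plus (reg r4) (const_int 8))) are accepted,
   whereas an offset of 1024 (out of range) or 6 (not a multiple of 4) is
   rejected by the PLUS check at the end of this function.  */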
12848 arm_coproc_mem_operand (rtx op, bool wb)
12850 rtx ind;
12852 /* Reject eliminable registers. */
12853 if (! (reload_in_progress || reload_completed || lra_in_progress)
12854 && ( reg_mentioned_p (frame_pointer_rtx, op)
12855 || reg_mentioned_p (arg_pointer_rtx, op)
12856 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12857 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12858 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12859 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12860 return FALSE;
12862 /* Constants are converted into offsets from labels. */
12863 if (!MEM_P (op))
12864 return FALSE;
12866 ind = XEXP (op, 0);
12868 if (reload_completed
12869 && (GET_CODE (ind) == LABEL_REF
12870 || (GET_CODE (ind) == CONST
12871 && GET_CODE (XEXP (ind, 0)) == PLUS
12872 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12873 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12874 return TRUE;
12876 /* Match: (mem (reg)). */
12877 if (REG_P (ind))
12878 return arm_address_register_rtx_p (ind, 0);
12880 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12881 acceptable in any case (subject to verification by
12882 arm_address_register_rtx_p). We need WB to be true to accept
12883 PRE_INC and POST_DEC. */
12884 if (GET_CODE (ind) == POST_INC
12885 || GET_CODE (ind) == PRE_DEC
12886 || (wb
12887 && (GET_CODE (ind) == PRE_INC
12888 || GET_CODE (ind) == POST_DEC)))
12889 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12891 if (wb
12892 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12893 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12894 && GET_CODE (XEXP (ind, 1)) == PLUS
12895 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12896 ind = XEXP (ind, 1);
12898 /* Match:
12899 (plus (reg)
12900 (const)). */
12901 if (GET_CODE (ind) == PLUS
12902 && REG_P (XEXP (ind, 0))
12903 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12904 && CONST_INT_P (XEXP (ind, 1))
12905 && INTVAL (XEXP (ind, 1)) > -1024
12906 && INTVAL (XEXP (ind, 1)) < 1024
12907 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12908 return TRUE;
12910 return FALSE;
12913 /* Return TRUE if OP is a memory operand to or from which we can load or
12914 store a vector. TYPE is one of the following values:
12915 0 - Vector load/store (vldr)
12916 1 - Core registers (ldm)
12917 2 - Element/structure loads (vld1)
12920 neon_vector_mem_operand (rtx op, int type, bool strict)
12922 rtx ind;
12924 /* Reject eliminable registers. */
12925 if (! (reload_in_progress || reload_completed)
12926 && ( reg_mentioned_p (frame_pointer_rtx, op)
12927 || reg_mentioned_p (arg_pointer_rtx, op)
12928 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12929 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12930 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12931 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12932 return !strict;
12934 /* Constants are converted into offsets from labels. */
12935 if (!MEM_P (op))
12936 return FALSE;
12938 ind = XEXP (op, 0);
12940 if (reload_completed
12941 && (GET_CODE (ind) == LABEL_REF
12942 || (GET_CODE (ind) == CONST
12943 && GET_CODE (XEXP (ind, 0)) == PLUS
12944 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12945 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12946 return TRUE;
12948 /* Match: (mem (reg)). */
12949 if (REG_P (ind))
12950 return arm_address_register_rtx_p (ind, 0);
12952 /* Allow post-increment with Neon registers. */
12953 if ((type != 1 && GET_CODE (ind) == POST_INC)
12954 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12955 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12957 /* Allow post-increment by register for VLDn */
12958 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12959 && GET_CODE (XEXP (ind, 1)) == PLUS
12960 && REG_P (XEXP (XEXP (ind, 1), 1)))
12961 return true;
12963 /* Match:
12964 (plus (reg)
12965 (const)). */
12966 if (type == 0
12967 && GET_CODE (ind) == PLUS
12968 && REG_P (XEXP (ind, 0))
12969 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12970 && CONST_INT_P (XEXP (ind, 1))
12971 && INTVAL (XEXP (ind, 1)) > -1024
12972 /* For quad modes, we restrict the constant offset to be slightly less
12973 than what the instruction format permits. We have no such constraint
12974 on double mode offsets. (This must match arm_legitimate_index_p.) */
12975 && (INTVAL (XEXP (ind, 1))
12976 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12977 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12978 return TRUE;
12980 return FALSE;
12983 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12984 type. */
12986 neon_struct_mem_operand (rtx op)
12988 rtx ind;
12990 /* Reject eliminable registers. */
12991 if (! (reload_in_progress || reload_completed)
12992 && ( reg_mentioned_p (frame_pointer_rtx, op)
12993 || reg_mentioned_p (arg_pointer_rtx, op)
12994 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12995 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12996 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12997 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12998 return FALSE;
13000 /* Constants are converted into offsets from labels. */
13001 if (!MEM_P (op))
13002 return FALSE;
13004 ind = XEXP (op, 0);
13006 if (reload_completed
13007 && (GET_CODE (ind) == LABEL_REF
13008 || (GET_CODE (ind) == CONST
13009 && GET_CODE (XEXP (ind, 0)) == PLUS
13010 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13011 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13012 return TRUE;
13014 /* Match: (mem (reg)). */
13015 if (REG_P (ind))
13016 return arm_address_register_rtx_p (ind, 0);
13018 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13019 if (GET_CODE (ind) == POST_INC
13020 || GET_CODE (ind) == PRE_DEC)
13021 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13023 return FALSE;
13026 /* Return true if X is a register that will be eliminated later on. */
13028 arm_eliminable_register (rtx x)
13030 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13031 || REGNO (x) == ARG_POINTER_REGNUM
13032 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13033 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13036 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13037 coprocessor registers. Otherwise return NO_REGS. */
13039 enum reg_class
13040 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13042 if (mode == HFmode)
13044 if (!TARGET_NEON_FP16)
13045 return GENERAL_REGS;
13046 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13047 return NO_REGS;
13048 return GENERAL_REGS;
13051 /* The neon move patterns handle all legitimate vector and struct
13052 addresses. */
13053 if (TARGET_NEON
13054 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13055 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13056 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13057 || VALID_NEON_STRUCT_MODE (mode)))
13058 return NO_REGS;
13060 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13061 return NO_REGS;
13063 return GENERAL_REGS;
13066 /* Values which must be returned in the most-significant end of the return
13067 register. */
13069 static bool
13070 arm_return_in_msb (const_tree valtype)
13072 return (TARGET_AAPCS_BASED
13073 && BYTES_BIG_ENDIAN
13074 && (AGGREGATE_TYPE_P (valtype)
13075 || TREE_CODE (valtype) == COMPLEX_TYPE
13076 || FIXED_POINT_TYPE_P (valtype)));
13079 /* Return TRUE if X references a SYMBOL_REF. */
13081 symbol_mentioned_p (rtx x)
13083 const char * fmt;
13084 int i;
13086 if (GET_CODE (x) == SYMBOL_REF)
13087 return 1;
13089 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13090 are constant offsets, not symbols. */
13091 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13092 return 0;
13094 fmt = GET_RTX_FORMAT (GET_CODE (x));
13096 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13098 if (fmt[i] == 'E')
13100 int j;
13102 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13103 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13104 return 1;
13106 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13107 return 1;
13110 return 0;
13113 /* Return TRUE if X references a LABEL_REF. */
13115 label_mentioned_p (rtx x)
13117 const char * fmt;
13118 int i;
13120 if (GET_CODE (x) == LABEL_REF)
13121 return 1;
13123 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13124 instruction, but they are constant offsets, not symbols. */
13125 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13126 return 0;
13128 fmt = GET_RTX_FORMAT (GET_CODE (x));
13129 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13131 if (fmt[i] == 'E')
13133 int j;
13135 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13136 if (label_mentioned_p (XVECEXP (x, i, j)))
13137 return 1;
13139 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13140 return 1;
13143 return 0;
13147 tls_mentioned_p (rtx x)
13149 switch (GET_CODE (x))
13151 case CONST:
13152 return tls_mentioned_p (XEXP (x, 0));
13154 case UNSPEC:
13155 if (XINT (x, 1) == UNSPEC_TLS)
13156 return 1;
13158 default:
13159 return 0;
13163 /* Must not copy any rtx that uses a pc-relative address. */
13165 static int
13166 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13168 if (GET_CODE (*x) == UNSPEC
13169 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13170 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13171 return 1;
13172 return 0;
13175 static bool
13176 arm_cannot_copy_insn_p (rtx_insn *insn)
13178 /* The tls call insn cannot be copied, as it is paired with a data
13179 word. */
13180 if (recog_memoized (insn) == CODE_FOR_tlscall)
13181 return true;
13183 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13186 enum rtx_code
13187 minmax_code (rtx x)
13189 enum rtx_code code = GET_CODE (x);
13191 switch (code)
13193 case SMAX:
13194 return GE;
13195 case SMIN:
13196 return LE;
13197 case UMIN:
13198 return LEU;
13199 case UMAX:
13200 return GEU;
13201 default:
13202 gcc_unreachable ();
13206 /* Match pair of min/max operators that can be implemented via usat/ssat. */
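/* Worked example (added for illustration): bounds [0, 255] give
   exact_log2 (256) = 8, so *MASK = 8 and *SIGNED_SAT = false (a usat #8
   pattern); bounds [-128, 127] give exact_log2 (128) = 7, the low bound
   matches -127 - 1, and the result is *MASK = 8 with *SIGNED_SAT = true
   (an ssat #8 pattern).  */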
13208 bool
13209 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13210 int *mask, bool *signed_sat)
13212 /* The high bound must be a power of two minus one. */
13213 int log = exact_log2 (INTVAL (hi_bound) + 1);
13214 if (log == -1)
13215 return false;
13217 /* The low bound is either zero (for usat) or one less than the
13218 negation of the high bound (for ssat). */
13219 if (INTVAL (lo_bound) == 0)
13221 if (mask)
13222 *mask = log;
13223 if (signed_sat)
13224 *signed_sat = false;
13226 return true;
13229 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13231 if (mask)
13232 *mask = log + 1;
13233 if (signed_sat)
13234 *signed_sat = true;
13236 return true;
13239 return false;
13242 /* Return 1 if memory locations are adjacent. */
13244 adjacent_mem_locations (rtx a, rtx b)
13246 /* We don't guarantee to preserve the order of these memory refs. */
13247 if (volatile_refs_p (a) || volatile_refs_p (b))
13248 return 0;
13250 if ((REG_P (XEXP (a, 0))
13251 || (GET_CODE (XEXP (a, 0)) == PLUS
13252 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13253 && (REG_P (XEXP (b, 0))
13254 || (GET_CODE (XEXP (b, 0)) == PLUS
13255 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13257 HOST_WIDE_INT val0 = 0, val1 = 0;
13258 rtx reg0, reg1;
13259 int val_diff;
13261 if (GET_CODE (XEXP (a, 0)) == PLUS)
13263 reg0 = XEXP (XEXP (a, 0), 0);
13264 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13266 else
13267 reg0 = XEXP (a, 0);
13269 if (GET_CODE (XEXP (b, 0)) == PLUS)
13271 reg1 = XEXP (XEXP (b, 0), 0);
13272 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13274 else
13275 reg1 = XEXP (b, 0);
13277 /* Don't accept any offset that will require multiple
13278 instructions to handle, since this would cause the
13279 arith_adjacentmem pattern to output an overlong sequence. */
13280 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13281 return 0;
13283 /* Don't allow an eliminable register: register elimination can make
13284 the offset too large. */
13285 if (arm_eliminable_register (reg0))
13286 return 0;
13288 val_diff = val1 - val0;
13290 if (arm_ld_sched)
13292 /* If the target has load delay slots, then there's no benefit
13293 to using an ldm instruction unless the offset is zero and
13294 we are optimizing for size. */
13295 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13296 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13297 && (val_diff == 4 || val_diff == -4));
13300 return ((REGNO (reg0) == REGNO (reg1))
13301 && (val_diff == 4 || val_diff == -4));
13304 return 0;
13307 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13308 for load operations, false for store operations. CONSECUTIVE is true
13309 if the register numbers in the operation must be consecutive in the register
13310 bank. RETURN_PC is true if a value is to be loaded into the PC.
13311 The pattern we are trying to match for load is:
13312 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13313 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13316 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13318 where
13319 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13320 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13321 3. If consecutive is TRUE, then for kth register being loaded,
13322 REGNO (R_dk) = REGNO (R_d0) + k.
13323 The pattern for store is similar. */
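/* Concrete example (added for illustration; register names used instead of
   register numbers for readability): the two-register load
   "ldmia r0, {r4, r5}" corresponds to the parallel
   [(set (reg:SI r4) (mem:SI (reg:SI r0)))
    (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))],
   which this function accepts with LOAD true and MODE SImode.  */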
13324 bool
13325 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13326 bool consecutive, bool return_pc)
13328 HOST_WIDE_INT count = XVECLEN (op, 0);
13329 rtx reg, mem, addr;
13330 unsigned regno;
13331 unsigned first_regno;
13332 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13333 rtx elt;
13334 bool addr_reg_in_reglist = false;
13335 bool update = false;
13336 int reg_increment;
13337 int offset_adj;
13338 int regs_per_val;
13340 /* If not in SImode, then registers must be consecutive
13341 (e.g., VLDM instructions for DFmode). */
13342 gcc_assert ((mode == SImode) || consecutive);
13343 /* Setting return_pc for stores is illegal. */
13344 gcc_assert (!return_pc || load);
13346 /* Set up the increments and the regs per val based on the mode. */
13347 reg_increment = GET_MODE_SIZE (mode);
13348 regs_per_val = reg_increment / 4;
13349 offset_adj = return_pc ? 1 : 0;
13351 if (count <= 1
13352 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13353 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13354 return false;
13356 /* Check if this is a write-back. */
13357 elt = XVECEXP (op, 0, offset_adj);
13358 if (GET_CODE (SET_SRC (elt)) == PLUS)
13360 i++;
13361 base = 1;
13362 update = true;
13364 /* The offset adjustment must be the number of registers being
13365 popped times the size of a single register. */
13366 if (!REG_P (SET_DEST (elt))
13367 || !REG_P (XEXP (SET_SRC (elt), 0))
13368 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13369 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13370 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13371 ((count - 1 - offset_adj) * reg_increment))
13372 return false;
13375 i = i + offset_adj;
13376 base = base + offset_adj;
13377 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13378 success depends on the type: VLDM can do just one reg,
13379 LDM must do at least two. */
13380 if ((count <= i) && (mode == SImode))
13381 return false;
13383 elt = XVECEXP (op, 0, i - 1);
13384 if (GET_CODE (elt) != SET)
13385 return false;
13387 if (load)
13389 reg = SET_DEST (elt);
13390 mem = SET_SRC (elt);
13392 else
13394 reg = SET_SRC (elt);
13395 mem = SET_DEST (elt);
13398 if (!REG_P (reg) || !MEM_P (mem))
13399 return false;
13401 regno = REGNO (reg);
13402 first_regno = regno;
13403 addr = XEXP (mem, 0);
13404 if (GET_CODE (addr) == PLUS)
13406 if (!CONST_INT_P (XEXP (addr, 1)))
13407 return false;
13409 offset = INTVAL (XEXP (addr, 1));
13410 addr = XEXP (addr, 0);
13413 if (!REG_P (addr))
13414 return false;
13416 /* Don't allow SP to be loaded unless it is also the base register. It
13417 guarantees that SP is reset correctly when an LDM instruction
13418 is interrupted. Otherwise, we might end up with a corrupt stack. */
13419 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13420 return false;
13422 for (; i < count; i++)
13424 elt = XVECEXP (op, 0, i);
13425 if (GET_CODE (elt) != SET)
13426 return false;
13428 if (load)
13430 reg = SET_DEST (elt);
13431 mem = SET_SRC (elt);
13433 else
13435 reg = SET_SRC (elt);
13436 mem = SET_DEST (elt);
13439 if (!REG_P (reg)
13440 || GET_MODE (reg) != mode
13441 || REGNO (reg) <= regno
13442 || (consecutive
13443 && (REGNO (reg) !=
13444 (unsigned int) (first_regno + regs_per_val * (i - base))))
13445 /* Don't allow SP to be loaded unless it is also the base register. It
13446 guarantees that SP is reset correctly when an LDM instruction
13447 is interrupted. Otherwise, we might end up with a corrupt stack. */
13448 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13449 || !MEM_P (mem)
13450 || GET_MODE (mem) != mode
13451 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13452 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13453 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13454 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13455 offset + (i - base) * reg_increment))
13456 && (!REG_P (XEXP (mem, 0))
13457 || offset + (i - base) * reg_increment != 0)))
13458 return false;
13460 regno = REGNO (reg);
13461 if (regno == REGNO (addr))
13462 addr_reg_in_reglist = true;
13465 if (load)
13467 if (update && addr_reg_in_reglist)
13468 return false;
13470 /* For Thumb-1, the address register is always modified - either by write-back
13471 or by explicit load. If the pattern does not describe an update,
13472 then the address register must be in the list of loaded registers. */
13473 if (TARGET_THUMB1)
13474 return update || addr_reg_in_reglist;
13477 return true;
13480 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13481 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13482 instruction. ADD_OFFSET is nonzero if the base address register needs
13483 to be modified with an add instruction before we can use it. */
13485 static bool
13486 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13487 int nops, HOST_WIDE_INT add_offset)
13489 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13490 if the offset isn't small enough. The reason 2 ldrs are faster
13491 is because these ARMs are able to do more than one cache access
13492 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13493 whilst the ARM8 has a double bandwidth cache. This means that
13494 these cores can do both an instruction fetch and a data fetch in
13495 a single cycle, so the trick of calculating the address into a
13496 scratch register (one of the result regs) and then doing a load
13497 multiple actually becomes slower (and no smaller in code size).
13498 That is the transformation
13500 ldr rd1, [rbase + offset]
13501 ldr rd2, [rbase + offset + 4]
13503 to
13505 add rd1, rbase, offset
13506 ldmia rd1, {rd1, rd2}
13508 produces worse code -- '3 cycles + any stalls on rd2' instead of
13509 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13510 access per cycle, the first sequence could never complete in less
13511 than 6 cycles, whereas the ldm sequence would only take 5 and
13512 would make better use of sequential accesses if not hitting the
13513 cache.
13515 We cheat here and test 'arm_ld_sched' which we currently know to
13516 only be true for the ARM8, ARM9 and StrongARM. If this ever
13517 changes, then the test below needs to be reworked. */
13518 if (nops == 2 && arm_ld_sched && add_offset != 0)
13519 return false;
13521 /* XScale has load-store double instructions, but they have stricter
13522 alignment requirements than load-store multiple, so we cannot
13523 use them.
13525 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13526 the pipeline until completion.
13528 NREGS CYCLES
13529 1 3
13530 2 4
13531 3 5
13532 4 6
13534 An ldr instruction takes 1-3 cycles, but does not block the
13535 pipeline.
13537 NREGS CYCLES
13538 1 1-3
13539 2 2-6
13540 3 3-9
13541 4 4-12
13543 Best case ldr will always win. However, the more ldr instructions
13544 we issue, the less likely we are to be able to schedule them well.
13545 Using ldr instructions also increases code size.
13547 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13548 for counts of 3 or 4 regs. */
13549 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13550 return false;
13551 return true;
13554 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13555 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13556 an array ORDER which describes the sequence to use when accessing the
13557 offsets that produces an ascending order. In this sequence, each
13558 offset must be larger by exactly 4 than the previous one. ORDER[0]
13559 must have been filled in with the lowest offset by the caller.
13560 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13561 we use to verify that ORDER produces an ascending order of registers.
13562 Return true if it was possible to construct such an order, false if
13563 not. */
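/* Worked example (added for illustration): with NOPS = 4,
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of the
   lowest offset, as filled in by the caller), the loop below produces
   ORDER = {1, 2, 0, 3}.  Offsets {8, 0, 4, 6} would fail: after 0, 4 and 8
   are placed, no remaining offset is exactly 4 above 8, so the function
   returns false.  */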
13565 static bool
13566 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13567 int *unsorted_regs)
13569 int i;
13570 for (i = 1; i < nops; i++)
13572 int j;
13574 order[i] = order[i - 1];
13575 for (j = 0; j < nops; j++)
13576 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13578 /* We must find exactly one offset that is higher than the
13579 previous one by 4. */
13580 if (order[i] != order[i - 1])
13581 return false;
13582 order[i] = j;
13584 if (order[i] == order[i - 1])
13585 return false;
13586 /* The register numbers must be ascending. */
13587 if (unsorted_regs != NULL
13588 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13589 return false;
13591 return true;
13594 /* Used to determine in a peephole whether a sequence of load
13595 instructions can be changed into a load-multiple instruction.
13596 NOPS is the number of separate load instructions we are examining. The
13597 first NOPS entries in OPERANDS are the destination registers, the
13598 next NOPS entries are memory operands. If this function is
13599 successful, *BASE is set to the common base register of the memory
13600 accesses; *LOAD_OFFSET is set to the first memory location's offset
13601 from that base register.
13602 REGS is an array filled in with the destination register numbers.
13603 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13604 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13605 the sequence of registers in REGS matches the loads from ascending memory
13606 locations, and the function verifies that the register numbers are
13607 themselves ascending. If CHECK_REGS is false, the register numbers
13608 are stored in the order they are found in the operands. */
13609 static int
13610 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13611 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13613 int unsorted_regs[MAX_LDM_STM_OPS];
13614 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13615 int order[MAX_LDM_STM_OPS];
13616 rtx base_reg_rtx = NULL;
13617 int base_reg = -1;
13618 int i, ldm_case;
13620 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13621 easily extended if required. */
13622 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13624 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13626 /* Loop over the operands and check that the memory references are
13627 suitable (i.e. immediate offsets from the same base register). At
13628 the same time, extract the target register, and the memory
13629 offsets. */
13630 for (i = 0; i < nops; i++)
13632 rtx reg;
13633 rtx offset;
13635 /* Convert a subreg of a mem into the mem itself. */
13636 if (GET_CODE (operands[nops + i]) == SUBREG)
13637 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13639 gcc_assert (MEM_P (operands[nops + i]));
13641 /* Don't reorder volatile memory references; it doesn't seem worth
13642 looking for the case where the order is ok anyway. */
13643 if (MEM_VOLATILE_P (operands[nops + i]))
13644 return 0;
13646 offset = const0_rtx;
13648 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13649 || (GET_CODE (reg) == SUBREG
13650 && REG_P (reg = SUBREG_REG (reg))))
13651 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13652 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13653 || (GET_CODE (reg) == SUBREG
13654 && REG_P (reg = SUBREG_REG (reg))))
13655 && (CONST_INT_P (offset
13656 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13658 if (i == 0)
13660 base_reg = REGNO (reg);
13661 base_reg_rtx = reg;
13662 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13663 return 0;
13665 else if (base_reg != (int) REGNO (reg))
13666 /* Not addressed from the same base register. */
13667 return 0;
13669 unsorted_regs[i] = (REG_P (operands[i])
13670 ? REGNO (operands[i])
13671 : REGNO (SUBREG_REG (operands[i])));
13673 /* If it isn't an integer register, or if it overwrites the
13674 base register but isn't the last insn in the list, then
13675 we can't do this. */
13676 if (unsorted_regs[i] < 0
13677 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13678 || unsorted_regs[i] > 14
13679 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13680 return 0;
13682 /* Don't allow SP to be loaded unless it is also the base
13683 register. It guarantees that SP is reset correctly when
13684 an LDM instruction is interrupted. Otherwise, we might
13685 end up with a corrupt stack. */
13686 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13687 return 0;
13689 unsorted_offsets[i] = INTVAL (offset);
13690 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13691 order[0] = i;
13693 else
13694 /* Not a suitable memory address. */
13695 return 0;
13698 /* All the useful information has now been extracted from the
13699 operands into unsorted_regs and unsorted_offsets; additionally,
13700 order[0] has been set to the lowest offset in the list. Sort
13701 the offsets into order, verifying that they are adjacent, and
13702 check that the register numbers are ascending. */
13703 if (!compute_offset_order (nops, unsorted_offsets, order,
13704 check_regs ? unsorted_regs : NULL))
13705 return 0;
13707 if (saved_order)
13708 memcpy (saved_order, order, sizeof order);
13710 if (base)
13712 *base = base_reg;
13714 for (i = 0; i < nops; i++)
13715 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13717 *load_offset = unsorted_offsets[order[0]];
13720 if (TARGET_THUMB1
13721 && !peep2_reg_dead_p (nops, base_reg_rtx))
13722 return 0;
13724 if (unsorted_offsets[order[0]] == 0)
13725 ldm_case = 1; /* ldmia */
13726 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13727 ldm_case = 2; /* ldmib */
13728 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13729 ldm_case = 3; /* ldmda */
13730 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13731 ldm_case = 4; /* ldmdb */
13732 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13733 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13734 ldm_case = 5;
13735 else
13736 return 0;
13738 if (!multiple_operation_profitable_p (false, nops,
13739 ldm_case == 5
13740 ? unsorted_offsets[order[0]] : 0))
13741 return 0;
13743 return ldm_case;
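/* Illustrative sketch (not part of arm.c): how the lowest and highest
   offsets select the addressing variant in the ldm_case assignments above.
   classify_ldm is a hypothetical helper for exposition only; the TARGET_*
   and const_ok_for_arm checks are omitted.  */
#include <stdio.h>

static const char *
classify_ldm (long long first_offset, long long last_offset)
{
  if (first_offset == 0)
    return "ldmia";                 /* increment after:  base+0, +4, ...  */
  if (first_offset == 4)
    return "ldmib";                 /* increment before: base+4, +8, ...  */
  if (last_offset == 0)
    return "ldmda";                 /* decrement after:  ..., -4, base+0  */
  if (last_offset == -4)
    return "ldmdb";                 /* decrement before: ..., -8, base-4  */
  return "add base, offset; ldmia"; /* the ldm_case == 5 fallback         */
}

int
main (void)
{
  printf ("%s\n", classify_ldm (0, 12));    /* ldmia */
  printf ("%s\n", classify_ldm (4, 16));    /* ldmib */
  printf ("%s\n", classify_ldm (-12, 0));   /* ldmda */
  printf ("%s\n", classify_ldm (-16, -4));  /* ldmdb */
  printf ("%s\n", classify_ldm (64, 76));   /* case 5 */
  return 0;
}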
13746 /* Used to determine in a peephole whether a sequence of store instructions can
13747 be changed into a store-multiple instruction.
13748 NOPS is the number of separate store instructions we are examining.
13749 NOPS_TOTAL is the total number of instructions recognized by the peephole
13750 pattern.
13751 The first NOPS entries in OPERANDS are the source registers, the next
13752 NOPS entries are memory operands. If this function is successful, *BASE is
13753 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13754 to the first memory location's offset from that base register. REGS is an
13755 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13756 likewise filled with the corresponding rtx's.
13757 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13758 numbers to an ascending order of stores.
13759 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13760 from ascending memory locations, and the function verifies that the register
13761 numbers are themselves ascending. If CHECK_REGS is false, the register
13762 numbers are stored in the order they are found in the operands. */
13763 static int
13764 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13765 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13766 HOST_WIDE_INT *load_offset, bool check_regs)
13768 int unsorted_regs[MAX_LDM_STM_OPS];
13769 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13770 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13771 int order[MAX_LDM_STM_OPS];
13772 int base_reg = -1;
13773 rtx base_reg_rtx = NULL;
13774 int i, stm_case;
13776 /* Write back of base register is currently only supported for Thumb 1. */
13777 int base_writeback = TARGET_THUMB1;
13779 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13780 easily extended if required. */
13781 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13783 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13785 /* Loop over the operands and check that the memory references are
13786 suitable (i.e. immediate offsets from the same base register). At
13787 the same time, extract the target register, and the memory
13788 offsets. */
13789 for (i = 0; i < nops; i++)
13791 rtx reg;
13792 rtx offset;
13794 /* Convert a subreg of a mem into the mem itself. */
13795 if (GET_CODE (operands[nops + i]) == SUBREG)
13796 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13798 gcc_assert (MEM_P (operands[nops + i]));
13800 /* Don't reorder volatile memory references; it doesn't seem worth
13801 looking for the case where the order is ok anyway. */
13802 if (MEM_VOLATILE_P (operands[nops + i]))
13803 return 0;
13805 offset = const0_rtx;
13807 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13808 || (GET_CODE (reg) == SUBREG
13809 && REG_P (reg = SUBREG_REG (reg))))
13810 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13811 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13812 || (GET_CODE (reg) == SUBREG
13813 && REG_P (reg = SUBREG_REG (reg))))
13814 && (CONST_INT_P (offset
13815 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13817 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13818 ? operands[i] : SUBREG_REG (operands[i]));
13819 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13821 if (i == 0)
13823 base_reg = REGNO (reg);
13824 base_reg_rtx = reg;
13825 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13826 return 0;
13828 else if (base_reg != (int) REGNO (reg))
13829 /* Not addressed from the same base register. */
13830 return 0;
13832 /* If it isn't an integer register, then we can't do this. */
13833 if (unsorted_regs[i] < 0
13834 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13835 /* The effects are unpredictable if the base register is
13836 both updated and stored. */
13837 || (base_writeback && unsorted_regs[i] == base_reg)
13838 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13839 || unsorted_regs[i] > 14)
13840 return 0;
13842 unsorted_offsets[i] = INTVAL (offset);
13843 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13844 order[0] = i;
13846 else
13847 /* Not a suitable memory address. */
13848 return 0;
13851 /* All the useful information has now been extracted from the
13852 operands into unsorted_regs and unsorted_offsets; additionally,
13853 order[0] has been set to the lowest offset in the list. Sort
13854 the offsets into order, verifying that they are adjacent, and
13855 check that the register numbers are ascending. */
13856 if (!compute_offset_order (nops, unsorted_offsets, order,
13857 check_regs ? unsorted_regs : NULL))
13858 return 0;
13860 if (saved_order)
13861 memcpy (saved_order, order, sizeof order);
13863 if (base)
13865 *base = base_reg;
13867 for (i = 0; i < nops; i++)
13869 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13870 if (reg_rtxs)
13871 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13874 *load_offset = unsorted_offsets[order[0]];
13877 if (TARGET_THUMB1
13878 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13879 return 0;
13881 if (unsorted_offsets[order[0]] == 0)
13882 stm_case = 1; /* stmia */
13883 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13884 stm_case = 2; /* stmib */
13885 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13886 stm_case = 3; /* stmda */
13887 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13888 stm_case = 4; /* stmdb */
13889 else
13890 return 0;
13892 if (!multiple_operation_profitable_p (false, nops, 0))
13893 return 0;
13895 return stm_case;
13898 /* Routines for use in generating RTL. */
13900 /* Generate a load-multiple instruction. COUNT is the number of loads in
13901 the instruction; REGS and MEMS are arrays containing the operands.
13902 BASEREG is the base register to be used in addressing the memory operands.
13903 WBACK_OFFSET is nonzero if the instruction should update the base
13904 register. */
13906 static rtx
13907 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13908 HOST_WIDE_INT wback_offset)
13910 int i = 0, j;
13911 rtx result;
13913 if (!multiple_operation_profitable_p (false, count, 0))
13915 rtx seq;
13917 start_sequence ();
13919 for (i = 0; i < count; i++)
13920 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13922 if (wback_offset != 0)
13923 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13925 seq = get_insns ();
13926 end_sequence ();
13928 return seq;
13931 result = gen_rtx_PARALLEL (VOIDmode,
13932 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13933 if (wback_offset != 0)
13935 XVECEXP (result, 0, 0)
13936 = gen_rtx_SET (VOIDmode, basereg,
13937 plus_constant (Pmode, basereg, wback_offset));
13938 i = 1;
13939 count++;
13942 for (j = 0; i < count; i++, j++)
13943 XVECEXP (result, 0, i)
13944 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13946 return result;
13949 /* Generate a store-multiple instruction. COUNT is the number of stores in
13950 the instruction; REGS and MEMS are arrays containing the operands.
13951 BASEREG is the base register to be used in addressing the memory operands.
13952 WBACK_OFFSET is nonzero if the instruction should update the base
13953 register. */
13955 static rtx
13956 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13957 HOST_WIDE_INT wback_offset)
13959 int i = 0, j;
13960 rtx result;
13962 if (GET_CODE (basereg) == PLUS)
13963 basereg = XEXP (basereg, 0);
13965 if (!multiple_operation_profitable_p (false, count, 0))
13967 rtx seq;
13969 start_sequence ();
13971 for (i = 0; i < count; i++)
13972 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13974 if (wback_offset != 0)
13975 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13977 seq = get_insns ();
13978 end_sequence ();
13980 return seq;
13983 result = gen_rtx_PARALLEL (VOIDmode,
13984 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13985 if (wback_offset != 0)
13987 XVECEXP (result, 0, 0)
13988 = gen_rtx_SET (VOIDmode, basereg,
13989 plus_constant (Pmode, basereg, wback_offset));
13990 i = 1;
13991 count++;
13994 for (j = 0; i < count; i++, j++)
13995 XVECEXP (result, 0, i)
13996 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13998 return result;
14001 /* Generate either a load-multiple or a store-multiple instruction. This
14002 function can be used in situations where we can start with a single MEM
14003 rtx and adjust its address upwards.
14004 COUNT is the number of operations in the instruction, not counting a
14005 possible update of the base register. REGS is an array containing the
14006 register operands.
14007 BASEREG is the base register to be used in addressing the memory operands,
14008 which are constructed from BASEMEM.
14009 WRITE_BACK specifies whether the generated instruction should include an
14010 update of the base register.
14011 OFFSETP is used to pass an offset to and from this function; this offset
14012 is not used when constructing the address (instead BASEMEM should have an
14013 appropriate offset in its address), it is used only for setting
14014 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14016 static rtx
14017 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14018 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14020 rtx mems[MAX_LDM_STM_OPS];
14021 HOST_WIDE_INT offset = *offsetp;
14022 int i;
14024 gcc_assert (count <= MAX_LDM_STM_OPS);
14026 if (GET_CODE (basereg) == PLUS)
14027 basereg = XEXP (basereg, 0);
14029 for (i = 0; i < count; i++)
14031 rtx addr = plus_constant (Pmode, basereg, i * 4);
14032 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14033 offset += 4;
14036 if (write_back)
14037 *offsetp = offset;
14039 if (is_load)
14040 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14041 write_back ? 4 * count : 0);
14042 else
14043 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14044 write_back ? 4 * count : 0);
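/* Illustrative sketch (not part of arm.c): the address/offset bookkeeping
   in arm_gen_multiple_op above -- the i-th mem sits at basereg + 4*i, the
   MEM_OFFSET passed back through *offsetp advances by 4 per operand, and
   with write-back the base register moves by 4*count.  */
#include <stdio.h>

int
main (void)
{
  long long offset = 0;  /* plays the role of *offsetp */
  int count = 3;
  for (int i = 0; i < count; i++)
    {
      printf ("mem[%d]: address = base + %d, MEM_OFFSET = %lld\n",
              i, i * 4, offset);
      offset += 4;
    }
  printf ("write-back would advance the base register by %d bytes\n",
          4 * count);
  return 0;
}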
14048 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14049 rtx basemem, HOST_WIDE_INT *offsetp)
14051 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14052 offsetp);
14056 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14057 rtx basemem, HOST_WIDE_INT *offsetp)
14059 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14060 offsetp);
14063 /* Called from a peephole2 expander to turn a sequence of loads into an
14064 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14065 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14066 is true if we can reorder the registers because their subsequent uses
14067 are commutative.
14068 Returns true iff we could generate a new instruction. */
14070 bool
14071 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14073 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14074 rtx mems[MAX_LDM_STM_OPS];
14075 int i, j, base_reg;
14076 rtx base_reg_rtx;
14077 HOST_WIDE_INT offset;
14078 int write_back = FALSE;
14079 int ldm_case;
14080 rtx addr;
14082 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14083 &base_reg, &offset, !sort_regs);
14085 if (ldm_case == 0)
14086 return false;
14088 if (sort_regs)
14089 for (i = 0; i < nops - 1; i++)
14090 for (j = i + 1; j < nops; j++)
14091 if (regs[i] > regs[j])
14093 int t = regs[i];
14094 regs[i] = regs[j];
14095 regs[j] = t;
14097 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14099 if (TARGET_THUMB1)
14101 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14102 gcc_assert (ldm_case == 1 || ldm_case == 5);
14103 write_back = TRUE;
14106 if (ldm_case == 5)
14108 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14109 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14110 offset = 0;
14111 if (!TARGET_THUMB1)
14113 base_reg = regs[0];
14114 base_reg_rtx = newbase;
14118 for (i = 0; i < nops; i++)
14120 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14121 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14122 SImode, addr, 0);
14124 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14125 write_back ? offset + i * 4 : 0));
14126 return true;
14129 /* Called from a peephole2 expander to turn a sequence of stores into an
14130 STM instruction. OPERANDS are the operands found by the peephole matcher;
14131 NOPS indicates how many separate stores we are trying to combine.
14132 Returns true iff we could generate a new instruction. */
14134 bool
14135 gen_stm_seq (rtx *operands, int nops)
14137 int i;
14138 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14139 rtx mems[MAX_LDM_STM_OPS];
14140 int base_reg;
14141 rtx base_reg_rtx;
14142 HOST_WIDE_INT offset;
14143 int write_back = FALSE;
14144 int stm_case;
14145 rtx addr;
14146 bool base_reg_dies;
14148 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14149 mem_order, &base_reg, &offset, true);
14151 if (stm_case == 0)
14152 return false;
14154 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14156 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14157 if (TARGET_THUMB1)
14159 gcc_assert (base_reg_dies);
14160 write_back = TRUE;
14163 if (stm_case == 5)
14165 gcc_assert (base_reg_dies);
14166 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14167 offset = 0;
14170 addr = plus_constant (Pmode, base_reg_rtx, offset);
14172 for (i = 0; i < nops; i++)
14174 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14175 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14176 SImode, addr, 0);
14178 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14179 write_back ? offset + i * 4 : 0));
14180 return true;
14183 /* Called from a peephole2 expander to turn a sequence of stores that are
14184 preceded by constant loads into an STM instruction. OPERANDS are the
14185 operands found by the peephole matcher; NOPS indicates how many
14186 separate stores we are trying to combine; there are 2 * NOPS
14187 instructions in the peephole.
14188 Returns true iff we could generate a new instruction. */
14190 bool
14191 gen_const_stm_seq (rtx *operands, int nops)
14193 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14194 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14195 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14196 rtx mems[MAX_LDM_STM_OPS];
14197 int base_reg;
14198 rtx base_reg_rtx;
14199 HOST_WIDE_INT offset;
14200 int write_back = FALSE;
14201 int stm_case;
14202 rtx addr;
14203 bool base_reg_dies;
14204 int i, j;
14205 HARD_REG_SET allocated;
14207 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14208 mem_order, &base_reg, &offset, false);
14210 if (stm_case == 0)
14211 return false;
14213 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14215 /* If the same register is used more than once, try to find a free
14216 register. */
14217 CLEAR_HARD_REG_SET (allocated);
14218 for (i = 0; i < nops; i++)
14220 for (j = i + 1; j < nops; j++)
14221 if (regs[i] == regs[j])
14223 rtx t = peep2_find_free_register (0, nops * 2,
14224 TARGET_THUMB1 ? "l" : "r",
14225 SImode, &allocated);
14226 if (t == NULL_RTX)
14227 return false;
14228 reg_rtxs[i] = t;
14229 regs[i] = REGNO (t);
14233 /* Compute an ordering that maps the register numbers to an ascending
14234 sequence. */
14235 reg_order[0] = 0;
14236 for (i = 0; i < nops; i++)
14237 if (regs[i] < regs[reg_order[0]])
14238 reg_order[0] = i;
14240 for (i = 1; i < nops; i++)
14242 int this_order = reg_order[i - 1];
14243 for (j = 0; j < nops; j++)
14244 if (regs[j] > regs[reg_order[i - 1]]
14245 && (this_order == reg_order[i - 1]
14246 || regs[j] < regs[this_order]))
14247 this_order = j;
14248 reg_order[i] = this_order;
14251 /* Ensure that registers that must be live after the instruction end
14252 up with the correct value. */
14253 for (i = 0; i < nops; i++)
14255 int this_order = reg_order[i];
14256 if ((this_order != mem_order[i]
14257 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14258 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14259 return false;
14262 /* Load the constants. */
14263 for (i = 0; i < nops; i++)
14265 rtx op = operands[2 * nops + mem_order[i]];
14266 sorted_regs[i] = regs[reg_order[i]];
14267 emit_move_insn (reg_rtxs[reg_order[i]], op);
14270 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14272 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14273 if (TARGET_THUMB1)
14275 gcc_assert (base_reg_dies);
14276 write_back = TRUE;
14279 if (stm_case == 5)
14281 gcc_assert (base_reg_dies);
14282 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14283 offset = 0;
14286 addr = plus_constant (Pmode, base_reg_rtx, offset);
14288 for (i = 0; i < nops; i++)
14290 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14291 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14292 SImode, addr, 0);
14294 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14295 write_back ? offset + i * 4 : 0));
14296 return true;
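/* Illustrative sketch (not part of arm.c): the reg_order computation in
   gen_const_stm_seq above, i.e. a selection of indices that visits the
   register numbers in ascending order.  ascending_order is a hypothetical
   standalone helper on plain ints, for exposition only.  */
#include <assert.h>

static void
ascending_order (int n, const int *regs, int *order)
{
  order[0] = 0;
  for (int i = 0; i < n; i++)
    if (regs[i] < regs[order[0]])
      order[0] = i;
  for (int i = 1; i < n; i++)
    {
      /* Pick the smallest register strictly greater than the previous one.  */
      int this_order = order[i - 1];
      for (int j = 0; j < n; j++)
        if (regs[j] > regs[order[i - 1]]
            && (this_order == order[i - 1] || regs[j] < regs[this_order]))
          this_order = j;
      order[i] = this_order;
    }
}

int
main (void)
{
  int regs[] = { 3, 1, 4, 2 };
  int order[4];
  ascending_order (4, regs, order);
  /* Visiting regs[1]=1, regs[3]=2, regs[0]=3, regs[2]=4 is ascending.  */
  assert (order[0] == 1 && order[1] == 3 && order[2] == 0 && order[3] == 2);
  return 0;
}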
14299 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14300 unaligned copies on processors which support unaligned semantics for those
14301 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14302 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14303 An interleave factor of 1 (the minimum) will perform no interleaving.
14304 Load/store multiple are used for aligned addresses where possible. */
14306 static void
14307 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14308 HOST_WIDE_INT length,
14309 unsigned int interleave_factor)
14311 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14312 int *regnos = XALLOCAVEC (int, interleave_factor);
14313 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14314 HOST_WIDE_INT i, j;
14315 HOST_WIDE_INT remaining = length, words;
14316 rtx halfword_tmp = NULL, byte_tmp = NULL;
14317 rtx dst, src;
14318 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14319 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14320 HOST_WIDE_INT srcoffset, dstoffset;
14321 HOST_WIDE_INT src_autoinc, dst_autoinc;
14322 rtx mem, addr;
14324 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14326 /* Use hard registers if we have aligned source or destination so we can use
14327 load/store multiple with contiguous registers. */
14328 if (dst_aligned || src_aligned)
14329 for (i = 0; i < interleave_factor; i++)
14330 regs[i] = gen_rtx_REG (SImode, i);
14331 else
14332 for (i = 0; i < interleave_factor; i++)
14333 regs[i] = gen_reg_rtx (SImode);
14335 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14336 src = copy_addr_to_reg (XEXP (srcbase, 0));
14338 srcoffset = dstoffset = 0;
14340 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14341 For copying the last bytes we want to subtract this offset again. */
14342 src_autoinc = dst_autoinc = 0;
14344 for (i = 0; i < interleave_factor; i++)
14345 regnos[i] = i;
14347 /* Copy BLOCK_SIZE_BYTES chunks. */
14349 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14351 /* Load words. */
14352 if (src_aligned && interleave_factor > 1)
14354 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14355 TRUE, srcbase, &srcoffset));
14356 src_autoinc += UNITS_PER_WORD * interleave_factor;
14358 else
14360 for (j = 0; j < interleave_factor; j++)
14362 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14363 - src_autoinc));
14364 mem = adjust_automodify_address (srcbase, SImode, addr,
14365 srcoffset + j * UNITS_PER_WORD);
14366 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14368 srcoffset += block_size_bytes;
14371 /* Store words. */
14372 if (dst_aligned && interleave_factor > 1)
14374 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14375 TRUE, dstbase, &dstoffset));
14376 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14378 else
14380 for (j = 0; j < interleave_factor; j++)
14382 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14383 - dst_autoinc));
14384 mem = adjust_automodify_address (dstbase, SImode, addr,
14385 dstoffset + j * UNITS_PER_WORD);
14386 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14388 dstoffset += block_size_bytes;
14391 remaining -= block_size_bytes;
14394 /* Copy any whole words left (note these aren't interleaved with any
14395 subsequent halfword/byte load/stores in the interests of simplicity). */
14397 words = remaining / UNITS_PER_WORD;
14399 gcc_assert (words < interleave_factor);
14401 if (src_aligned && words > 1)
14403 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14404 &srcoffset));
14405 src_autoinc += UNITS_PER_WORD * words;
14407 else
14409 for (j = 0; j < words; j++)
14411 addr = plus_constant (Pmode, src,
14412 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14413 mem = adjust_automodify_address (srcbase, SImode, addr,
14414 srcoffset + j * UNITS_PER_WORD);
14415 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14417 srcoffset += words * UNITS_PER_WORD;
14420 if (dst_aligned && words > 1)
14422 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14423 &dstoffset));
14424 dst_autoinc += words * UNITS_PER_WORD;
14426 else
14428 for (j = 0; j < words; j++)
14430 addr = plus_constant (Pmode, dst,
14431 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14432 mem = adjust_automodify_address (dstbase, SImode, addr,
14433 dstoffset + j * UNITS_PER_WORD);
14434 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14436 dstoffset += words * UNITS_PER_WORD;
14439 remaining -= words * UNITS_PER_WORD;
14441 gcc_assert (remaining < 4);
14443 /* Copy a halfword if necessary. */
14445 if (remaining >= 2)
14447 halfword_tmp = gen_reg_rtx (SImode);
14449 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14450 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14451 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14453 /* Either write out immediately, or delay until we've loaded the last
14454 byte, depending on interleave factor. */
14455 if (interleave_factor == 1)
14457 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14458 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14459 emit_insn (gen_unaligned_storehi (mem,
14460 gen_lowpart (HImode, halfword_tmp)));
14461 halfword_tmp = NULL;
14462 dstoffset += 2;
14465 remaining -= 2;
14466 srcoffset += 2;
14469 gcc_assert (remaining < 2);
14471 /* Copy last byte. */
14473 if ((remaining & 1) != 0)
14475 byte_tmp = gen_reg_rtx (SImode);
14477 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14478 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14479 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14481 if (interleave_factor == 1)
14483 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14484 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14485 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14486 byte_tmp = NULL;
14487 dstoffset++;
14490 remaining--;
14491 srcoffset++;
14494 /* Store last halfword if we haven't done so already. */
14496 if (halfword_tmp)
14498 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14499 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14500 emit_insn (gen_unaligned_storehi (mem,
14501 gen_lowpart (HImode, halfword_tmp)));
14502 dstoffset += 2;
14505 /* Likewise for last byte. */
14507 if (byte_tmp)
14509 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14510 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14511 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14512 dstoffset++;
14515 gcc_assert (remaining == 0 && srcoffset == dstoffset);
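/* Illustrative sketch (not part of arm.c): the shape of the main copy loop
   above with interleave_factor == 2 -- two loads are issued before the two
   stores, which is the load/load/store/store pattern used to hide load
   latency; a word/halfword/byte tail finishes the copy.  Plain memcpy
   stands in for the ldr/str instructions here.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned char src[20], dst[20];
  for (int i = 0; i < 20; i++)
    src[i] = (unsigned char) i;

  long long remaining = 20, off = 0;
  uint32_t words[2];                        /* the two scratch "registers" */
  while (remaining >= 8)                    /* two words per iteration */
    {
      memcpy (&words[0], src + off, 4);     /* load, load */
      memcpy (&words[1], src + off + 4, 4);
      memcpy (dst + off, &words[0], 4);     /* store, store */
      memcpy (dst + off + 4, &words[1], 4);
      off += 8;
      remaining -= 8;
    }
  memcpy (dst + off, src + off, remaining); /* tail (word/halfword/byte) */
  printf ("tail bytes: %lld, copies equal: %d\n",
          remaining, memcmp (src, dst, 20) == 0);
  return 0;
}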
14518 /* From mips_adjust_block_mem:
14520 Helper function for doing a loop-based block operation on memory
14521 reference MEM. Each iteration of the loop will operate on LENGTH
14522 bytes of MEM.
14524 Create a new base register for use within the loop and point it to
14525 the start of MEM. Create a new memory reference that uses this
14526 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14528 static void
14529 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14530 rtx *loop_mem)
14532 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14534 /* Although the new mem does not refer to a known location,
14535 it does keep up to LENGTH bytes of alignment. */
14536 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14537 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14540 /* From mips_block_move_loop:
14542 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14543 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14544 the memory regions do not overlap. */
14546 static void
14547 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14548 unsigned int interleave_factor,
14549 HOST_WIDE_INT bytes_per_iter)
14551 rtx src_reg, dest_reg, final_src, test;
14552 HOST_WIDE_INT leftover;
14554 leftover = length % bytes_per_iter;
14555 length -= leftover;
14557 /* Create registers and memory references for use within the loop. */
14558 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14559 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14561 /* Calculate the value that SRC_REG should have after the last iteration of
14562 the loop. */
14563 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14564 0, 0, OPTAB_WIDEN);
14566 /* Emit the start of the loop. */
14567 rtx_code_label *label = gen_label_rtx ();
14568 emit_label (label);
14570 /* Emit the loop body. */
14571 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14572 interleave_factor);
14574 /* Move on to the next block. */
14575 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14576 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14578 /* Emit the loop condition. */
14579 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14580 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14582 /* Mop up any left-over bytes. */
14583 if (leftover)
14584 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
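/* Illustrative sketch (not part of arm.c): the loop/leftover split used by
   arm_block_move_unaligned_loop above -- whole BYTES_PER_ITER chunks go
   through the loop, and whatever is left is handled by one final straight
   (non-looping) copy.  */
#include <stdio.h>

int
main (void)
{
  long long length = 100, bytes_per_iter = 16;
  long long leftover = length % bytes_per_iter;  /* 4 bytes */
  long long looped = length - leftover;          /* 96 bytes */
  printf ("loop: %lld bytes in %lld iterations; straight copy: %lld bytes\n",
          looped, looped / bytes_per_iter, leftover);
  return 0;
}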
14587 /* Emit a block move when either the source or destination is unaligned (not
14588 aligned to a four-byte boundary). This may need further tuning depending on
14589 core type, optimize_size setting, etc. */
14591 static int
14592 arm_movmemqi_unaligned (rtx *operands)
14594 HOST_WIDE_INT length = INTVAL (operands[2]);
14596 if (optimize_size)
14598 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14599 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14600 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14601 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14602 or dst_aligned though: allow more interleaving in those cases since the
14603 resulting code can be smaller. */
14604 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14605 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14607 if (length > 12)
14608 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14609 interleave_factor, bytes_per_iter);
14610 else
14611 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14612 interleave_factor);
14614 else
14616 /* Note that the loop created by arm_block_move_unaligned_loop may be
14617 subject to loop unrolling, which makes tuning this condition a little
14618 redundant. */
14619 if (length > 32)
14620 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14621 else
14622 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14625 return 1;
14629 arm_gen_movmemqi (rtx *operands)
14631 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14632 HOST_WIDE_INT srcoffset, dstoffset;
14633 int i;
14634 rtx src, dst, srcbase, dstbase;
14635 rtx part_bytes_reg = NULL;
14636 rtx mem;
14638 if (!CONST_INT_P (operands[2])
14639 || !CONST_INT_P (operands[3])
14640 || INTVAL (operands[2]) > 64)
14641 return 0;
14643 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14644 return arm_movmemqi_unaligned (operands);
14646 if (INTVAL (operands[3]) & 3)
14647 return 0;
14649 dstbase = operands[0];
14650 srcbase = operands[1];
14652 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14653 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14655 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14656 out_words_to_go = INTVAL (operands[2]) / 4;
14657 last_bytes = INTVAL (operands[2]) & 3;
14658 dstoffset = srcoffset = 0;
14660 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14661 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14663 for (i = 0; in_words_to_go >= 2; i+=4)
14665 if (in_words_to_go > 4)
14666 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14667 TRUE, srcbase, &srcoffset));
14668 else
14669 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14670 src, FALSE, srcbase,
14671 &srcoffset));
14673 if (out_words_to_go)
14675 if (out_words_to_go > 4)
14676 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14677 TRUE, dstbase, &dstoffset));
14678 else if (out_words_to_go != 1)
14679 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14680 out_words_to_go, dst,
14681 (last_bytes == 0
14682 ? FALSE : TRUE),
14683 dstbase, &dstoffset));
14684 else
14686 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14687 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14688 if (last_bytes != 0)
14690 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14691 dstoffset += 4;
14696 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14697 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14700 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14701 if (out_words_to_go)
14703 rtx sreg;
14705 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14706 sreg = copy_to_reg (mem);
14708 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14709 emit_move_insn (mem, sreg);
14710 in_words_to_go--;
14712 gcc_assert (!in_words_to_go); /* Sanity check */
14715 if (in_words_to_go)
14717 gcc_assert (in_words_to_go > 0);
14719 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14720 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14723 gcc_assert (!last_bytes || part_bytes_reg);
14725 if (BYTES_BIG_ENDIAN && last_bytes)
14727 rtx tmp = gen_reg_rtx (SImode);
14729 /* The bytes we want are in the top end of the word. */
14730 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14731 GEN_INT (8 * (4 - last_bytes))));
14732 part_bytes_reg = tmp;
14734 while (last_bytes)
14736 mem = adjust_automodify_address (dstbase, QImode,
14737 plus_constant (Pmode, dst,
14738 last_bytes - 1),
14739 dstoffset + last_bytes - 1);
14740 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14742 if (--last_bytes)
14744 tmp = gen_reg_rtx (SImode);
14745 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14746 part_bytes_reg = tmp;
14751 else
14753 if (last_bytes > 1)
14755 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14756 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14757 last_bytes -= 2;
14758 if (last_bytes)
14760 rtx tmp = gen_reg_rtx (SImode);
14761 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14762 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14763 part_bytes_reg = tmp;
14764 dstoffset += 2;
14768 if (last_bytes)
14770 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14771 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14775 return 1;
14778 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14779 by mode size. */
14780 inline static rtx
14781 next_consecutive_mem (rtx mem)
14783 machine_mode mode = GET_MODE (mem);
14784 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14785 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14787 return adjust_automodify_address (mem, mode, addr, offset);
14790 /* Copy using LDRD/STRD instructions whenever possible.
14791 Returns true upon success. */
14792 bool
14793 gen_movmem_ldrd_strd (rtx *operands)
14795 unsigned HOST_WIDE_INT len;
14796 HOST_WIDE_INT align;
14797 rtx src, dst, base;
14798 rtx reg0;
14799 bool src_aligned, dst_aligned;
14800 bool src_volatile, dst_volatile;
14802 gcc_assert (CONST_INT_P (operands[2]));
14803 gcc_assert (CONST_INT_P (operands[3]));
14805 len = UINTVAL (operands[2]);
14806 if (len > 64)
14807 return false;
14809 /* Maximum alignment we can assume for both src and dst buffers. */
14810 align = INTVAL (operands[3]);
14812 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14813 return false;
14815 /* Place src and dst addresses in registers
14816 and update the corresponding mem rtx. */
14817 dst = operands[0];
14818 dst_volatile = MEM_VOLATILE_P (dst);
14819 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14820 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14821 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14823 src = operands[1];
14824 src_volatile = MEM_VOLATILE_P (src);
14825 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14826 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14827 src = adjust_automodify_address (src, VOIDmode, base, 0);
14829 if (!unaligned_access && !(src_aligned && dst_aligned))
14830 return false;
14832 if (src_volatile || dst_volatile)
14833 return false;
14835 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14836 if (!(dst_aligned || src_aligned))
14837 return arm_gen_movmemqi (operands);
14839 src = adjust_address (src, DImode, 0);
14840 dst = adjust_address (dst, DImode, 0);
14841 while (len >= 8)
14843 len -= 8;
14844 reg0 = gen_reg_rtx (DImode);
14845 if (src_aligned)
14846 emit_move_insn (reg0, src);
14847 else
14848 emit_insn (gen_unaligned_loaddi (reg0, src));
14850 if (dst_aligned)
14851 emit_move_insn (dst, reg0);
14852 else
14853 emit_insn (gen_unaligned_storedi (dst, reg0));
14855 src = next_consecutive_mem (src);
14856 dst = next_consecutive_mem (dst);
14859 gcc_assert (len < 8);
14860 if (len >= 4)
14862 /* More than a word but less than a double-word to copy. Copy a word. */
14863 reg0 = gen_reg_rtx (SImode);
14864 src = adjust_address (src, SImode, 0);
14865 dst = adjust_address (dst, SImode, 0);
14866 if (src_aligned)
14867 emit_move_insn (reg0, src);
14868 else
14869 emit_insn (gen_unaligned_loadsi (reg0, src));
14871 if (dst_aligned)
14872 emit_move_insn (dst, reg0);
14873 else
14874 emit_insn (gen_unaligned_storesi (dst, reg0));
14876 src = next_consecutive_mem (src);
14877 dst = next_consecutive_mem (dst);
14878 len -= 4;
14881 if (len == 0)
14882 return true;
14884 /* Copy the remaining bytes. */
14885 if (len >= 2)
14887 dst = adjust_address (dst, HImode, 0);
14888 src = adjust_address (src, HImode, 0);
14889 reg0 = gen_reg_rtx (SImode);
14890 if (src_aligned)
14891 emit_insn (gen_zero_extendhisi2 (reg0, src));
14892 else
14893 emit_insn (gen_unaligned_loadhiu (reg0, src));
14895 if (dst_aligned)
14896 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14897 else
14898 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14900 src = next_consecutive_mem (src);
14901 dst = next_consecutive_mem (dst);
14902 if (len == 2)
14903 return true;
14906 dst = adjust_address (dst, QImode, 0);
14907 src = adjust_address (src, QImode, 0);
14908 reg0 = gen_reg_rtx (QImode);
14909 emit_move_insn (reg0, src);
14910 emit_move_insn (dst, reg0);
14911 return true;
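/* Illustrative sketch (not part of arm.c): the size decomposition used by
   gen_movmem_ldrd_strd above -- double-words while at least 8 bytes remain,
   then at most one word, one halfword and one byte.  */
#include <stdio.h>

int
main (void)
{
  unsigned long long len = 23;
  printf ("%llu double-word(s)", len / 8);
  len %= 8;
  if (len >= 4) { printf (", 1 word");     len -= 4; }
  if (len >= 2) { printf (", 1 halfword"); len -= 2; }
  if (len >= 1) printf (", 1 byte");
  printf ("\n");  /* 23 bytes -> 2 double-words, 1 word, 1 halfword, 1 byte */
  return 0;
}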
14914 /* Select a dominance comparison mode if possible for a test of the general
14915 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14916 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14917 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14918 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14919 In all cases OP will be either EQ or NE, but we don't need to know which
14920 here. If we are unable to support a dominance comparison we return
14921 CC mode. This will then fail to match for the RTL expressions that
14922 generate this call. */
14923 machine_mode
14924 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14926 enum rtx_code cond1, cond2;
14927 int swapped = 0;
14929 /* Currently we will probably get the wrong result if the individual
14930 comparisons are not simple. This also ensures that it is safe to
14931 reverse a comparison if necessary. */
14932 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14933 != CCmode)
14934 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14935 != CCmode))
14936 return CCmode;
14938 /* The if_then_else variant of this tests the second condition if the
14939 first passes, but is true if the first fails. Reverse the first
14940 condition to get a true "inclusive-or" expression. */
14941 if (cond_or == DOM_CC_NX_OR_Y)
14942 cond1 = reverse_condition (cond1);
14944 /* If the comparisons are not equal, and one doesn't dominate the other,
14945 then we can't do this. */
14946 if (cond1 != cond2
14947 && !comparison_dominates_p (cond1, cond2)
14948 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14949 return CCmode;
14951 if (swapped)
14953 enum rtx_code temp = cond1;
14954 cond1 = cond2;
14955 cond2 = temp;
14958 switch (cond1)
14960 case EQ:
14961 if (cond_or == DOM_CC_X_AND_Y)
14962 return CC_DEQmode;
14964 switch (cond2)
14966 case EQ: return CC_DEQmode;
14967 case LE: return CC_DLEmode;
14968 case LEU: return CC_DLEUmode;
14969 case GE: return CC_DGEmode;
14970 case GEU: return CC_DGEUmode;
14971 default: gcc_unreachable ();
14974 case LT:
14975 if (cond_or == DOM_CC_X_AND_Y)
14976 return CC_DLTmode;
14978 switch (cond2)
14980 case LT:
14981 return CC_DLTmode;
14982 case LE:
14983 return CC_DLEmode;
14984 case NE:
14985 return CC_DNEmode;
14986 default:
14987 gcc_unreachable ();
14990 case GT:
14991 if (cond_or == DOM_CC_X_AND_Y)
14992 return CC_DGTmode;
14994 switch (cond2)
14996 case GT:
14997 return CC_DGTmode;
14998 case GE:
14999 return CC_DGEmode;
15000 case NE:
15001 return CC_DNEmode;
15002 default:
15003 gcc_unreachable ();
15006 case LTU:
15007 if (cond_or == DOM_CC_X_AND_Y)
15008 return CC_DLTUmode;
15010 switch (cond2)
15012 case LTU:
15013 return CC_DLTUmode;
15014 case LEU:
15015 return CC_DLEUmode;
15016 case NE:
15017 return CC_DNEmode;
15018 default:
15019 gcc_unreachable ();
15022 case GTU:
15023 if (cond_or == DOM_CC_X_AND_Y)
15024 return CC_DGTUmode;
15026 switch (cond2)
15028 case GTU:
15029 return CC_DGTUmode;
15030 case GEU:
15031 return CC_DGEUmode;
15032 case NE:
15033 return CC_DNEmode;
15034 default:
15035 gcc_unreachable ();
15038 /* The remaining cases only occur when both comparisons are the
15039 same. */
15040 case NE:
15041 gcc_assert (cond1 == cond2);
15042 return CC_DNEmode;
15044 case LE:
15045 gcc_assert (cond1 == cond2);
15046 return CC_DLEmode;
15048 case GE:
15049 gcc_assert (cond1 == cond2);
15050 return CC_DGEmode;
15052 case LEU:
15053 gcc_assert (cond1 == cond2);
15054 return CC_DLEUmode;
15056 case GEU:
15057 gcc_assert (cond1 == cond2);
15058 return CC_DGEUmode;
15060 default:
15061 gcc_unreachable ();
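/* Illustrative sketch (not part of arm.c): an informal scalar analogue of
   the "dominance" idea above -- when one comparison's truth implies
   another's, a combined test can be expressed through a single condition.
   This does not model the CC_D* modes themselves, only the underlying
   implication.  */
#include <assert.h>

int
main (void)
{
  for (int x = -2; x <= 2; x++)
    for (int y = -2; y <= 2; y++)
      {
        /* LT implies LE, so (x < y) || (x == y) collapses to (x <= y).  */
        assert (((x < y) || (x == y)) == (x <= y));
        /* EQ implies both GE and LE.  */
        if (x == y)
          assert (x >= y && x <= y);
      }
  return 0;
}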
15065 machine_mode
15066 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15068 /* All floating point compares return CCFP if it is an equality
15069 comparison, and CCFPE otherwise. */
15070 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15072 switch (op)
15074 case EQ:
15075 case NE:
15076 case UNORDERED:
15077 case ORDERED:
15078 case UNLT:
15079 case UNLE:
15080 case UNGT:
15081 case UNGE:
15082 case UNEQ:
15083 case LTGT:
15084 return CCFPmode;
15086 case LT:
15087 case LE:
15088 case GT:
15089 case GE:
15090 return CCFPEmode;
15092 default:
15093 gcc_unreachable ();
15097 /* A compare with a shifted operand. Because of canonicalization, the
15098 comparison will have to be swapped when we emit the assembler. */
15099 if (GET_MODE (y) == SImode
15100 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15101 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15102 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15103 || GET_CODE (x) == ROTATERT))
15104 return CC_SWPmode;
15106 /* This operation is performed swapped, but since we only rely on the Z
15107 flag we don't need an additional mode. */
15108 if (GET_MODE (y) == SImode
15109 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15110 && GET_CODE (x) == NEG
15111 && (op == EQ || op == NE))
15112 return CC_Zmode;
15114 /* This is a special case that is used by combine to allow a
15115 comparison of a shifted byte load to be split into a zero-extend
15116 followed by a comparison of the shifted integer (only valid for
15117 equalities and unsigned inequalities). */
15118 if (GET_MODE (x) == SImode
15119 && GET_CODE (x) == ASHIFT
15120 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15121 && GET_CODE (XEXP (x, 0)) == SUBREG
15122 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15123 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15124 && (op == EQ || op == NE
15125 || op == GEU || op == GTU || op == LTU || op == LEU)
15126 && CONST_INT_P (y))
15127 return CC_Zmode;
15129 /* A construct for a conditional compare, if the false arm contains
15130 0, then both conditions must be true, otherwise either condition
15131 must be true. Not all conditions are possible, so CCmode is
15132 returned if it can't be done. */
15133 if (GET_CODE (x) == IF_THEN_ELSE
15134 && (XEXP (x, 2) == const0_rtx
15135 || XEXP (x, 2) == const1_rtx)
15136 && COMPARISON_P (XEXP (x, 0))
15137 && COMPARISON_P (XEXP (x, 1)))
15138 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15139 INTVAL (XEXP (x, 2)));
15141 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15142 if (GET_CODE (x) == AND
15143 && (op == EQ || op == NE)
15144 && COMPARISON_P (XEXP (x, 0))
15145 && COMPARISON_P (XEXP (x, 1)))
15146 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15147 DOM_CC_X_AND_Y);
15149 if (GET_CODE (x) == IOR
15150 && (op == EQ || op == NE)
15151 && COMPARISON_P (XEXP (x, 0))
15152 && COMPARISON_P (XEXP (x, 1)))
15153 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15154 DOM_CC_X_OR_Y);
15156 /* An operation (on Thumb) where we want to test for a single bit.
15157 This is done by shifting that bit up into the top bit of a
15158 scratch register; we can then branch on the sign bit. */
15159 if (TARGET_THUMB1
15160 && GET_MODE (x) == SImode
15161 && (op == EQ || op == NE)
15162 && GET_CODE (x) == ZERO_EXTRACT
15163 && XEXP (x, 1) == const1_rtx)
15164 return CC_Nmode;
15166 /* An operation that sets the condition codes as a side-effect, the
15167 V flag is not set correctly, so we can only use comparisons where
15168 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15169 instead.) */
15170 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15171 if (GET_MODE (x) == SImode
15172 && y == const0_rtx
15173 && (op == EQ || op == NE || op == LT || op == GE)
15174 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15175 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15176 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15177 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15178 || GET_CODE (x) == LSHIFTRT
15179 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15180 || GET_CODE (x) == ROTATERT
15181 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15182 return CC_NOOVmode;
15184 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15185 return CC_Zmode;
15187 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15188 && GET_CODE (x) == PLUS
15189 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15190 return CC_Cmode;
15192 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15194 switch (op)
15196 case EQ:
15197 case NE:
15198 /* A DImode comparison against zero can be implemented by
15199 or'ing the two halves together. */
15200 if (y == const0_rtx)
15201 return CC_Zmode;
15203 /* We can do an equality test in three Thumb instructions. */
15204 if (!TARGET_32BIT)
15205 return CC_Zmode;
15207 /* FALLTHROUGH */
15209 case LTU:
15210 case LEU:
15211 case GTU:
15212 case GEU:
15213 /* DImode unsigned comparisons can be implemented by cmp +
15214 cmpeq without a scratch register. Not worth doing in
15215 Thumb-2. */
15216 if (TARGET_32BIT)
15217 return CC_CZmode;
15219 /* FALLTHROUGH */
15221 case LT:
15222 case LE:
15223 case GT:
15224 case GE:
15225 /* DImode signed and unsigned comparisons can be implemented
15226 by cmp + sbcs with a scratch register, but that does not
15227 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15228 gcc_assert (op != EQ && op != NE);
15229 return CC_NCVmode;
15231 default:
15232 gcc_unreachable ();
15236 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15237 return GET_MODE (x);
15239 return CCmode;
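/* Illustrative sketch (not part of arm.c): the CC_Cmode case above matches
   the usual unsigned-overflow idiom, where the comparison (a + b) LTU a is
   true exactly when the addition carried, so only the carry flag needs to
   be valid.  */
#include <assert.h>
#include <limits.h>

int
main (void)
{
  unsigned a = UINT_MAX - 1, b = 5;
  unsigned sum = a + b;        /* wraps around: the addition carried */
  assert (sum < a);            /* (a + b) LTU a detects the carry */
  assert (!(1u + 2u < 1u));    /* no carry, comparison is false */
  return 0;
}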
15242 /* X and Y are two things to compare using CODE. Emit the compare insn and
15243 return the rtx for the CC register in the proper mode. SCRATCH is a
15244 scratch register that may be needed for DImode comparisons. */
15246 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15248 machine_mode mode;
15249 rtx cc_reg;
15250 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15252 /* We might have X as a constant, Y as a register because of the predicates
15253 used for cmpdi. If so, force X to a register here. */
15254 if (dimode_comparison && !REG_P (x))
15255 x = force_reg (DImode, x);
15257 mode = SELECT_CC_MODE (code, x, y);
15258 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15260 if (dimode_comparison
15261 && mode != CC_CZmode)
15263 rtx clobber, set;
15265 /* To compare two non-zero values for equality, XOR them and
15266 then compare against zero. Not used for ARM mode; there
15267 CC_CZmode is cheaper. */
15268 if (mode == CC_Zmode && y != const0_rtx)
15270 gcc_assert (!reload_completed);
15271 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15272 y = const0_rtx;
15275 /* A scratch register is required. */
15276 if (reload_completed)
15277 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15278 else
15279 scratch = gen_rtx_SCRATCH (SImode);
15281 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15282 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15283 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15285 else
15286 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15288 return cc_reg;
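/* Illustrative sketch (not part of arm.c): the XOR trick mentioned above
   for DImode equality -- two values are equal exactly when their XOR is
   zero, so the equality test becomes a compare against zero.  */
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t x = 0x123456789abcdef0ull;
  uint64_t y = x;
  uint64_t z = x + 1;
  assert (((x ^ y) == 0) == (x == y));
  assert (((x ^ z) == 0) == (x == z));
  return 0;
}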
15291 /* Generate a sequence of insns that will generate the correct return
15292 address mask depending on the physical architecture that the program
15293 is running on. */
15295 arm_gen_return_addr_mask (void)
15297 rtx reg = gen_reg_rtx (Pmode);
15299 emit_insn (gen_return_addr_mask (reg));
15300 return reg;
15303 void
15304 arm_reload_in_hi (rtx *operands)
15306 rtx ref = operands[1];
15307 rtx base, scratch;
15308 HOST_WIDE_INT offset = 0;
15310 if (GET_CODE (ref) == SUBREG)
15312 offset = SUBREG_BYTE (ref);
15313 ref = SUBREG_REG (ref);
15316 if (REG_P (ref))
15318 /* We have a pseudo which has been spilt onto the stack; there
15319 are two cases here: the first where there is a simple
15320 stack-slot replacement and a second where the stack-slot is
15321 out of range, or is used as a subreg. */
15322 if (reg_equiv_mem (REGNO (ref)))
15324 ref = reg_equiv_mem (REGNO (ref));
15325 base = find_replacement (&XEXP (ref, 0));
15327 else
15328 /* The slot is out of range, or was dressed up in a SUBREG. */
15329 base = reg_equiv_address (REGNO (ref));
15331 else
15332 base = find_replacement (&XEXP (ref, 0));
15334 /* Handle the case where the address is too complex to be offset by 1. */
15335 if (GET_CODE (base) == MINUS
15336 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15338 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15340 emit_set_insn (base_plus, base);
15341 base = base_plus;
15343 else if (GET_CODE (base) == PLUS)
15345 /* The addend must be CONST_INT, or we would have dealt with it above. */
15346 HOST_WIDE_INT hi, lo;
15348 offset += INTVAL (XEXP (base, 1));
15349 base = XEXP (base, 0);
15351 /* Rework the address into a legal sequence of insns. */
15352 /* Valid range for lo is -4095 -> 4095 */
15353 lo = (offset >= 0
15354 ? (offset & 0xfff)
15355 : -((-offset) & 0xfff));
15357 /* Corner case, if lo is the max offset then we would be out of range
15358 once we have added the additional 1 below, so bump the msb into the
15359 pre-loading insn(s). */
15360 if (lo == 4095)
15361 lo &= 0x7ff;
15363 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15364 ^ (HOST_WIDE_INT) 0x80000000)
15365 - (HOST_WIDE_INT) 0x80000000);
15367 gcc_assert (hi + lo == offset);
15369 if (hi != 0)
15371 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15373 /* Get the base address; addsi3 knows how to handle constants
15374 that require more than one insn. */
15375 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15376 base = base_plus;
15377 offset = lo;
15381 /* Operands[2] may overlap operands[0] (though it won't overlap
15382 operands[1]), that's why we asked for a DImode reg -- so we can
15383 use the bit that does not overlap. */
15384 if (REGNO (operands[2]) == REGNO (operands[0]))
15385 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15386 else
15387 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15389 emit_insn (gen_zero_extendqisi2 (scratch,
15390 gen_rtx_MEM (QImode,
15391 plus_constant (Pmode, base,
15392 offset))));
15393 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15394 gen_rtx_MEM (QImode,
15395 plus_constant (Pmode, base,
15396 offset + 1))));
15397 if (!BYTES_BIG_ENDIAN)
15398 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15399 gen_rtx_IOR (SImode,
15400 gen_rtx_ASHIFT
15401 (SImode,
15402 gen_rtx_SUBREG (SImode, operands[0], 0),
15403 GEN_INT (8)),
15404 scratch));
15405 else
15406 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15407 gen_rtx_IOR (SImode,
15408 gen_rtx_ASHIFT (SImode, scratch,
15409 GEN_INT (8)),
15410 gen_rtx_SUBREG (SImode, operands[0], 0)));
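/* Illustrative sketch (not part of arm.c): the hi/lo offset split used by
   arm_reload_in_hi above (and again in arm_reload_out_hi below).  LO keeps
   the low 12 bits with the sign preserved, HI is the rest sign-extended to
   32 bits, so hi + lo == offset; lo == 4095 is nudged down so that the
   additional "offset + 1" byte access stays in range.  split_offset is a
   hypothetical helper, and offsets are assumed to fit in 32 bits.  */
#include <assert.h>
#include <stdio.h>

static void
split_offset (long long offset, long long *hi, long long *lo)
{
  /* Valid range for lo is -4095 .. 4095.  */
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  /* Corner case: lo == 4095 would go out of range once 1 is added for the
     second byte, so push its top bit into hi.  */
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = ((((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL);
  assert (*hi + *lo == offset);
}

int
main (void)
{
  long long tests[] = { 0, 5, 4095, 4096, -4095, 100000, -100000 };
  for (int i = 0; i < 7; i++)
    {
      long long hi, lo;
      split_offset (tests[i], &hi, &lo);
      printf ("offset=%7lld -> hi=%7lld lo=%5lld\n", tests[i], hi, lo);
    }
  return 0;
}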
15413 /* Handle storing a half-word to memory during reload by synthesizing it as
15414 two byte stores. Take care not to clobber the input values until after we
15415 have moved them somewhere safe. This code assumes that if the DImode
15416 scratch in operands[2] overlaps either the input value or output address
15417 in some way, then that value must die in this insn (we absolutely need
15418 two scratch registers for some corner cases). */
15419 void
15420 arm_reload_out_hi (rtx *operands)
15422 rtx ref = operands[0];
15423 rtx outval = operands[1];
15424 rtx base, scratch;
15425 HOST_WIDE_INT offset = 0;
15427 if (GET_CODE (ref) == SUBREG)
15429 offset = SUBREG_BYTE (ref);
15430 ref = SUBREG_REG (ref);
15433 if (REG_P (ref))
15435 /* We have a pseudo which has been spilt onto the stack; there
15436 are two cases here: the first where there is a simple
15437 stack-slot replacement and a second where the stack-slot is
15438 out of range, or is used as a subreg. */
15439 if (reg_equiv_mem (REGNO (ref)))
15441 ref = reg_equiv_mem (REGNO (ref));
15442 base = find_replacement (&XEXP (ref, 0));
15444 else
15445 /* The slot is out of range, or was dressed up in a SUBREG. */
15446 base = reg_equiv_address (REGNO (ref));
15448 else
15449 base = find_replacement (&XEXP (ref, 0));
15451 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15453 /* Handle the case where the address is too complex to be offset by 1. */
15454 if (GET_CODE (base) == MINUS
15455 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15457 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15459 /* Be careful not to destroy OUTVAL. */
15460 if (reg_overlap_mentioned_p (base_plus, outval))
15462 /* Updating base_plus might destroy outval, see if we can
15463 swap the scratch and base_plus. */
15464 if (!reg_overlap_mentioned_p (scratch, outval))
15466 rtx tmp = scratch;
15467 scratch = base_plus;
15468 base_plus = tmp;
15470 else
15472 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15474 /* Be conservative and copy OUTVAL into the scratch now;
15475 this should only be necessary if outval is a subreg
15476 of something larger than a word. */
15477 /* XXX Might this clobber base? I can't see how it can,
15478 since scratch is known to overlap with OUTVAL, and
15479 must be wider than a word. */
15480 emit_insn (gen_movhi (scratch_hi, outval));
15481 outval = scratch_hi;
15485 emit_set_insn (base_plus, base);
15486 base = base_plus;
15488 else if (GET_CODE (base) == PLUS)
15490 /* The addend must be CONST_INT, or we would have dealt with it above. */
15491 HOST_WIDE_INT hi, lo;
15493 offset += INTVAL (XEXP (base, 1));
15494 base = XEXP (base, 0);
15496 /* Rework the address into a legal sequence of insns. */
15497 /* Valid range for lo is -4095 -> 4095 */
15498 lo = (offset >= 0
15499 ? (offset & 0xfff)
15500 : -((-offset) & 0xfff));
15502 /* Corner case: if lo is the max offset then we would be out of range
15503 once we have added the additional 1 below, so bump the msb into the
15504 pre-loading insn(s). */
15505 if (lo == 4095)
15506 lo &= 0x7ff;
15508 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15509 ^ (HOST_WIDE_INT) 0x80000000)
15510 - (HOST_WIDE_INT) 0x80000000);
15512 gcc_assert (hi + lo == offset);
15514 if (hi != 0)
15516 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15518 /* Be careful not to destroy OUTVAL. */
15519 if (reg_overlap_mentioned_p (base_plus, outval))
15521 /* Updating base_plus might destroy outval, see if we
15522 can swap the scratch and base_plus. */
15523 if (!reg_overlap_mentioned_p (scratch, outval))
15525 rtx tmp = scratch;
15526 scratch = base_plus;
15527 base_plus = tmp;
15529 else
15531 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15533 /* Be conservative and copy outval into scratch now;
15534 this should only be necessary if outval is a
15535 subreg of something larger than a word. */
15536 /* XXX Might this clobber base? I can't see how it
15537 can, since scratch is known to overlap with
15538 outval. */
15539 emit_insn (gen_movhi (scratch_hi, outval));
15540 outval = scratch_hi;
15544 /* Get the base address; addsi3 knows how to handle constants
15545 that require more than one insn. */
15546 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15547 base = base_plus;
15548 offset = lo;
15552 if (BYTES_BIG_ENDIAN)
15554 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15555 plus_constant (Pmode, base,
15556 offset + 1)),
15557 gen_lowpart (QImode, outval)));
15558 emit_insn (gen_lshrsi3 (scratch,
15559 gen_rtx_SUBREG (SImode, outval, 0),
15560 GEN_INT (8)));
15561 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15562 offset)),
15563 gen_lowpart (QImode, scratch)));
15565 else
15567 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15568 offset)),
15569 gen_lowpart (QImode, outval)));
15570 emit_insn (gen_lshrsi3 (scratch,
15571 gen_rtx_SUBREG (SImode, outval, 0),
15572 GEN_INT (8)));
15573 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15574 plus_constant (Pmode, base,
15575 offset + 1)),
15576 gen_lowpart (QImode, scratch)));
15580 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15581 (padded to the size of a word) should be passed in a register. */
15583 static bool
15584 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15586 if (TARGET_AAPCS_BASED)
15587 return must_pass_in_stack_var_size (mode, type);
15588 else
15589 return must_pass_in_stack_var_size_or_pad (mode, type);
15593 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15594 Return true if an argument passed on the stack should be padded upwards,
15595 i.e. if the least-significant byte has useful data.
15596 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15597 aggregate types are placed in the lowest memory address. */
15599 bool
15600 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15602 if (!TARGET_AAPCS_BASED)
15603 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15605 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15606 return false;
15608 return true;
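/* For example: under AAPCS a 3-byte structure passed on the stack is
   padded upward regardless of endianness, while a plain short or int
   argument on a big-endian AAPCS target falls into the
   INTEGRAL_TYPE_P check above and is therefore not padded upward. */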
15612 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15613 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15614 register has useful data, and return the opposite if the most
15615 significant byte does. */
15617 bool
15618 arm_pad_reg_upward (machine_mode mode,
15619 tree type, int first ATTRIBUTE_UNUSED)
15621 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15623 /* For AAPCS, small aggregates, small fixed-point types,
15624 and small complex types are always padded upwards. */
15625 if (type)
15627 if ((AGGREGATE_TYPE_P (type)
15628 || TREE_CODE (type) == COMPLEX_TYPE
15629 || FIXED_POINT_TYPE_P (type))
15630 && int_size_in_bytes (type) <= 4)
15631 return true;
15633 else
15635 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15636 && GET_MODE_SIZE (mode) <= 4)
15637 return true;
15641 /* Otherwise, use default padding. */
15642 return !BYTES_BIG_ENDIAN;
15645 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15646 assuming that the address in the base register is word aligned. */
15647 bool
15648 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15650 HOST_WIDE_INT max_offset;
15652 /* Offset must be a multiple of 4 in Thumb mode. */
15653 if (TARGET_THUMB2 && ((offset & 3) != 0))
15654 return false;
15656 if (TARGET_THUMB2)
15657 max_offset = 1020;
15658 else if (TARGET_ARM)
15659 max_offset = 255;
15660 else
15661 return false;
15663 return ((offset <= max_offset) && (offset >= -max_offset));
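/* A few concrete values accepted or rejected by the check above: in
   Thumb-2 state 0, 4 and 1020 are valid LDRD/STRD offsets, while 2
   (not a multiple of 4) and 1024 (out of range) are not; in ARM
   state any offset in [-255, 255] is accepted, so 250 is valid but
   256 is not. */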
15666 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15667 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15668 Assumes that the address in the base register RN is word aligned. Pattern
15669 guarantees that both memory accesses use the same base register,
15670 the offsets are constants within the range, and the gap between the offsets is 4.
15671 If reload is complete then check that the registers are legal. WBACK indicates whether
15672 address is updated. LOAD indicates whether memory access is load or store. */
15673 bool
15674 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15675 bool wback, bool load)
15677 unsigned int t, t2, n;
15679 if (!reload_completed)
15680 return true;
15682 if (!offset_ok_for_ldrd_strd (offset))
15683 return false;
15685 t = REGNO (rt);
15686 t2 = REGNO (rt2);
15687 n = REGNO (rn);
15689 if ((TARGET_THUMB2)
15690 && ((wback && (n == t || n == t2))
15691 || (t == SP_REGNUM)
15692 || (t == PC_REGNUM)
15693 || (t2 == SP_REGNUM)
15694 || (t2 == PC_REGNUM)
15695 || (!load && (n == PC_REGNUM))
15696 || (load && (t == t2))
15697 /* Triggers Cortex-M3 LDRD errata. */
15698 || (!wback && load && fix_cm3_ldrd && (n == t))))
15699 return false;
15701 if ((TARGET_ARM)
15702 && ((wback && (n == t || n == t2))
15703 || (t2 == PC_REGNUM)
15704 || (t % 2 != 0) /* First destination register is not even. */
15705 || (t2 != t + 1)
15706 /* PC can be used as base register (for offset addressing only),
15707 but it is deprecated. */
15708 || (n == PC_REGNUM)))
15709 return false;
15711 return true;
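/* As a rough illustration of the checks above, after reload: in ARM
   state ldrd/strd with rt = r0, rt2 = r1 and base r2 is accepted,
   whereas rt = r1, rt2 = r2 (odd first register) or rt = r2,
   rt2 = r4 (non-consecutive pair) is rejected. In Thumb-2 state the
   pair is unconstrained, but SP and PC are rejected as data
   registers, as is a load with rt == rt2. */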
15714 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15715 operand MEM's address contains an immediate offset from the base
15716 register and has no side effects, in which case it sets BASE and
15717 OFFSET accordingly. */
15718 static bool
15719 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15721 rtx addr;
15723 gcc_assert (base != NULL && offset != NULL);
15725 /* TODO: Handle more general memory operand patterns, such as
15726 PRE_DEC and PRE_INC. */
15728 if (side_effects_p (mem))
15729 return false;
15731 /* Can't deal with subregs. */
15732 if (GET_CODE (mem) == SUBREG)
15733 return false;
15735 gcc_assert (MEM_P (mem));
15737 *offset = const0_rtx;
15739 addr = XEXP (mem, 0);
15741 /* If addr isn't valid for DImode, then we can't handle it. */
15742 if (!arm_legitimate_address_p (DImode, addr,
15743 reload_in_progress || reload_completed))
15744 return false;
15746 if (REG_P (addr))
15748 *base = addr;
15749 return true;
15751 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15753 *base = XEXP (addr, 0);
15754 *offset = XEXP (addr, 1);
15755 return (REG_P (*base) && CONST_INT_P (*offset));
15758 return false;
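/* For instance, given (mem:SI (plus:SI (reg:SI r4) (const_int 8)))
   this returns true with *base = r4 and *offset = 8, and for a bare
   (mem:SI (reg:SI r4)) it returns true with *offset = const0_rtx;
   auto-increment addresses and SUBREGs of a MEM are rejected above. */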
15761 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15763 /* Called from a peephole2 to replace two word-size accesses with a
15764 single LDRD/STRD instruction. Returns true iff we can generate a
15765 new instruction sequence. That is, both accesses use the same base
15766 register and the gap between constant offsets is 4. This function
15767 may reorder its operands to match ldrd/strd RTL templates.
15768 OPERANDS are the operands found by the peephole matcher;
15769 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15770 corresponding memory operands. LOAD indicates whether the access
15771 is load or store. CONST_STORE indicates a store of constant
15772 integer values held in OPERANDS[4,5] and assumes that the pattern
15773 is 4 insns long, for the purpose of checking dead registers.
15774 COMMUTE indicates that register operands may be reordered. */
15775 bool
15776 gen_operands_ldrd_strd (rtx *operands, bool load,
15777 bool const_store, bool commute)
15779 int nops = 2;
15780 HOST_WIDE_INT offsets[2], offset;
15781 rtx base = NULL_RTX;
15782 rtx cur_base, cur_offset, tmp;
15783 int i, gap;
15784 HARD_REG_SET regset;
15786 gcc_assert (!const_store || !load);
15787 /* Check that the memory references are immediate offsets from the
15788 same base register. Extract the base register, the destination
15789 registers, and the corresponding memory offsets. */
15790 for (i = 0; i < nops; i++)
15792 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15793 return false;
15795 if (i == 0)
15796 base = cur_base;
15797 else if (REGNO (base) != REGNO (cur_base))
15798 return false;
15800 offsets[i] = INTVAL (cur_offset);
15801 if (GET_CODE (operands[i]) == SUBREG)
15803 tmp = SUBREG_REG (operands[i]);
15804 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15805 operands[i] = tmp;
15809 /* Make sure there is no dependency between the individual loads. */
15810 if (load && REGNO (operands[0]) == REGNO (base))
15811 return false; /* RAW */
15813 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15814 return false; /* WAW */
15816 /* If the same input register is used in both stores
15817 when storing different constants, try to find a free register.
15818 For example, the code
15819 mov r0, 0
15820 str r0, [r2]
15821 mov r0, 1
15822 str r0, [r2, #4]
15823 can be transformed into
15824 mov r1, 0
15825 strd r1, r0, [r2]
15826 in Thumb mode assuming that r1 is free. */
15827 if (const_store
15828 && REGNO (operands[0]) == REGNO (operands[1])
15829 && INTVAL (operands[4]) != INTVAL (operands[5]))
15831 if (TARGET_THUMB2)
15833 CLEAR_HARD_REG_SET (regset);
15834 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15835 if (tmp == NULL_RTX)
15836 return false;
15838 /* Use the new register in the first store to ensure that
15839 if the original input register is not dead after the peephole,
15840 then it will have the correct constant value. */
15841 operands[0] = tmp;
15843 else if (TARGET_ARM)
15845 return false;
15846 int regno = REGNO (operands[0]);
15847 if (!peep2_reg_dead_p (4, operands[0]))
15849 /* When the input register is even and is not dead after the
15850 pattern, it has to hold the second constant but we cannot
15851 form a legal STRD in ARM mode with this register as the second
15852 register. */
15853 if (regno % 2 == 0)
15854 return false;
15856 /* Is regno-1 free? */
15857 SET_HARD_REG_SET (regset);
15858 CLEAR_HARD_REG_BIT(regset, regno - 1);
15859 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15860 if (tmp == NULL_RTX)
15861 return false;
15863 operands[0] = tmp;
15865 else
15867 /* Find a DImode register. */
15868 CLEAR_HARD_REG_SET (regset);
15869 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15870 if (tmp != NULL_RTX)
15872 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15873 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15875 else
15877 /* Can we use the input register to form a DI register? */
15878 SET_HARD_REG_SET (regset);
15879 CLEAR_HARD_REG_BIT(regset,
15880 regno % 2 == 0 ? regno + 1 : regno - 1);
15881 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15882 if (tmp == NULL_RTX)
15883 return false;
15884 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15888 gcc_assert (operands[0] != NULL_RTX);
15889 gcc_assert (operands[1] != NULL_RTX);
15890 gcc_assert (REGNO (operands[0]) % 2 == 0);
15891 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15895 /* Make sure the instructions are ordered with lower memory access first. */
15896 if (offsets[0] > offsets[1])
15898 gap = offsets[0] - offsets[1];
15899 offset = offsets[1];
15901 /* Swap the instructions such that lower memory is accessed first. */
15902 SWAP_RTX (operands[0], operands[1]);
15903 SWAP_RTX (operands[2], operands[3]);
15904 if (const_store)
15905 SWAP_RTX (operands[4], operands[5]);
15907 else
15909 gap = offsets[1] - offsets[0];
15910 offset = offsets[0];
15913 /* Make sure accesses are to consecutive memory locations. */
15914 if (gap != 4)
15915 return false;
15917 /* Make sure we generate legal instructions. */
15918 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15919 false, load))
15920 return true;
15922 /* In Thumb state, where registers are almost unconstrained, there
15923 is little hope to fix it. */
15924 if (TARGET_THUMB2)
15925 return false;
15927 if (load && commute)
15929 /* Try reordering registers. */
15930 SWAP_RTX (operands[0], operands[1]);
15931 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15932 false, load))
15933 return true;
15936 if (const_store)
15938 /* If input registers are dead after this pattern, they can be
15939 reordered or replaced by other registers that are free in the
15940 current pattern. */
15941 if (!peep2_reg_dead_p (4, operands[0])
15942 || !peep2_reg_dead_p (4, operands[1]))
15943 return false;
15945 /* Try to reorder the input registers. */
15946 /* For example, the code
15947 mov r0, 0
15948 mov r1, 1
15949 str r1, [r2]
15950 str r0, [r2, #4]
15951 can be transformed into
15952 mov r1, 0
15953 mov r0, 1
15954 strd r0, r1, [r2]
15956 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15957 false, false))
15959 SWAP_RTX (operands[0], operands[1]);
15960 return true;
15963 /* Try to find a free DI register. */
15964 CLEAR_HARD_REG_SET (regset);
15965 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15966 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15967 while (true)
15969 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15970 if (tmp == NULL_RTX)
15971 return false;
15973 /* DREG must be an even-numbered register in DImode.
15974 Split it into SI registers. */
15975 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15976 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15977 gcc_assert (operands[0] != NULL_RTX);
15978 gcc_assert (operands[1] != NULL_RTX);
15979 gcc_assert (REGNO (operands[0]) % 2 == 0);
15980 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15982 return (operands_ok_ldrd_strd (operands[0], operands[1],
15983 base, offset,
15984 false, load));
15988 return false;
15990 #undef SWAP_RTX
15995 /* Print a symbolic form of X to the debug file, F. */
15996 static void
15997 arm_print_value (FILE *f, rtx x)
15999 switch (GET_CODE (x))
16001 case CONST_INT:
16002 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16003 return;
16005 case CONST_DOUBLE:
16006 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16007 return;
16009 case CONST_VECTOR:
16011 int i;
16013 fprintf (f, "<");
16014 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16016 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16017 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16018 fputc (',', f);
16020 fprintf (f, ">");
16022 return;
16024 case CONST_STRING:
16025 fprintf (f, "\"%s\"", XSTR (x, 0));
16026 return;
16028 case SYMBOL_REF:
16029 fprintf (f, "`%s'", XSTR (x, 0));
16030 return;
16032 case LABEL_REF:
16033 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16034 return;
16036 case CONST:
16037 arm_print_value (f, XEXP (x, 0));
16038 return;
16040 case PLUS:
16041 arm_print_value (f, XEXP (x, 0));
16042 fprintf (f, "+");
16043 arm_print_value (f, XEXP (x, 1));
16044 return;
16046 case PC:
16047 fprintf (f, "pc");
16048 return;
16050 default:
16051 fprintf (f, "????");
16052 return;
16056 /* Routines for manipulation of the constant pool. */
16058 /* Arm instructions cannot load a large constant directly into a
16059 register; they have to come from a pc relative load. The constant
16060 must therefore be placed in the addressable range of the pc
16061 relative load. Depending on the precise pc relative load
16062 instruction the range is somewhere between 256 bytes and 4k. This
16063 means that we often have to dump a constant inside a function, and
16064 generate code to branch around it.
16066 It is important to minimize this, since the branches will slow
16067 things down and make the code larger.
16069 Normally we can hide the table after an existing unconditional
16070 branch so that there is no interruption of the flow, but in the
16071 worst case the code looks like this:
16073 ldr rn, L1
16075 b L2
16076 align
16077 L1: .long value
16081 ldr rn, L3
16083 b L4
16084 align
16085 L3: .long value
16089 We fix this by performing a scan after scheduling, which notices
16090 which instructions need to have their operands fetched from the
16091 constant table and builds the table.
16093 The algorithm starts by building a table of all the constants that
16094 need fixing up and all the natural barriers in the function (places
16095 where a constant table can be dropped without breaking the flow).
16096 For each fixup we note how far the pc-relative replacement will be
16097 able to reach and the offset of the instruction into the function.
16099 Having built the table we then group the fixes together to form
16100 tables that are as large as possible (subject to addressing
16101 constraints) and emit each table of constants after the last
16102 barrier that is within range of all the instructions in the group.
16103 If a group does not contain a barrier, then we forcibly create one
16104 by inserting a jump instruction into the flow. Once the table has
16105 been inserted, the insns are then modified to reference the
16106 relevant entry in the pool.
16108 Possible enhancements to the algorithm (not implemented) are:
16110 1) For some processors and object formats, there may be benefit in
16111 aligning the pools to the start of cache lines; this alignment
16112 would need to be taken into account when calculating addressability
16113 of a pool. */
16115 /* These typedefs are located at the start of this file, so that
16116 they can be used in the prototypes there. This comment is to
16117 remind readers of that fact so that the following structures
16118 can be understood more easily.
16120 typedef struct minipool_node Mnode;
16121 typedef struct minipool_fixup Mfix; */
16123 struct minipool_node
16125 /* Doubly linked chain of entries. */
16126 Mnode * next;
16127 Mnode * prev;
16128 /* The maximum offset into the code at which this entry can be placed. While
16129 pushing fixes for forward references, all entries are sorted in order
16130 of increasing max_address. */
16131 HOST_WIDE_INT max_address;
16132 /* Similarly for an entry inserted for a backwards ref. */
16133 HOST_WIDE_INT min_address;
16134 /* The number of fixes referencing this entry. This can become zero
16135 if we "unpush" an entry. In this case we ignore the entry when we
16136 come to emit the code. */
16137 int refcount;
16138 /* The offset from the start of the minipool. */
16139 HOST_WIDE_INT offset;
16140 /* The value in the table. */
16141 rtx value;
16142 /* The mode of value. */
16143 machine_mode mode;
16144 /* The size of the value. With iWMMXt enabled
16145 sizes > 4 also imply an alignment of 8 bytes. */
16146 int fix_size;
16149 struct minipool_fixup
16151 Mfix * next;
16152 rtx_insn * insn;
16153 HOST_WIDE_INT address;
16154 rtx * loc;
16155 machine_mode mode;
16156 int fix_size;
16157 rtx value;
16158 Mnode * minipool;
16159 HOST_WIDE_INT forwards;
16160 HOST_WIDE_INT backwards;
16163 /* Fixes less than a word need padding out to a word boundary. */
16164 #define MINIPOOL_FIX_SIZE(mode) \
16165 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
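/* So, for example, MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) are both 4, while
   MINIPOOL_FIX_SIZE (DImode) is 8 and a 16-byte vector mode keeps
   its natural size of 16. */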
16167 static Mnode * minipool_vector_head;
16168 static Mnode * minipool_vector_tail;
16169 static rtx_code_label *minipool_vector_label;
16170 static int minipool_pad;
16172 /* The linked list of all minipool fixes required for this function. */
16173 Mfix * minipool_fix_head;
16174 Mfix * minipool_fix_tail;
16175 /* The fix entry for the current minipool, once it has been placed. */
16176 Mfix * minipool_barrier;
16178 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16179 #define JUMP_TABLES_IN_TEXT_SECTION 0
16180 #endif
16182 static HOST_WIDE_INT
16183 get_jump_table_size (rtx_jump_table_data *insn)
16185 /* ADDR_VECs only take room if read-only data goes into the text
16186 section. */
16187 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16189 rtx body = PATTERN (insn);
16190 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16191 HOST_WIDE_INT size;
16192 HOST_WIDE_INT modesize;
16194 modesize = GET_MODE_SIZE (GET_MODE (body));
16195 size = modesize * XVECLEN (body, elt);
16196 switch (modesize)
16198 case 1:
16199 /* Round up size of TBB table to a halfword boundary. */
16200 size = (size + 1) & ~(HOST_WIDE_INT)1;
16201 break;
16202 case 2:
16203 /* No padding necessary for TBH. */
16204 break;
16205 case 4:
16206 /* Add two bytes for alignment on Thumb. */
16207 if (TARGET_THUMB)
16208 size += 2;
16209 break;
16210 default:
16211 gcc_unreachable ();
16213 return size;
16216 return 0;
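/* As an example of the sizing above: a TBB-style ADDR_DIFF_VEC in
   QImode with 5 entries occupies 5 bytes and is rounded up to 6, a
   TBH-style HImode table with 5 entries occupies exactly 10 bytes,
   and a 5-entry SImode table on Thumb is counted as 22 bytes (20
   plus 2 for alignment). This only applies when jump tables end up
   in the text section, as tested above. */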
16219 /* Return the maximum amount of padding that will be inserted before
16220 label LABEL. */
16222 static HOST_WIDE_INT
16223 get_label_padding (rtx label)
16225 HOST_WIDE_INT align, min_insn_size;
16227 align = 1 << label_to_alignment (label);
16228 min_insn_size = TARGET_THUMB ? 2 : 4;
16229 return align > min_insn_size ? align - min_insn_size : 0;
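/* For instance, a label aligned to 8 bytes can be preceded by up to
   6 bytes of padding on Thumb (minimum insn size 2) and up to 4
   bytes on ARM (minimum insn size 4); labels with no more than
   instruction alignment contribute nothing. */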
16232 /* Move a minipool fix MP from its current location to before MAX_MP.
16233 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16234 constraints may need updating. */
16235 static Mnode *
16236 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16237 HOST_WIDE_INT max_address)
16239 /* The code below assumes these are different. */
16240 gcc_assert (mp != max_mp);
16242 if (max_mp == NULL)
16244 if (max_address < mp->max_address)
16245 mp->max_address = max_address;
16247 else
16249 if (max_address > max_mp->max_address - mp->fix_size)
16250 mp->max_address = max_mp->max_address - mp->fix_size;
16251 else
16252 mp->max_address = max_address;
16254 /* Unlink MP from its current position. Since max_mp is non-null,
16255 mp->prev must be non-null. */
16256 mp->prev->next = mp->next;
16257 if (mp->next != NULL)
16258 mp->next->prev = mp->prev;
16259 else
16260 minipool_vector_tail = mp->prev;
16262 /* Re-insert it before MAX_MP. */
16263 mp->next = max_mp;
16264 mp->prev = max_mp->prev;
16265 max_mp->prev = mp;
16267 if (mp->prev != NULL)
16268 mp->prev->next = mp;
16269 else
16270 minipool_vector_head = mp;
16273 /* Save the new entry. */
16274 max_mp = mp;
16276 /* Scan over the preceding entries and adjust their addresses as
16277 required. */
16278 while (mp->prev != NULL
16279 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16281 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16282 mp = mp->prev;
16285 return max_mp;
16288 /* Add a constant to the minipool for a forward reference. Returns the
16289 node added or NULL if the constant will not fit in this pool. */
16290 static Mnode *
16291 add_minipool_forward_ref (Mfix *fix)
16293 /* If set, max_mp is the first pool_entry that has a lower
16294 constraint than the one we are trying to add. */
16295 Mnode * max_mp = NULL;
16296 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16297 Mnode * mp;
16299 /* If the minipool starts before the end of FIX->INSN then this FIX
16300 cannot be placed into the current pool. Furthermore, adding the
16301 new constant pool entry may cause the pool to start FIX_SIZE bytes
16302 earlier. */
16303 if (minipool_vector_head &&
16304 (fix->address + get_attr_length (fix->insn)
16305 >= minipool_vector_head->max_address - fix->fix_size))
16306 return NULL;
16308 /* Scan the pool to see if a constant with the same value has
16309 already been added. While we are doing this, also note the
16310 location where we must insert the constant if it doesn't already
16311 exist. */
16312 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16314 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16315 && fix->mode == mp->mode
16316 && (!LABEL_P (fix->value)
16317 || (CODE_LABEL_NUMBER (fix->value)
16318 == CODE_LABEL_NUMBER (mp->value)))
16319 && rtx_equal_p (fix->value, mp->value))
16321 /* More than one fix references this entry. */
16322 mp->refcount++;
16323 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16326 /* Note the insertion point if necessary. */
16327 if (max_mp == NULL
16328 && mp->max_address > max_address)
16329 max_mp = mp;
16331 /* If we are inserting an 8-byte aligned quantity and
16332 we have not already found an insertion point, then
16333 make sure that all such 8-byte aligned quantities are
16334 placed at the start of the pool. */
16335 if (ARM_DOUBLEWORD_ALIGN
16336 && max_mp == NULL
16337 && fix->fix_size >= 8
16338 && mp->fix_size < 8)
16340 max_mp = mp;
16341 max_address = mp->max_address;
16345 /* The value is not currently in the minipool, so we need to create
16346 a new entry for it. If MAX_MP is NULL, the entry will be put on
16347 the end of the list since the placement is less constrained than
16348 any existing entry. Otherwise, we insert the new fix before
16349 MAX_MP and, if necessary, adjust the constraints on the other
16350 entries. */
16351 mp = XNEW (Mnode);
16352 mp->fix_size = fix->fix_size;
16353 mp->mode = fix->mode;
16354 mp->value = fix->value;
16355 mp->refcount = 1;
16356 /* Not yet required for a backwards ref. */
16357 mp->min_address = -65536;
16359 if (max_mp == NULL)
16361 mp->max_address = max_address;
16362 mp->next = NULL;
16363 mp->prev = minipool_vector_tail;
16365 if (mp->prev == NULL)
16367 minipool_vector_head = mp;
16368 minipool_vector_label = gen_label_rtx ();
16370 else
16371 mp->prev->next = mp;
16373 minipool_vector_tail = mp;
16375 else
16377 if (max_address > max_mp->max_address - mp->fix_size)
16378 mp->max_address = max_mp->max_address - mp->fix_size;
16379 else
16380 mp->max_address = max_address;
16382 mp->next = max_mp;
16383 mp->prev = max_mp->prev;
16384 max_mp->prev = mp;
16385 if (mp->prev != NULL)
16386 mp->prev->next = mp;
16387 else
16388 minipool_vector_head = mp;
16391 /* Save the new entry. */
16392 max_mp = mp;
16394 /* Scan over the preceding entries and adjust their addresses as
16395 required. */
16396 while (mp->prev != NULL
16397 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16399 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16400 mp = mp->prev;
16403 return max_mp;
16406 static Mnode *
16407 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16408 HOST_WIDE_INT min_address)
16410 HOST_WIDE_INT offset;
16412 /* The code below assumes these are different. */
16413 gcc_assert (mp != min_mp);
16415 if (min_mp == NULL)
16417 if (min_address > mp->min_address)
16418 mp->min_address = min_address;
16420 else
16422 /* We will adjust this below if it is too loose. */
16423 mp->min_address = min_address;
16425 /* Unlink MP from its current position. Since min_mp is non-null,
16426 mp->next must be non-null. */
16427 mp->next->prev = mp->prev;
16428 if (mp->prev != NULL)
16429 mp->prev->next = mp->next;
16430 else
16431 minipool_vector_head = mp->next;
16433 /* Reinsert it after MIN_MP. */
16434 mp->prev = min_mp;
16435 mp->next = min_mp->next;
16436 min_mp->next = mp;
16437 if (mp->next != NULL)
16438 mp->next->prev = mp;
16439 else
16440 minipool_vector_tail = mp;
16443 min_mp = mp;
16445 offset = 0;
16446 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16448 mp->offset = offset;
16449 if (mp->refcount > 0)
16450 offset += mp->fix_size;
16452 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16453 mp->next->min_address = mp->min_address + mp->fix_size;
16456 return min_mp;
16459 /* Add a constant to the minipool for a backward reference. Returns the
16460 node added or NULL if the constant will not fit in this pool.
16462 Note that the code for inserting a backwards reference can be
16463 somewhat confusing because the calculated offsets for each fix do
16464 not take into account the size of the pool (which is still under
16465 construction). */
16466 static Mnode *
16467 add_minipool_backward_ref (Mfix *fix)
16469 /* If set, min_mp is the last pool_entry that has a lower constraint
16470 than the one we are trying to add. */
16471 Mnode *min_mp = NULL;
16472 /* This can be negative, since it is only a constraint. */
16473 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16474 Mnode *mp;
16476 /* If we can't reach the current pool from this insn, or if we can't
16477 insert this entry at the end of the pool without pushing other
16478 fixes out of range, then we don't try. This ensures that we
16479 can't fail later on. */
16480 if (min_address >= minipool_barrier->address
16481 || (minipool_vector_tail->min_address + fix->fix_size
16482 >= minipool_barrier->address))
16483 return NULL;
16485 /* Scan the pool to see if a constant with the same value has
16486 already been added. While we are doing this, also note the
16487 location where we must insert the constant if it doesn't already
16488 exist. */
16489 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16491 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16492 && fix->mode == mp->mode
16493 && (!LABEL_P (fix->value)
16494 || (CODE_LABEL_NUMBER (fix->value)
16495 == CODE_LABEL_NUMBER (mp->value)))
16496 && rtx_equal_p (fix->value, mp->value)
16497 /* Check that there is enough slack to move this entry to the
16498 end of the table (this is conservative). */
16499 && (mp->max_address
16500 > (minipool_barrier->address
16501 + minipool_vector_tail->offset
16502 + minipool_vector_tail->fix_size)))
16504 mp->refcount++;
16505 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16508 if (min_mp != NULL)
16509 mp->min_address += fix->fix_size;
16510 else
16512 /* Note the insertion point if necessary. */
16513 if (mp->min_address < min_address)
16515 /* For now, we do not allow the insertion of nodes requiring 8-byte
16516 alignment anywhere but at the start of the pool. */
16517 if (ARM_DOUBLEWORD_ALIGN
16518 && fix->fix_size >= 8 && mp->fix_size < 8)
16519 return NULL;
16520 else
16521 min_mp = mp;
16523 else if (mp->max_address
16524 < minipool_barrier->address + mp->offset + fix->fix_size)
16526 /* Inserting before this entry would push the fix beyond
16527 its maximum address (which can happen if we have
16528 re-located a forwards fix); force the new fix to come
16529 after it. */
16530 if (ARM_DOUBLEWORD_ALIGN
16531 && fix->fix_size >= 8 && mp->fix_size < 8)
16532 return NULL;
16533 else
16535 min_mp = mp;
16536 min_address = mp->min_address + fix->fix_size;
16539 /* Do not insert a non-8-byte aligned quantity before 8-byte
16540 aligned quantities. */
16541 else if (ARM_DOUBLEWORD_ALIGN
16542 && fix->fix_size < 8
16543 && mp->fix_size >= 8)
16545 min_mp = mp;
16546 min_address = mp->min_address + fix->fix_size;
16551 /* We need to create a new entry. */
16552 mp = XNEW (Mnode);
16553 mp->fix_size = fix->fix_size;
16554 mp->mode = fix->mode;
16555 mp->value = fix->value;
16556 mp->refcount = 1;
16557 mp->max_address = minipool_barrier->address + 65536;
16559 mp->min_address = min_address;
16561 if (min_mp == NULL)
16563 mp->prev = NULL;
16564 mp->next = minipool_vector_head;
16566 if (mp->next == NULL)
16568 minipool_vector_tail = mp;
16569 minipool_vector_label = gen_label_rtx ();
16571 else
16572 mp->next->prev = mp;
16574 minipool_vector_head = mp;
16576 else
16578 mp->next = min_mp->next;
16579 mp->prev = min_mp;
16580 min_mp->next = mp;
16582 if (mp->next != NULL)
16583 mp->next->prev = mp;
16584 else
16585 minipool_vector_tail = mp;
16588 /* Save the new entry. */
16589 min_mp = mp;
16591 if (mp->prev)
16592 mp = mp->prev;
16593 else
16594 mp->offset = 0;
16596 /* Scan over the following entries and adjust their offsets. */
16597 while (mp->next != NULL)
16599 if (mp->next->min_address < mp->min_address + mp->fix_size)
16600 mp->next->min_address = mp->min_address + mp->fix_size;
16602 if (mp->refcount)
16603 mp->next->offset = mp->offset + mp->fix_size;
16604 else
16605 mp->next->offset = mp->offset;
16607 mp = mp->next;
16610 return min_mp;
16613 static void
16614 assign_minipool_offsets (Mfix *barrier)
16616 HOST_WIDE_INT offset = 0;
16617 Mnode *mp;
16619 minipool_barrier = barrier;
16621 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16623 mp->offset = offset;
16625 if (mp->refcount > 0)
16626 offset += mp->fix_size;
16630 /* Output the literal table. */
16631 static void
16632 dump_minipool (rtx_insn *scan)
16634 Mnode * mp;
16635 Mnode * nmp;
16636 int align64 = 0;
16638 if (ARM_DOUBLEWORD_ALIGN)
16639 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16640 if (mp->refcount > 0 && mp->fix_size >= 8)
16642 align64 = 1;
16643 break;
16646 if (dump_file)
16647 fprintf (dump_file,
16648 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16649 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16651 scan = emit_label_after (gen_label_rtx (), scan);
16652 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16653 scan = emit_label_after (minipool_vector_label, scan);
16655 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16657 if (mp->refcount > 0)
16659 if (dump_file)
16661 fprintf (dump_file,
16662 ";; Offset %u, min %ld, max %ld ",
16663 (unsigned) mp->offset, (unsigned long) mp->min_address,
16664 (unsigned long) mp->max_address);
16665 arm_print_value (dump_file, mp->value);
16666 fputc ('\n', dump_file);
16669 switch (mp->fix_size)
16671 #ifdef HAVE_consttable_1
16672 case 1:
16673 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16674 break;
16676 #endif
16677 #ifdef HAVE_consttable_2
16678 case 2:
16679 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16680 break;
16682 #endif
16683 #ifdef HAVE_consttable_4
16684 case 4:
16685 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16686 break;
16688 #endif
16689 #ifdef HAVE_consttable_8
16690 case 8:
16691 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16692 break;
16694 #endif
16695 #ifdef HAVE_consttable_16
16696 case 16:
16697 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16698 break;
16700 #endif
16701 default:
16702 gcc_unreachable ();
16706 nmp = mp->next;
16707 free (mp);
16710 minipool_vector_head = minipool_vector_tail = NULL;
16711 scan = emit_insn_after (gen_consttable_end (), scan);
16712 scan = emit_barrier_after (scan);
16715 /* Return the cost of forcibly inserting a barrier after INSN. */
16716 static int
16717 arm_barrier_cost (rtx insn)
16719 /* Basing the location of the pool on the loop depth is preferable,
16720 but at the moment, the basic block information seems to be
16721 corrupted by this stage of the compilation. */
16722 int base_cost = 50;
16723 rtx next = next_nonnote_insn (insn);
16725 if (next != NULL && LABEL_P (next))
16726 base_cost -= 20;
16728 switch (GET_CODE (insn))
16730 case CODE_LABEL:
16731 /* It will always be better to place the table before the label, rather
16732 than after it. */
16733 return 50;
16735 case INSN:
16736 case CALL_INSN:
16737 return base_cost;
16739 case JUMP_INSN:
16740 return base_cost - 10;
16742 default:
16743 return base_cost + 10;
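/* Illustrative costs from the rules above: a CODE_LABEL always costs
   50; an ordinary insn or call costs 50, or 30 when the next
   non-note insn is a label; a jump costs 40 (20 before a label), so
   existing unconditional branches are the cheapest places to hide a
   pool. */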
16747 /* Find the best place in the insn stream in the range
16748 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16749 Create the barrier by inserting a jump and add a new fix entry for
16750 it. */
16751 static Mfix *
16752 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16754 HOST_WIDE_INT count = 0;
16755 rtx_barrier *barrier;
16756 rtx_insn *from = fix->insn;
16757 /* The instruction after which we will insert the jump. */
16758 rtx_insn *selected = NULL;
16759 int selected_cost;
16760 /* The address at which the jump instruction will be placed. */
16761 HOST_WIDE_INT selected_address;
16762 Mfix * new_fix;
16763 HOST_WIDE_INT max_count = max_address - fix->address;
16764 rtx_code_label *label = gen_label_rtx ();
16766 selected_cost = arm_barrier_cost (from);
16767 selected_address = fix->address;
16769 while (from && count < max_count)
16771 rtx_jump_table_data *tmp;
16772 int new_cost;
16774 /* This code shouldn't have been called if there was a natural barrier
16775 within range. */
16776 gcc_assert (!BARRIER_P (from));
16778 /* Count the length of this insn. This must stay in sync with the
16779 code that pushes minipool fixes. */
16780 if (LABEL_P (from))
16781 count += get_label_padding (from);
16782 else
16783 count += get_attr_length (from);
16785 /* If there is a jump table, add its length. */
16786 if (tablejump_p (from, NULL, &tmp))
16788 count += get_jump_table_size (tmp);
16790 /* Jump tables aren't in a basic block, so base the cost on
16791 the dispatch insn. If we select this location, we will
16792 still put the pool after the table. */
16793 new_cost = arm_barrier_cost (from);
16795 if (count < max_count
16796 && (!selected || new_cost <= selected_cost))
16798 selected = tmp;
16799 selected_cost = new_cost;
16800 selected_address = fix->address + count;
16803 /* Continue after the dispatch table. */
16804 from = NEXT_INSN (tmp);
16805 continue;
16808 new_cost = arm_barrier_cost (from);
16810 if (count < max_count
16811 && (!selected || new_cost <= selected_cost))
16813 selected = from;
16814 selected_cost = new_cost;
16815 selected_address = fix->address + count;
16818 from = NEXT_INSN (from);
16821 /* Make sure that we found a place to insert the jump. */
16822 gcc_assert (selected);
16824 /* Make sure we do not split a call and its corresponding
16825 CALL_ARG_LOCATION note. */
16826 if (CALL_P (selected))
16828 rtx_insn *next = NEXT_INSN (selected);
16829 if (next && NOTE_P (next)
16830 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16831 selected = next;
16834 /* Create a new JUMP_INSN that branches around a barrier. */
16835 from = emit_jump_insn_after (gen_jump (label), selected);
16836 JUMP_LABEL (from) = label;
16837 barrier = emit_barrier_after (from);
16838 emit_label_after (label, barrier);
16840 /* Create a minipool barrier entry for the new barrier. */
16841 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16842 new_fix->insn = barrier;
16843 new_fix->address = selected_address;
16844 new_fix->next = fix->next;
16845 fix->next = new_fix;
16847 return new_fix;
16850 /* Record that there is a natural barrier in the insn stream at
16851 ADDRESS. */
16852 static void
16853 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16855 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16857 fix->insn = insn;
16858 fix->address = address;
16860 fix->next = NULL;
16861 if (minipool_fix_head != NULL)
16862 minipool_fix_tail->next = fix;
16863 else
16864 minipool_fix_head = fix;
16866 minipool_fix_tail = fix;
16869 /* Record INSN, which will need fixing up to load a value from the
16870 minipool. ADDRESS is the offset of the insn since the start of the
16871 function; LOC is a pointer to the part of the insn which requires
16872 fixing; VALUE is the constant that must be loaded, which is of type
16873 MODE. */
16874 static void
16875 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16876 machine_mode mode, rtx value)
16878 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16880 fix->insn = insn;
16881 fix->address = address;
16882 fix->loc = loc;
16883 fix->mode = mode;
16884 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16885 fix->value = value;
16886 fix->forwards = get_attr_pool_range (insn);
16887 fix->backwards = get_attr_neg_pool_range (insn);
16888 fix->minipool = NULL;
16890 /* If an insn doesn't have a range defined for it, then it isn't
16891 expecting to be reworked by this code. Better to stop now than
16892 to generate duff assembly code. */
16893 gcc_assert (fix->forwards || fix->backwards);
16895 /* If an entry requires 8-byte alignment then assume all constant pools
16896 require 4 bytes of padding. Trying to do this later on a per-pool
16897 basis is awkward because existing pool entries have to be modified. */
16898 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16899 minipool_pad = 4;
16901 if (dump_file)
16903 fprintf (dump_file,
16904 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16905 GET_MODE_NAME (mode),
16906 INSN_UID (insn), (unsigned long) address,
16907 -1 * (long)fix->backwards, (long)fix->forwards);
16908 arm_print_value (dump_file, fix->value);
16909 fprintf (dump_file, "\n");
16912 /* Add it to the chain of fixes. */
16913 fix->next = NULL;
16915 if (minipool_fix_head != NULL)
16916 minipool_fix_tail->next = fix;
16917 else
16918 minipool_fix_head = fix;
16920 minipool_fix_tail = fix;
16923 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16924 Returns the number of insns needed, or 99 if we always want to synthesize
16925 the value. */
16927 arm_max_const_double_inline_cost ()
16929 /* Let the value get synthesized to avoid the use of literal pools. */
16930 if (arm_disable_literal_pool)
16931 return 99;
16933 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16936 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16937 Returns the number of insns needed, or 99 if we don't know how to
16938 do it. */
16940 arm_const_double_inline_cost (rtx val)
16942 rtx lowpart, highpart;
16943 machine_mode mode;
16945 mode = GET_MODE (val);
16947 if (mode == VOIDmode)
16948 mode = DImode;
16950 gcc_assert (GET_MODE_SIZE (mode) == 8);
16952 lowpart = gen_lowpart (SImode, val);
16953 highpart = gen_highpart_mode (SImode, mode, val);
16955 gcc_assert (CONST_INT_P (lowpart));
16956 gcc_assert (CONST_INT_P (highpart));
16958 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16959 NULL_RTX, NULL_RTX, 0, 0)
16960 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16961 NULL_RTX, NULL_RTX, 0, 0));
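/* As a rough example: for the 64-bit constant 0x0000000100000001 both
   halves are 1, each synthesizable with a single MOV, so the cost
   reported here is 2 and the value is cheap enough to build inline
   under the limit returned by arm_max_const_double_inline_cost (3 or
   4 above). A constant whose halves each need several MOV/ORR steps
   exceeds that limit and is then typically loaded from the literal
   pool instead, unless literal pools are disabled, in which case the
   limit is raised to 99 and the value is always synthesized. */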
16964 /* Cost of loading a SImode constant. */
16965 static inline int
16966 arm_const_inline_cost (enum rtx_code code, rtx val)
16968 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16969 NULL_RTX, NULL_RTX, 1, 0);
16972 /* Return true if it is worthwhile to split a 64-bit constant into two
16973 32-bit operations. This is the case if optimizing for size, or
16974 if we have load delay slots, or if one 32-bit part can be done with
16975 a single data operation. */
16976 bool
16977 arm_const_double_by_parts (rtx val)
16979 machine_mode mode = GET_MODE (val);
16980 rtx part;
16982 if (optimize_size || arm_ld_sched)
16983 return true;
16985 if (mode == VOIDmode)
16986 mode = DImode;
16988 part = gen_highpart_mode (SImode, mode, val);
16990 gcc_assert (CONST_INT_P (part));
16992 if (const_ok_for_arm (INTVAL (part))
16993 || const_ok_for_arm (~INTVAL (part)))
16994 return true;
16996 part = gen_lowpart (SImode, val);
16998 gcc_assert (CONST_INT_P (part));
17000 if (const_ok_for_arm (INTVAL (part))
17001 || const_ok_for_arm (~INTVAL (part)))
17002 return true;
17004 return false;
17007 /* Return true if it is possible to inline both the high and low parts
17008 of a 64-bit constant into 32-bit data processing instructions. */
17009 bool
17010 arm_const_double_by_immediates (rtx val)
17012 machine_mode mode = GET_MODE (val);
17013 rtx part;
17015 if (mode == VOIDmode)
17016 mode = DImode;
17018 part = gen_highpart_mode (SImode, mode, val);
17020 gcc_assert (CONST_INT_P (part));
17022 if (!const_ok_for_arm (INTVAL (part)))
17023 return false;
17025 part = gen_lowpart (SImode, val);
17027 gcc_assert (CONST_INT_P (part));
17029 if (!const_ok_for_arm (INTVAL (part)))
17030 return false;
17032 return true;
17035 /* Scan INSN and note any of its operands that need fixing.
17036 If DO_PUSHES is false we do not actually push any of the fixups
17037 needed. */
17038 static void
17039 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17041 int opno;
17043 extract_constrain_insn (insn);
17045 if (recog_data.n_alternatives == 0)
17046 return;
17048 /* Fill in recog_op_alt with information about the constraints of
17049 this insn. */
17050 preprocess_constraints (insn);
17052 const operand_alternative *op_alt = which_op_alt ();
17053 for (opno = 0; opno < recog_data.n_operands; opno++)
17055 /* Things we need to fix can only occur in inputs. */
17056 if (recog_data.operand_type[opno] != OP_IN)
17057 continue;
17059 /* If this alternative is a memory reference, then any mention
17060 of constants in this alternative is really to fool reload
17061 into allowing us to accept one there. We need to fix them up
17062 now so that we output the right code. */
17063 if (op_alt[opno].memory_ok)
17065 rtx op = recog_data.operand[opno];
17067 if (CONSTANT_P (op))
17069 if (do_pushes)
17070 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17071 recog_data.operand_mode[opno], op);
17073 else if (MEM_P (op)
17074 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17075 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17077 if (do_pushes)
17079 rtx cop = avoid_constant_pool_reference (op);
17081 /* Casting the address of something to a mode narrower
17082 than a word can cause avoid_constant_pool_reference()
17083 to return the pool reference itself. That's no good to
17084 us here. Let's just hope that we can use the
17085 constant pool value directly. */
17086 if (op == cop)
17087 cop = get_pool_constant (XEXP (op, 0));
17089 push_minipool_fix (insn, address,
17090 recog_data.operand_loc[opno],
17091 recog_data.operand_mode[opno], cop);
17098 return;
17101 /* Rewrite move insn into subtract of 0 if the condition codes will
17102 be useful in the next conditional jump insn. */
17104 static void
17105 thumb1_reorg (void)
17107 basic_block bb;
17109 FOR_EACH_BB_FN (bb, cfun)
17111 rtx dest, src;
17112 rtx pat, op0, set = NULL;
17113 rtx_insn *prev, *insn = BB_END (bb);
17114 bool insn_clobbered = false;
17116 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17117 insn = PREV_INSN (insn);
17119 /* Find the last cbranchsi4_insn in basic block BB. */
17120 if (insn == BB_HEAD (bb)
17121 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17122 continue;
17124 /* Get the register with which we are comparing. */
17125 pat = PATTERN (insn);
17126 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17128 /* Find the first flag setting insn before INSN in basic block BB. */
17129 gcc_assert (insn != BB_HEAD (bb));
17130 for (prev = PREV_INSN (insn);
17131 (!insn_clobbered
17132 && prev != BB_HEAD (bb)
17133 && (NOTE_P (prev)
17134 || DEBUG_INSN_P (prev)
17135 || ((set = single_set (prev)) != NULL
17136 && get_attr_conds (prev) == CONDS_NOCOND)));
17137 prev = PREV_INSN (prev))
17139 if (reg_set_p (op0, prev))
17140 insn_clobbered = true;
17143 /* Skip if op0 is clobbered by an insn other than prev. */
17144 if (insn_clobbered)
17145 continue;
17147 if (!set)
17148 continue;
17150 dest = SET_DEST (set);
17151 src = SET_SRC (set);
17152 if (!low_register_operand (dest, SImode)
17153 || !low_register_operand (src, SImode))
17154 continue;
17156 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17157 in INSN. Both src and dest of the move insn are checked. */
17158 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17160 dest = copy_rtx (dest);
17161 src = copy_rtx (src);
17162 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17163 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17164 INSN_CODE (prev) = -1;
17165 /* Set test register in INSN to dest. */
17166 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17167 INSN_CODE (insn) = -1;
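/* Roughly, the intent of the transformation above is that a sequence
   such as
        mov   r3, r2
        ...
        cmp   r3, #0
        bne   .L1
   becomes
        subs  r3, r2, #0
        ...
        bne   .L1
   since the flag-setting SUBS already establishes the condition that
   the compare against zero would compute, allowing the redundant
   comparison to be dropped when the conditional branch is output. */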
17172 /* Convert instructions to their cc-clobbering variant if possible, since
17173 that allows us to use smaller encodings. */
17175 static void
17176 thumb2_reorg (void)
17178 basic_block bb;
17179 regset_head live;
17181 INIT_REG_SET (&live);
17183 /* We are freeing block_for_insn in the toplev to keep compatibility
17184 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17185 compute_bb_for_insn ();
17186 df_analyze ();
17188 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17190 FOR_EACH_BB_FN (bb, cfun)
17192 if (current_tune->disparage_flag_setting_t16_encodings
17193 && optimize_bb_for_speed_p (bb))
17194 continue;
17196 rtx_insn *insn;
17197 Convert_Action action = SKIP;
17198 Convert_Action action_for_partial_flag_setting
17199 = (current_tune->disparage_partial_flag_setting_t16_encodings
17200 && optimize_bb_for_speed_p (bb))
17201 ? SKIP : CONV;
17203 COPY_REG_SET (&live, DF_LR_OUT (bb));
17204 df_simulate_initialize_backwards (bb, &live);
17205 FOR_BB_INSNS_REVERSE (bb, insn)
17207 if (NONJUMP_INSN_P (insn)
17208 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17209 && GET_CODE (PATTERN (insn)) == SET)
17211 action = SKIP;
17212 rtx pat = PATTERN (insn);
17213 rtx dst = XEXP (pat, 0);
17214 rtx src = XEXP (pat, 1);
17215 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17217 if (!OBJECT_P (src))
17218 op0 = XEXP (src, 0);
17220 if (BINARY_P (src))
17221 op1 = XEXP (src, 1);
17223 if (low_register_operand (dst, SImode))
17225 switch (GET_CODE (src))
17227 case PLUS:
17228 /* Adding two registers and storing the result
17229 in the first source is already a 16-bit
17230 operation. */
17231 if (rtx_equal_p (dst, op0)
17232 && register_operand (op1, SImode))
17233 break;
17235 if (low_register_operand (op0, SImode))
17237 /* ADDS <Rd>,<Rn>,<Rm> */
17238 if (low_register_operand (op1, SImode))
17239 action = CONV;
17240 /* ADDS <Rdn>,#<imm8> */
17241 /* SUBS <Rdn>,#<imm8> */
17242 else if (rtx_equal_p (dst, op0)
17243 && CONST_INT_P (op1)
17244 && IN_RANGE (INTVAL (op1), -255, 255))
17245 action = CONV;
17246 /* ADDS <Rd>,<Rn>,#<imm3> */
17247 /* SUBS <Rd>,<Rn>,#<imm3> */
17248 else if (CONST_INT_P (op1)
17249 && IN_RANGE (INTVAL (op1), -7, 7))
17250 action = CONV;
17252 /* ADCS <Rd>, <Rn> */
17253 else if (GET_CODE (XEXP (src, 0)) == PLUS
17254 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17255 && low_register_operand (XEXP (XEXP (src, 0), 1),
17256 SImode)
17257 && COMPARISON_P (op1)
17258 && cc_register (XEXP (op1, 0), VOIDmode)
17259 && maybe_get_arm_condition_code (op1) == ARM_CS
17260 && XEXP (op1, 1) == const0_rtx)
17261 action = CONV;
17262 break;
17264 case MINUS:
17265 /* RSBS <Rd>,<Rn>,#0
17266 Not handled here: see NEG below. */
17267 /* SUBS <Rd>,<Rn>,#<imm3>
17268 SUBS <Rdn>,#<imm8>
17269 Not handled here: see PLUS above. */
17270 /* SUBS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op0, SImode)
17272 && low_register_operand (op1, SImode))
17273 action = CONV;
17274 break;
17276 case MULT:
17277 /* MULS <Rdm>,<Rn>,<Rdm>
17278 As an exception to the rule, this is only used
17279 when optimizing for size since MULS is slow on all
17280 known implementations. We do not even want to use
17281 MULS in cold code, if optimizing for speed, so we
17282 test the global flag here. */
17283 if (!optimize_size)
17284 break;
17285 /* else fall through. */
17286 case AND:
17287 case IOR:
17288 case XOR:
17289 /* ANDS <Rdn>,<Rm> */
17290 if (rtx_equal_p (dst, op0)
17291 && low_register_operand (op1, SImode))
17292 action = action_for_partial_flag_setting;
17293 else if (rtx_equal_p (dst, op1)
17294 && low_register_operand (op0, SImode))
17295 action = action_for_partial_flag_setting == SKIP
17296 ? SKIP : SWAP_CONV;
17297 break;
17299 case ASHIFTRT:
17300 case ASHIFT:
17301 case LSHIFTRT:
17302 /* ASRS <Rdn>,<Rm> */
17303 /* LSRS <Rdn>,<Rm> */
17304 /* LSLS <Rdn>,<Rm> */
17305 if (rtx_equal_p (dst, op0)
17306 && low_register_operand (op1, SImode))
17307 action = action_for_partial_flag_setting;
17308 /* ASRS <Rd>,<Rm>,#<imm5> */
17309 /* LSRS <Rd>,<Rm>,#<imm5> */
17310 /* LSLS <Rd>,<Rm>,#<imm5> */
17311 else if (low_register_operand (op0, SImode)
17312 && CONST_INT_P (op1)
17313 && IN_RANGE (INTVAL (op1), 0, 31))
17314 action = action_for_partial_flag_setting;
17315 break;
17317 case ROTATERT:
17318 /* RORS <Rdn>,<Rm> */
17319 if (rtx_equal_p (dst, op0)
17320 && low_register_operand (op1, SImode))
17321 action = action_for_partial_flag_setting;
17322 break;
17324 case NOT:
17325 /* MVNS <Rd>,<Rm> */
17326 if (low_register_operand (op0, SImode))
17327 action = action_for_partial_flag_setting;
17328 break;
17330 case NEG:
17331 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17332 if (low_register_operand (op0, SImode))
17333 action = CONV;
17334 break;
17336 case CONST_INT:
17337 /* MOVS <Rd>,#<imm8> */
17338 if (CONST_INT_P (src)
17339 && IN_RANGE (INTVAL (src), 0, 255))
17340 action = action_for_partial_flag_setting;
17341 break;
17343 case REG:
17344 /* MOVS and MOV<c> with registers have different
17345 encodings, so are not relevant here. */
17346 break;
17348 default:
17349 break;
17353 if (action != SKIP)
17355 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17356 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17357 rtvec vec;
17359 if (action == SWAP_CONV)
17361 src = copy_rtx (src);
17362 XEXP (src, 0) = op1;
17363 XEXP (src, 1) = op0;
17364 pat = gen_rtx_SET (VOIDmode, dst, src);
17365 vec = gen_rtvec (2, pat, clobber);
17367 else /* action == CONV */
17368 vec = gen_rtvec (2, pat, clobber);
17370 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17371 INSN_CODE (insn) = -1;
17375 if (NONDEBUG_INSN_P (insn))
17376 df_simulate_one_insn_backwards (bb, insn, &live);
17380 CLEAR_REG_SET (&live);
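/* For example, when the condition codes are dead after the insn, a
   32-bit "add r0, r1, r2" can be rewritten by the pass above as
   "adds r0, r1, r2", which has a 16-bit Thumb-2 encoding; the same
   idea applies to the SUBS/ANDS/LSLS/MOVS/MVNS/NEGS forms handled in
   the switch above. */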
17383 /* GCC puts the pool in the wrong place for ARM, since we can only
17384 load addresses from within a limited distance of the pc. We do some
17385 special munging to move the constant pool values to the correct
17386 point in the code. */
17387 static void
17388 arm_reorg (void)
17390 rtx_insn *insn;
17391 HOST_WIDE_INT address = 0;
17392 Mfix * fix;
17394 if (TARGET_THUMB1)
17395 thumb1_reorg ();
17396 else if (TARGET_THUMB2)
17397 thumb2_reorg ();
17399 /* Ensure all insns that must be split have been split at this point.
17400 Otherwise, the pool placement code below may compute incorrect
17401 insn lengths. Note that when optimizing, all insns have already
17402 been split at this point. */
17403 if (!optimize)
17404 split_all_insns_noflow ();
17406 minipool_fix_head = minipool_fix_tail = NULL;
17408 /* The first insn must always be a note, or the code below won't
17409 scan it properly. */
17410 insn = get_insns ();
17411 gcc_assert (NOTE_P (insn));
17412 minipool_pad = 0;
17414 /* Scan all the insns and record the operands that will need fixing. */
17415 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17417 if (BARRIER_P (insn))
17418 push_minipool_barrier (insn, address);
17419 else if (INSN_P (insn))
17421 rtx_jump_table_data *table;
17423 note_invalid_constants (insn, address, true);
17424 address += get_attr_length (insn);
17426 /* If the insn is a vector jump, add the size of the table
17427 and skip the table. */
17428 if (tablejump_p (insn, NULL, &table))
17430 address += get_jump_table_size (table);
17431 insn = table;
17434 else if (LABEL_P (insn))
17435 /* Add the worst-case padding due to alignment. We don't add
17436 the _current_ padding because the minipool insertions
17437 themselves might change it. */
17438 address += get_label_padding (insn);
17441 fix = minipool_fix_head;
17443 /* Now scan the fixups and perform the required changes. */
17444 while (fix)
17446 Mfix * ftmp;
17447 Mfix * fdel;
17448 Mfix * last_added_fix;
17449 Mfix * last_barrier = NULL;
17450 Mfix * this_fix;
17452 /* Skip any further barriers before the next fix. */
17453 while (fix && BARRIER_P (fix->insn))
17454 fix = fix->next;
17456 /* No more fixes. */
17457 if (fix == NULL)
17458 break;
17460 last_added_fix = NULL;
17462 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17464 if (BARRIER_P (ftmp->insn))
17466 if (ftmp->address >= minipool_vector_head->max_address)
17467 break;
17469 last_barrier = ftmp;
17471 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17472 break;
17474 last_added_fix = ftmp; /* Keep track of the last fix added. */
17477 /* If we found a barrier, drop back to that; any fixes that we
17478 could have reached but come after the barrier will now go in
17479 the next mini-pool. */
17480 if (last_barrier != NULL)
17482 /* Reduce the refcount for those fixes that won't go into this
17483 pool after all. */
17484 for (fdel = last_barrier->next;
17485 fdel && fdel != ftmp;
17486 fdel = fdel->next)
17488 fdel->minipool->refcount--;
17489 fdel->minipool = NULL;
17492 ftmp = last_barrier;
17494 else
17496 /* ftmp is the first fix that we can't fit into this pool and
17497 there are no natural barriers that we could use. Insert a
17498 new barrier in the code somewhere between the previous
17499 fix and this one, and arrange to jump around it. */
17500 HOST_WIDE_INT max_address;
17502 /* The last item on the list of fixes must be a barrier, so
17503 we can never run off the end of the list of fixes without
17504 last_barrier being set. */
17505 gcc_assert (ftmp);
17507 max_address = minipool_vector_head->max_address;
17508 /* Check that there isn't another fix that is in range that
17509 we couldn't fit into this pool because the pool was
17510 already too large: we need to put the pool before such an
17511 instruction. The pool itself may come just after the
17512 fix because create_fix_barrier also allows space for a
17513 jump instruction. */
17514 if (ftmp->address < max_address)
17515 max_address = ftmp->address + 1;
17517 last_barrier = create_fix_barrier (last_added_fix, max_address);
17520 assign_minipool_offsets (last_barrier);
17522 while (ftmp)
17524 if (!BARRIER_P (ftmp->insn)
17525 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17526 == NULL))
17527 break;
17529 ftmp = ftmp->next;
17532 /* Scan over the fixes we have identified for this pool, fixing them
17533 up and adding the constants to the pool itself. */
17534 for (this_fix = fix; this_fix && ftmp != this_fix;
17535 this_fix = this_fix->next)
17536 if (!BARRIER_P (this_fix->insn))
17538 rtx addr
17539 = plus_constant (Pmode,
17540 gen_rtx_LABEL_REF (VOIDmode,
17541 minipool_vector_label),
17542 this_fix->minipool->offset);
17543 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17546 dump_minipool (last_barrier->insn);
17547 fix = ftmp;
17550 /* From now on we must synthesize any constants that we can't handle
17551 directly. This can happen if the RTL gets split during final
17552 instruction generation. */
17553 cfun->machine->after_arm_reorg = 1;
17555 /* Free the minipool memory. */
17556 obstack_free (&minipool_obstack, minipool_startobj);
17559 /* Routines to output assembly language. */
17561 /* Return the string representation of the passed-in real value. */
17562 static const char *
17563 fp_const_from_val (REAL_VALUE_TYPE *r)
17565 if (!fp_consts_inited)
17566 init_fp_table ();
17568 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17569 return "0";
17572 /* OPERANDS[0] is the entire list of insns that constitute pop,
17573 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17574 is in the list, UPDATE is true iff the list contains explicit
17575 update of base register. */
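/* Illustrative output (not from the original source): popping {r4, r5, pc}
   with SP as the base register and writeback gives

       pop     {r4, r5, pc}        @ unified syntax
       ldmfd   sp!, {r4, r5, pc}   @ divided syntax

   and a "^" is appended when returning from an interrupt handler.  */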
17576 void
17577 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17578 bool update)
17580 int i;
17581 char pattern[100];
17582 int offset;
17583 const char *conditional;
17584 int num_saves = XVECLEN (operands[0], 0);
17585 unsigned int regno;
17586 unsigned int regno_base = REGNO (operands[1]);
17588 offset = 0;
17589 offset += update ? 1 : 0;
17590 offset += return_pc ? 1 : 0;
17592 /* Is the base register in the list? */
17593 for (i = offset; i < num_saves; i++)
17595 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17596 /* If SP is in the list, then the base register must be SP. */
17597 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17598 /* If base register is in the list, there must be no explicit update. */
17599 if (regno == regno_base)
17600 gcc_assert (!update);
17603 conditional = reverse ? "%?%D0" : "%?%d0";
17604 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17606 /* Output pop (not stmfd) because it has a shorter encoding. */
17607 gcc_assert (update);
17608 sprintf (pattern, "pop%s\t{", conditional);
17610 else
17612 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17613 It's just a convention; their semantics are identical. */
17614 if (regno_base == SP_REGNUM)
17615 sprintf (pattern, "ldm%sfd\t", conditional);
17616 else if (TARGET_UNIFIED_ASM)
17617 sprintf (pattern, "ldmia%s\t", conditional);
17618 else
17619 sprintf (pattern, "ldm%sia\t", conditional);
17621 strcat (pattern, reg_names[regno_base]);
17622 if (update)
17623 strcat (pattern, "!, {");
17624 else
17625 strcat (pattern, ", {");
17628 /* Output the first destination register. */
17629 strcat (pattern,
17630 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17632 /* Output the rest of the destination registers. */
17633 for (i = offset + 1; i < num_saves; i++)
17635 strcat (pattern, ", ");
17636 strcat (pattern,
17637 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17640 strcat (pattern, "}");
17642 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17643 strcat (pattern, "^");
17645 output_asm_insn (pattern, &cond);
17649 /* Output the assembly for a store multiple. */
17651 const char *
17652 vfp_output_vstmd (rtx * operands)
17654 char pattern[100];
17655 int p;
17656 int base;
17657 int i;
17658 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17659 ? XEXP (operands[0], 0)
17660 : XEXP (XEXP (operands[0], 0), 0);
17661 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17663 if (push_p)
17664 strcpy (pattern, "vpush%?.64\t{%P1");
17665 else
17666 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17668 p = strlen (pattern);
17670 gcc_assert (REG_P (operands[1]));
17672 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17673 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17675 p += sprintf (&pattern[p], ", d%d", base + i);
17677 strcpy (&pattern[p], "}");
17679 output_asm_insn (pattern, operands);
17680 return "";
17684 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17685 number of bytes pushed. */
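/* For illustration (not from the original source): a call with COUNT == 3
   emits RTL equivalent to a single "vpush {dN, dN+1, dN+2}" and returns 24;
   a COUNT of 20 is split by the recursion below into stores of 4 and 16
   D registers, and a COUNT of exactly 2 on pre-v6 cores is widened to 3 to
   dodge the ARM10 VFPr1 erratum.  */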
17687 static int
17688 vfp_emit_fstmd (int base_reg, int count)
17690 rtx par;
17691 rtx dwarf;
17692 rtx tmp, reg;
17693 int i;
17695 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17696 register pairs are stored by a store multiple insn. We avoid this
17697 by pushing an extra pair. */
17698 if (count == 2 && !arm_arch6)
17700 if (base_reg == LAST_VFP_REGNUM - 3)
17701 base_reg -= 2;
17702 count++;
17705 /* FSTMD may not store more than 16 doubleword registers at once. Split
17706 larger stores into multiple parts (up to a maximum of two, in
17707 practice). */
17708 if (count > 16)
17710 int saved;
17711 /* NOTE: base_reg is an internal register number, so each D register
17712 counts as 2. */
17713 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17714 saved += vfp_emit_fstmd (base_reg, 16);
17715 return saved;
17718 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17719 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17721 reg = gen_rtx_REG (DFmode, base_reg);
17722 base_reg += 2;
17724 XVECEXP (par, 0, 0)
17725 = gen_rtx_SET (VOIDmode,
17726 gen_frame_mem
17727 (BLKmode,
17728 gen_rtx_PRE_MODIFY (Pmode,
17729 stack_pointer_rtx,
17730 plus_constant
17731 (Pmode, stack_pointer_rtx,
17732 - (count * 8)))
17734 gen_rtx_UNSPEC (BLKmode,
17735 gen_rtvec (1, reg),
17736 UNSPEC_PUSH_MULT));
17738 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17739 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17740 RTX_FRAME_RELATED_P (tmp) = 1;
17741 XVECEXP (dwarf, 0, 0) = tmp;
17743 tmp = gen_rtx_SET (VOIDmode,
17744 gen_frame_mem (DFmode, stack_pointer_rtx),
17745 reg);
17746 RTX_FRAME_RELATED_P (tmp) = 1;
17747 XVECEXP (dwarf, 0, 1) = tmp;
17749 for (i = 1; i < count; i++)
17751 reg = gen_rtx_REG (DFmode, base_reg);
17752 base_reg += 2;
17753 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17755 tmp = gen_rtx_SET (VOIDmode,
17756 gen_frame_mem (DFmode,
17757 plus_constant (Pmode,
17758 stack_pointer_rtx,
17759 i * 8)),
17760 reg);
17761 RTX_FRAME_RELATED_P (tmp) = 1;
17762 XVECEXP (dwarf, 0, i + 1) = tmp;
17765 par = emit_insn (par);
17766 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17767 RTX_FRAME_RELATED_P (par) = 1;
17769 return count * 8;
17772 /* Emit a call instruction with pattern PAT. ADDR is the address of
17773 the call target. */
17775 void
17776 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17778 rtx insn;
17780 insn = emit_call_insn (pat);
17782 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17783 If the call might use such an entry, add a use of the PIC register
17784 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17785 if (TARGET_VXWORKS_RTP
17786 && flag_pic
17787 && !sibcall
17788 && GET_CODE (addr) == SYMBOL_REF
17789 && (SYMBOL_REF_DECL (addr)
17790 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17791 : !SYMBOL_REF_LOCAL_P (addr)))
17793 require_pic_register ();
17794 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17797 if (TARGET_AAPCS_BASED)
17799 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17800 linker. We need to add an IP clobber to allow setting
17801 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17802 is not needed since it's a fixed register. */
17803 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17804 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17808 /* Output a 'call' insn. */
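/* Illustrative output (not from the original source, pre-ARMv5 only):
   a call through r2 is emitted as

       mov     lr, pc
       bx      r2          @ "mov pc, r2" without interworking or ARMv4T

   and a call through lr first copies lr into ip.  */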
17809 const char *
17810 output_call (rtx *operands)
17812 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17814 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17815 if (REGNO (operands[0]) == LR_REGNUM)
17817 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17818 output_asm_insn ("mov%?\t%0, %|lr", operands);
17821 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17823 if (TARGET_INTERWORK || arm_arch4t)
17824 output_asm_insn ("bx%?\t%0", operands);
17825 else
17826 output_asm_insn ("mov%?\t%|pc, %0", operands);
17828 return "";
17831 /* Output a 'call' insn whose target is a reference in memory. This is
17832 disabled for ARMv5 and later, where we prefer a blx instead, because
17833 going through memory otherwise carries a significant performance overhead. */
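/* Illustrative output (not from the original source): with interworking
   enabled, a call through a memory operand such as [r3] is emitted as

       ldr     ip, [r3]
       mov     lr, pc
       bx      ip                                                       */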
17834 const char *
17835 output_call_mem (rtx *operands)
17837 gcc_assert (!arm_arch5);
17838 if (TARGET_INTERWORK)
17840 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17841 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17842 output_asm_insn ("bx%?\t%|ip", operands);
17844 else if (regno_use_in (LR_REGNUM, operands[0]))
17846 /* LR is used in the memory address. We load the address in the
17847 first instruction. It's safe to use IP as the target of the
17848 load since the call will kill it anyway. */
17849 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17850 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17851 if (arm_arch4t)
17852 output_asm_insn ("bx%?\t%|ip", operands);
17853 else
17854 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17856 else
17858 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17859 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17862 return "";
17866 /* Output a move from arm registers to arm registers of a long double.
17867 OPERANDS[0] is the destination.
17868 OPERANDS[1] is the source. */
17869 const char *
17870 output_mov_long_double_arm_from_arm (rtx *operands)
17872 /* We have to be careful here because the two might overlap. */
17873 int dest_start = REGNO (operands[0]);
17874 int src_start = REGNO (operands[1]);
17875 rtx ops[2];
17876 int i;
17878 if (dest_start < src_start)
17880 for (i = 0; i < 3; i++)
17882 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17883 ops[1] = gen_rtx_REG (SImode, src_start + i);
17884 output_asm_insn ("mov%?\t%0, %1", ops);
17887 else
17889 for (i = 2; i >= 0; i--)
17891 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17892 ops[1] = gen_rtx_REG (SImode, src_start + i);
17893 output_asm_insn ("mov%?\t%0, %1", ops);
17897 return "";
17900 void
17901 arm_emit_movpair (rtx dest, rtx src)
17903 /* If the src is an immediate, simplify it. */
17904 if (CONST_INT_P (src))
17906 HOST_WIDE_INT val = INTVAL (src);
17907 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17908 if ((val >> 16) & 0x0000ffff)
17909 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17910 GEN_INT (16)),
17911 GEN_INT ((val >> 16) & 0x0000ffff));
17912 return;
17914 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17915 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
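/* Worked example for arm_emit_movpair (illustrative, not from the original
   source): for SRC == 0x12345678 the code above emits a set of the low half,
   0x5678 (a movw), followed by a zero_extract set of the high half, 0x1234
   (a movt); the second insn is skipped when the high half is zero.  */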
17918 /* Output a move between double words. It must be REG<-MEM
17919 or MEM<-REG. */
17920 const char *
17921 output_move_double (rtx *operands, bool emit, int *count)
17923 enum rtx_code code0 = GET_CODE (operands[0]);
17924 enum rtx_code code1 = GET_CODE (operands[1]);
17925 rtx otherops[3];
17926 if (count)
17927 *count = 1;
17929 /* The only case when this might happen is when
17930 you are looking at the length of a DImode instruction
17931 that has an invalid constant in it. */
17932 if (code0 == REG && code1 != MEM)
17934 gcc_assert (!emit);
17935 *count = 2;
17936 return "";
17939 if (code0 == REG)
17941 unsigned int reg0 = REGNO (operands[0]);
17943 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17945 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17947 switch (GET_CODE (XEXP (operands[1], 0)))
17949 case REG:
17951 if (emit)
17953 if (TARGET_LDRD
17954 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17956 else
17957 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17959 break;
17961 case PRE_INC:
17962 gcc_assert (TARGET_LDRD);
17963 if (emit)
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17965 break;
17967 case PRE_DEC:
17968 if (emit)
17970 if (TARGET_LDRD)
17971 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17972 else
17973 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17975 break;
17977 case POST_INC:
17978 if (emit)
17980 if (TARGET_LDRD)
17981 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17982 else
17983 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17985 break;
17987 case POST_DEC:
17988 gcc_assert (TARGET_LDRD);
17989 if (emit)
17990 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17991 break;
17993 case PRE_MODIFY:
17994 case POST_MODIFY:
17995 /* Autoincrement addressing modes should never have overlapping
17996 base and destination registers, and overlapping index registers
17997 are already prohibited, so this doesn't need to worry about
17998 fix_cm3_ldrd. */
17999 otherops[0] = operands[0];
18000 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18001 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18003 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18005 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18007 /* Registers overlap so split out the increment. */
18008 if (emit)
18010 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18011 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18013 if (count)
18014 *count = 2;
18016 else
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can
18020 handle, fix these up with a pair of ldr. */
18021 if (TARGET_THUMB2
18022 || !CONST_INT_P (otherops[2])
18023 || (INTVAL (otherops[2]) > -256
18024 && INTVAL (otherops[2]) < 256))
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18029 else
18031 if (emit)
18033 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18034 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18036 if (count)
18037 *count = 2;
18042 else
18044 /* Use a single insn if we can.
18045 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18046 fix these up with a pair of ldr. */
18047 if (TARGET_THUMB2
18048 || !CONST_INT_P (otherops[2])
18049 || (INTVAL (otherops[2]) > -256
18050 && INTVAL (otherops[2]) < 256))
18052 if (emit)
18053 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18055 else
18057 if (emit)
18059 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18060 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18062 if (count)
18063 *count = 2;
18066 break;
18068 case LABEL_REF:
18069 case CONST:
18070 /* We might be able to use ldrd %0, %1 here. However, the range is
18071 different from that of ldr/adr, and it is broken on some ARMv7-M
18072 implementations. */
18073 /* Use the second register of the pair to avoid problematic
18074 overlap. */
18075 otherops[1] = operands[1];
18076 if (emit)
18077 output_asm_insn ("adr%?\t%0, %1", otherops);
18078 operands[1] = otherops[0];
18079 if (emit)
18081 if (TARGET_LDRD)
18082 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18083 else
18084 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18087 if (count)
18088 *count = 2;
18089 break;
18091 /* ??? This needs checking for thumb2. */
18092 default:
18093 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18094 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18096 otherops[0] = operands[0];
18097 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18098 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18100 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18102 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18104 switch ((int) INTVAL (otherops[2]))
18106 case -8:
18107 if (emit)
18108 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18109 return "";
18110 case -4:
18111 if (TARGET_THUMB2)
18112 break;
18113 if (emit)
18114 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18115 return "";
18116 case 4:
18117 if (TARGET_THUMB2)
18118 break;
18119 if (emit)
18120 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18121 return "";
18124 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18125 operands[1] = otherops[0];
18126 if (TARGET_LDRD
18127 && (REG_P (otherops[2])
18128 || TARGET_THUMB2
18129 || (CONST_INT_P (otherops[2])
18130 && INTVAL (otherops[2]) > -256
18131 && INTVAL (otherops[2]) < 256)))
18133 if (reg_overlap_mentioned_p (operands[0],
18134 otherops[2]))
18136 rtx tmp;
18137 /* Swap base and index registers over to
18138 avoid a conflict. */
18139 tmp = otherops[1];
18140 otherops[1] = otherops[2];
18141 otherops[2] = tmp;
18143 /* If both registers conflict, it will usually
18144 have been fixed by a splitter. */
18145 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18146 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18148 if (emit)
18150 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18151 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18153 if (count)
18154 *count = 2;
18156 else
18158 otherops[0] = operands[0];
18159 if (emit)
18160 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18162 return "";
18165 if (CONST_INT_P (otherops[2]))
18167 if (emit)
18169 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18170 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18171 else
18172 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18175 else
18177 if (emit)
18178 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18181 else
18183 if (emit)
18184 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18187 if (count)
18188 *count = 2;
18190 if (TARGET_LDRD)
18191 return "ldr%(d%)\t%0, [%1]";
18193 return "ldm%(ia%)\t%1, %M0";
18195 else
18197 otherops[1] = adjust_address (operands[1], SImode, 4);
18198 /* Take care of overlapping base/data reg. */
18199 if (reg_mentioned_p (operands[0], operands[1]))
18201 if (emit)
18203 output_asm_insn ("ldr%?\t%0, %1", otherops);
18204 output_asm_insn ("ldr%?\t%0, %1", operands);
18206 if (count)
18207 *count = 2;
18210 else
18212 if (emit)
18214 output_asm_insn ("ldr%?\t%0, %1", operands);
18215 output_asm_insn ("ldr%?\t%0, %1", otherops);
18217 if (count)
18218 *count = 2;
18223 else
18225 /* Constraints should ensure this. */
18226 gcc_assert (code0 == MEM && code1 == REG);
18227 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18228 || (TARGET_ARM && TARGET_LDRD));
18230 switch (GET_CODE (XEXP (operands[0], 0)))
18232 case REG:
18233 if (emit)
18235 if (TARGET_LDRD)
18236 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18237 else
18238 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18240 break;
18242 case PRE_INC:
18243 gcc_assert (TARGET_LDRD);
18244 if (emit)
18245 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18246 break;
18248 case PRE_DEC:
18249 if (emit)
18251 if (TARGET_LDRD)
18252 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18253 else
18254 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18256 break;
18258 case POST_INC:
18259 if (emit)
18261 if (TARGET_LDRD)
18262 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18263 else
18264 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18266 break;
18268 case POST_DEC:
18269 gcc_assert (TARGET_LDRD);
18270 if (emit)
18271 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18272 break;
18274 case PRE_MODIFY:
18275 case POST_MODIFY:
18276 otherops[0] = operands[1];
18277 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18278 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18280 /* IWMMXT allows offsets larger than ldrd can handle,
18281 fix these up with a pair of ldr. */
18282 if (!TARGET_THUMB2
18283 && CONST_INT_P (otherops[2])
18284 && (INTVAL(otherops[2]) <= -256
18285 || INTVAL(otherops[2]) >= 256))
18287 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18289 if (emit)
18291 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18292 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18294 if (count)
18295 *count = 2;
18297 else
18299 if (emit)
18301 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18302 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18304 if (count)
18305 *count = 2;
18308 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18310 if (emit)
18311 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18313 else
18315 if (emit)
18316 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18318 break;
18320 case PLUS:
18321 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18322 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18324 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18326 case -8:
18327 if (emit)
18328 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18329 return "";
18331 case -4:
18332 if (TARGET_THUMB2)
18333 break;
18334 if (emit)
18335 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18336 return "";
18338 case 4:
18339 if (TARGET_THUMB2)
18340 break;
18341 if (emit)
18342 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18343 return "";
18346 if (TARGET_LDRD
18347 && (REG_P (otherops[2])
18348 || TARGET_THUMB2
18349 || (CONST_INT_P (otherops[2])
18350 && INTVAL (otherops[2]) > -256
18351 && INTVAL (otherops[2]) < 256)))
18353 otherops[0] = operands[1];
18354 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18355 if (emit)
18356 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18357 return "";
18359 /* Fall through */
18361 default:
18362 otherops[0] = adjust_address (operands[0], SImode, 4);
18363 otherops[1] = operands[1];
18364 if (emit)
18366 output_asm_insn ("str%?\t%1, %0", operands);
18367 output_asm_insn ("str%?\t%H1, %0", otherops);
18369 if (count)
18370 *count = 2;
18374 return "";
18377 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18378 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
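/* Illustrative behaviour (not from the original source): a quad-word load
   whose address is a plain register is emitted as a single ldmia of four
   consecutive core registers, while a register-to-register move falls back
   to four mov instructions ordered so that overlapping source and
   destination ranges are not clobbered.  */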
18380 const char *
18381 output_move_quad (rtx *operands)
18383 if (REG_P (operands[0]))
18385 /* Load, or reg->reg move. */
18387 if (MEM_P (operands[1]))
18389 switch (GET_CODE (XEXP (operands[1], 0)))
18391 case REG:
18392 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18393 break;
18395 case LABEL_REF:
18396 case CONST:
18397 output_asm_insn ("adr%?\t%0, %1", operands);
18398 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18399 break;
18401 default:
18402 gcc_unreachable ();
18405 else
18407 rtx ops[2];
18408 int dest, src, i;
18410 gcc_assert (REG_P (operands[1]));
18412 dest = REGNO (operands[0]);
18413 src = REGNO (operands[1]);
18415 /* This seems pretty dumb, but hopefully GCC won't try to do it
18416 very often. */
18417 if (dest < src)
18418 for (i = 0; i < 4; i++)
18420 ops[0] = gen_rtx_REG (SImode, dest + i);
18421 ops[1] = gen_rtx_REG (SImode, src + i);
18422 output_asm_insn ("mov%?\t%0, %1", ops);
18424 else
18425 for (i = 3; i >= 0; i--)
18427 ops[0] = gen_rtx_REG (SImode, dest + i);
18428 ops[1] = gen_rtx_REG (SImode, src + i);
18429 output_asm_insn ("mov%?\t%0, %1", ops);
18433 else
18435 gcc_assert (MEM_P (operands[0]));
18436 gcc_assert (REG_P (operands[1]));
18437 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18439 switch (GET_CODE (XEXP (operands[0], 0)))
18441 case REG:
18442 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18443 break;
18445 default:
18446 gcc_unreachable ();
18450 return "";
18453 /* Output a VFP load or store instruction. */
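/* Illustrative outputs (not from the original source): a DFmode load from
   [r0, #8] becomes "vldr.64 dN, [r0, #8]", a post-increment load becomes
   "vldmia.64 r0!, {dN}", and SFmode operands use the ".32" forms instead.  */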
18455 const char *
18456 output_move_vfp (rtx *operands)
18458 rtx reg, mem, addr, ops[2];
18459 int load = REG_P (operands[0]);
18460 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18461 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18462 const char *templ;
18463 char buff[50];
18464 machine_mode mode;
18466 reg = operands[!load];
18467 mem = operands[load];
18469 mode = GET_MODE (reg);
18471 gcc_assert (REG_P (reg));
18472 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18473 gcc_assert (mode == SFmode
18474 || mode == DFmode
18475 || mode == SImode
18476 || mode == DImode
18477 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18478 gcc_assert (MEM_P (mem));
18480 addr = XEXP (mem, 0);
18482 switch (GET_CODE (addr))
18484 case PRE_DEC:
18485 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18486 ops[0] = XEXP (addr, 0);
18487 ops[1] = reg;
18488 break;
18490 case POST_INC:
18491 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18492 ops[0] = XEXP (addr, 0);
18493 ops[1] = reg;
18494 break;
18496 default:
18497 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18498 ops[0] = reg;
18499 ops[1] = mem;
18500 break;
18503 sprintf (buff, templ,
18504 load ? "ld" : "st",
18505 dp ? "64" : "32",
18506 dp ? "P" : "",
18507 integer_p ? "\t%@ int" : "");
18508 output_asm_insn (buff, ops);
18510 return "";
18513 /* Output a Neon double-word or quad-word load or store, or a load
18514 or store for larger structure modes.
18516 WARNING: The ordering of elements is weird in big-endian mode,
18517 because the EABI requires that vectors stored in memory appear
18518 as though they were stored by a VSTM instruction.
18519 GCC RTL defines element ordering based on in-memory order.
18520 This can be different from the architectural ordering of elements
18521 within a NEON register. The intrinsics defined in arm_neon.h use the
18522 NEON register element ordering, not the GCC RTL element ordering.
18524 For example, the in-memory ordering of a big-endian quadword
18525 vector with 16-bit elements when stored from register pair {d0,d1}
18526 will be (lowest address first, d0[N] is NEON register element N):
18528 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18530 When necessary, quadword registers (dN, dN+1) are moved to ARM
18531 registers from rN in the order:
18533 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18535 So that STM/LDM can be used on vectors in ARM registers, and the
18536 same memory layout will result as if VSTM/VLDM were used.
18538 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18539 possible, which allows use of appropriate alignment tags.
18540 Note that the choice of "64" is independent of the actual vector
18541 element size; this size simply ensures that the behavior is
18542 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18544 Due to limitations of those instructions, use of VST1.64/VLD1.64
18545 is not possible if:
18546 - the address contains PRE_DEC, or
18547 - the mode refers to more than 4 double-word registers
18549 In those cases, it would be possible to replace VSTM/VLDM by a
18550 sequence of instructions; this is not currently implemented since
18551 this is not certain to actually improve performance. */
18553 const char *
18554 output_move_neon (rtx *operands)
18556 rtx reg, mem, addr, ops[2];
18557 int regno, nregs, load = REG_P (operands[0]);
18558 const char *templ;
18559 char buff[50];
18560 machine_mode mode;
18562 reg = operands[!load];
18563 mem = operands[load];
18565 mode = GET_MODE (reg);
18567 gcc_assert (REG_P (reg));
18568 regno = REGNO (reg);
18569 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18570 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18571 || NEON_REGNO_OK_FOR_QUAD (regno));
18572 gcc_assert (VALID_NEON_DREG_MODE (mode)
18573 || VALID_NEON_QREG_MODE (mode)
18574 || VALID_NEON_STRUCT_MODE (mode));
18575 gcc_assert (MEM_P (mem));
18577 addr = XEXP (mem, 0);
18579 /* Strip off const from addresses like (const (plus (...))). */
18580 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18581 addr = XEXP (addr, 0);
18583 switch (GET_CODE (addr))
18585 case POST_INC:
18586 /* We have to use vldm / vstm for too-large modes. */
18587 if (nregs > 4)
18589 templ = "v%smia%%?\t%%0!, %%h1";
18590 ops[0] = XEXP (addr, 0);
18592 else
18594 templ = "v%s1.64\t%%h1, %%A0";
18595 ops[0] = mem;
18597 ops[1] = reg;
18598 break;
18600 case PRE_DEC:
18601 /* We have to use vldm / vstm in this case, since there is no
18602 pre-decrement form of the vld1 / vst1 instructions. */
18603 templ = "v%smdb%%?\t%%0!, %%h1";
18604 ops[0] = XEXP (addr, 0);
18605 ops[1] = reg;
18606 break;
18608 case POST_MODIFY:
18609 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18610 gcc_unreachable ();
18612 case REG:
18613 /* We have to use vldm / vstm for too-large modes. */
18614 if (nregs > 1)
18616 if (nregs > 4)
18617 templ = "v%smia%%?\t%%m0, %%h1";
18618 else
18619 templ = "v%s1.64\t%%h1, %%A0";
18621 ops[0] = mem;
18622 ops[1] = reg;
18623 break;
18625 /* Fall through. */
18626 case LABEL_REF:
18627 case PLUS:
18629 int i;
18630 int overlap = -1;
18631 for (i = 0; i < nregs; i++)
18633 /* We're only using DImode here because it's a convenient size. */
18634 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18635 ops[1] = adjust_address (mem, DImode, 8 * i);
18636 if (reg_overlap_mentioned_p (ops[0], mem))
18638 gcc_assert (overlap == -1);
18639 overlap = i;
18641 else
18643 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18644 output_asm_insn (buff, ops);
18647 if (overlap != -1)
18649 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18650 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18651 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18652 output_asm_insn (buff, ops);
18655 return "";
18658 default:
18659 gcc_unreachable ();
18662 sprintf (buff, templ, load ? "ld" : "st");
18663 output_asm_insn (buff, ops);
18665 return "";
18668 /* Compute and return the length of neon_mov<mode>, where <mode> is
18669 one of VSTRUCT modes: EI, OI, CI or XI. */
18671 arm_attr_length_move_neon (rtx_insn *insn)
18673 rtx reg, mem, addr;
18674 int load;
18675 machine_mode mode;
18677 extract_insn_cached (insn);
18679 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18681 mode = GET_MODE (recog_data.operand[0]);
18682 switch (mode)
18684 case EImode:
18685 case OImode:
18686 return 8;
18687 case CImode:
18688 return 12;
18689 case XImode:
18690 return 16;
18691 default:
18692 gcc_unreachable ();
18696 load = REG_P (recog_data.operand[0]);
18697 reg = recog_data.operand[!load];
18698 mem = recog_data.operand[load];
18700 gcc_assert (MEM_P (mem));
18702 mode = GET_MODE (reg);
18703 addr = XEXP (mem, 0);
18705 /* Strip off const from addresses like (const (plus (...))). */
18706 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18707 addr = XEXP (addr, 0);
18709 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18711 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18712 return insns * 4;
18714 else
18715 return 4;
18718 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18719 return zero. */
18722 arm_address_offset_is_imm (rtx_insn *insn)
18724 rtx mem, addr;
18726 extract_insn_cached (insn);
18728 if (REG_P (recog_data.operand[0]))
18729 return 0;
18731 mem = recog_data.operand[0];
18733 gcc_assert (MEM_P (mem));
18735 addr = XEXP (mem, 0);
18737 if (REG_P (addr)
18738 || (GET_CODE (addr) == PLUS
18739 && REG_P (XEXP (addr, 0))
18740 && CONST_INT_P (XEXP (addr, 1))))
18741 return 1;
18742 else
18743 return 0;
18746 /* Output an ADD r, s, #n where n may be too big for one instruction.
18747 If adding zero to one register, output nothing. */
18748 const char *
18749 output_add_immediate (rtx *operands)
18751 HOST_WIDE_INT n = INTVAL (operands[2]);
18753 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18755 if (n < 0)
18756 output_multi_immediate (operands,
18757 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18758 -n);
18759 else
18760 output_multi_immediate (operands,
18761 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18765 return "";
18768 /* Output a multiple immediate operation.
18769 OPERANDS is the vector of operands referred to in the output patterns.
18770 INSTR1 is the output pattern to use for the first constant.
18771 INSTR2 is the output pattern to use for subsequent constants.
18772 IMMED_OP is the index of the constant slot in OPERANDS.
18773 N is the constant value. */
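/* Worked example (illustrative, not from the original source): with
   INSTR1 == "add%?\t%0, %1, %2", INSTR2 == "add%?\t%0, %0, %2" and
   N == 0x1001, the loop below finds the 8-bit chunks 0x001 and 0x1000
   and emits

       add     r0, r1, #1
       add     r0, r0, #4096                                            */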
18774 static const char *
18775 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18776 int immed_op, HOST_WIDE_INT n)
18778 #if HOST_BITS_PER_WIDE_INT > 32
18779 n &= 0xffffffff;
18780 #endif
18782 if (n == 0)
18784 /* Quick and easy output. */
18785 operands[immed_op] = const0_rtx;
18786 output_asm_insn (instr1, operands);
18788 else
18790 int i;
18791 const char * instr = instr1;
18793 /* Note that n is never zero here (which would give no output). */
18794 for (i = 0; i < 32; i += 2)
18796 if (n & (3 << i))
18798 operands[immed_op] = GEN_INT (n & (255 << i));
18799 output_asm_insn (instr, operands);
18800 instr = instr2;
18801 i += 6;
18806 return "";
18809 /* Return the name of a shifter operation. */
18810 static const char *
18811 arm_shift_nmem(enum rtx_code code)
18813 switch (code)
18815 case ASHIFT:
18816 return ARM_LSL_NAME;
18818 case ASHIFTRT:
18819 return "asr";
18821 case LSHIFTRT:
18822 return "lsr";
18824 case ROTATERT:
18825 return "ror";
18827 default:
18828 abort();
18832 /* Return the appropriate ARM instruction for the operation code.
18833 The returned result should not be overwritten. OP is the rtx of the
18834 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18835 was shifted. */
18836 const char *
18837 arithmetic_instr (rtx op, int shift_first_arg)
18839 switch (GET_CODE (op))
18841 case PLUS:
18842 return "add";
18844 case MINUS:
18845 return shift_first_arg ? "rsb" : "sub";
18847 case IOR:
18848 return "orr";
18850 case XOR:
18851 return "eor";
18853 case AND:
18854 return "and";
18856 case ASHIFT:
18857 case ASHIFTRT:
18858 case LSHIFTRT:
18859 case ROTATERT:
18860 return arm_shift_nmem(GET_CODE(op));
18862 default:
18863 gcc_unreachable ();
18867 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18868 for the operation code. The returned result should not be overwritten.
18869 OP is the rtx code of the shift.
18870 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18871 will hold the constant shift amount. */
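/* Examples (illustrative, not from the original source): for (ashift x 3)
   shift_op returns "lsl" with *AMOUNTP == 3; for (mult x 8) it also returns
   "lsl" with *AMOUNTP == 3; for (rotate x 24) it returns "ror" with
   *AMOUNTP == 8; and for a shift by a register it returns the mnemonic with
   *AMOUNTP == -1.  */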
18872 static const char *
18873 shift_op (rtx op, HOST_WIDE_INT *amountp)
18875 const char * mnem;
18876 enum rtx_code code = GET_CODE (op);
18878 switch (code)
18880 case ROTATE:
18881 if (!CONST_INT_P (XEXP (op, 1)))
18883 output_operand_lossage ("invalid shift operand");
18884 return NULL;
18887 code = ROTATERT;
18888 *amountp = 32 - INTVAL (XEXP (op, 1));
18889 mnem = "ror";
18890 break;
18892 case ASHIFT:
18893 case ASHIFTRT:
18894 case LSHIFTRT:
18895 case ROTATERT:
18896 mnem = arm_shift_nmem(code);
18897 if (CONST_INT_P (XEXP (op, 1)))
18899 *amountp = INTVAL (XEXP (op, 1));
18901 else if (REG_P (XEXP (op, 1)))
18903 *amountp = -1;
18904 return mnem;
18906 else
18908 output_operand_lossage ("invalid shift operand");
18909 return NULL;
18911 break;
18913 case MULT:
18914 /* We never have to worry about the amount being other than a
18915 power of 2, since this case can never be reloaded from a reg. */
18916 if (!CONST_INT_P (XEXP (op, 1)))
18918 output_operand_lossage ("invalid shift operand");
18919 return NULL;
18922 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18924 /* Amount must be a power of two. */
18925 if (*amountp & (*amountp - 1))
18927 output_operand_lossage ("invalid shift operand");
18928 return NULL;
18931 *amountp = int_log2 (*amountp);
18932 return ARM_LSL_NAME;
18934 default:
18935 output_operand_lossage ("invalid shift operand");
18936 return NULL;
18939 /* This is not 100% correct, but follows from the desire to merge
18940 multiplication by a power of 2 with the recognizer for a
18941 shift. >=32 is not a valid shift for "lsl", so we must try to
18942 output a shift that produces the correct arithmetical result.
18943 Using lsr #32 is identical except for the fact that the carry bit
18944 is not set correctly if we set the flags; but we never use the
18945 carry bit from such an operation, so we can ignore that. */
18946 if (code == ROTATERT)
18947 /* Rotate is just modulo 32. */
18948 *amountp &= 31;
18949 else if (*amountp != (*amountp & 31))
18951 if (code == ASHIFT)
18952 mnem = "lsr";
18953 *amountp = 32;
18956 /* Shifts of 0 are no-ops. */
18957 if (*amountp == 0)
18958 return NULL;
18960 return mnem;
18963 /* Obtain the shift from the POWER of two. */
18965 static HOST_WIDE_INT
18966 int_log2 (HOST_WIDE_INT power)
18968 HOST_WIDE_INT shift = 0;
18970 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18972 gcc_assert (shift <= 31);
18973 shift++;
18976 return shift;
18979 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18980 because /bin/as is horribly restrictive. The judgement about
18981 whether or not each character is 'printable' (and can be output as
18982 is) or not (and must be printed with an octal escape) must be made
18983 with reference to the *host* character set -- the situation is
18984 similar to that discussed in the comments above pp_c_char in
18985 c-pretty-print.c. */
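/* For example (illustrative, not from the original source): the bytes
   { 'H', 'i', '"', 0x07, 0 } are emitted as

       .ascii  "Hi\"\007\000"

   with a new .ascii directive started every MAX_ASCII_LEN characters.  */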
18987 #define MAX_ASCII_LEN 51
18989 void
18990 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18992 int i;
18993 int len_so_far = 0;
18995 fputs ("\t.ascii\t\"", stream);
18997 for (i = 0; i < len; i++)
18999 int c = p[i];
19001 if (len_so_far >= MAX_ASCII_LEN)
19003 fputs ("\"\n\t.ascii\t\"", stream);
19004 len_so_far = 0;
19007 if (ISPRINT (c))
19009 if (c == '\\' || c == '\"')
19011 putc ('\\', stream);
19012 len_so_far++;
19014 putc (c, stream);
19015 len_so_far++;
19017 else
19019 fprintf (stream, "\\%03o", c);
19020 len_so_far += 4;
19024 fputs ("\"\n", stream);
19027 /* Compute the register save mask for registers 0 through 12
19028 inclusive. This code is used by arm_compute_save_reg_mask. */
19030 static unsigned long
19031 arm_compute_save_reg0_reg12_mask (void)
19033 unsigned long func_type = arm_current_func_type ();
19034 unsigned long save_reg_mask = 0;
19035 unsigned int reg;
19037 if (IS_INTERRUPT (func_type))
19039 unsigned int max_reg;
19040 /* Interrupt functions must not corrupt any registers,
19041 even call clobbered ones. If this is a leaf function
19042 we can just examine the registers used by the RTL, but
19043 otherwise we have to assume that whatever function is
19044 called might clobber anything, and so we have to save
19045 all the call-clobbered registers as well. */
19046 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19047 /* FIQ handlers have registers r8 - r12 banked, so
19048 we only need to check r0 - r7. Normal ISRs only
19049 bank r14 and r15, so we must check up to r12.
19050 r13 is the stack pointer which is always preserved,
19051 so we do not need to consider it here. */
19052 max_reg = 7;
19053 else
19054 max_reg = 12;
19056 for (reg = 0; reg <= max_reg; reg++)
19057 if (df_regs_ever_live_p (reg)
19058 || (! crtl->is_leaf && call_used_regs[reg]))
19059 save_reg_mask |= (1 << reg);
19061 /* Also save the pic base register if necessary. */
19062 if (flag_pic
19063 && !TARGET_SINGLE_PIC_BASE
19064 && arm_pic_register != INVALID_REGNUM
19065 && crtl->uses_pic_offset_table)
19066 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19068 else if (IS_VOLATILE(func_type))
19070 /* For noreturn functions we historically omitted register saves
19071 altogether. However this really messes up debugging. As a
19072 compromise save just the frame pointers. Combined with the link
19073 register saved elsewhere this should be sufficient to get
19074 a backtrace. */
19075 if (frame_pointer_needed)
19076 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19077 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19078 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19079 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19080 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19082 else
19084 /* In the normal case we only need to save those registers
19085 which are call saved and which are used by this function. */
19086 for (reg = 0; reg <= 11; reg++)
19087 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19088 save_reg_mask |= (1 << reg);
19090 /* Handle the frame pointer as a special case. */
19091 if (frame_pointer_needed)
19092 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19094 /* If we aren't loading the PIC register,
19095 don't stack it even though it may be live. */
19096 if (flag_pic
19097 && !TARGET_SINGLE_PIC_BASE
19098 && arm_pic_register != INVALID_REGNUM
19099 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19100 || crtl->uses_pic_offset_table))
19101 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19103 /* The prologue will copy SP into R0, so save it. */
19104 if (IS_STACKALIGN (func_type))
19105 save_reg_mask |= 1;
19108 /* Save registers so the exception handler can modify them. */
19109 if (crtl->calls_eh_return)
19111 unsigned int i;
19113 for (i = 0; ; i++)
19115 reg = EH_RETURN_DATA_REGNO (i);
19116 if (reg == INVALID_REGNUM)
19117 break;
19118 save_reg_mask |= 1 << reg;
19122 return save_reg_mask;
19125 /* Return true if r3 is live at the start of the function. */
19127 static bool
19128 arm_r3_live_at_start_p (void)
19130 /* Just look at cfg info, which is still close enough to correct at this
19131 point. This gives false positives for broken functions that might use
19132 uninitialized data that happens to be allocated in r3, but who cares? */
19133 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19136 /* Compute the number of bytes used to store the static chain register on the
19137 stack, above the stack frame. We need to know this accurately to get the
19138 alignment of the rest of the stack frame correct. */
19140 static int
19141 arm_compute_static_chain_stack_bytes (void)
19143 /* See the defining assertion in arm_expand_prologue. */
19144 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19145 && IS_NESTED (arm_current_func_type ())
19146 && arm_r3_live_at_start_p ()
19147 && crtl->args.pretend_args_size == 0)
19148 return 4;
19150 return 0;
19153 /* Compute a bit mask of which registers need to be
19154 saved on the stack for the current function.
19155 This is used by arm_get_frame_offsets, which may add extra registers. */
19157 static unsigned long
19158 arm_compute_save_reg_mask (void)
19160 unsigned int save_reg_mask = 0;
19161 unsigned long func_type = arm_current_func_type ();
19162 unsigned int reg;
19164 if (IS_NAKED (func_type))
19165 /* This should never really happen. */
19166 return 0;
19168 /* If we are creating a stack frame, then we must save the frame pointer,
19169 IP (which will hold the old stack pointer), LR and the PC. */
19170 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19171 save_reg_mask |=
19172 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19173 | (1 << IP_REGNUM)
19174 | (1 << LR_REGNUM)
19175 | (1 << PC_REGNUM);
19177 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19179 /* Decide if we need to save the link register.
19180 Interrupt routines have their own banked link register,
19181 so they never need to save it.
19182 Otherwise if we do not use the link register we do not need to save
19183 it. If we are pushing other registers onto the stack however, we
19184 can save an instruction in the epilogue by pushing the link register
19185 now and then popping it back into the PC. This incurs extra memory
19186 accesses though, so we only do it when optimizing for size, and only
19187 if we know that we will not need a fancy return sequence. */
19188 if (df_regs_ever_live_p (LR_REGNUM)
19189 || (save_reg_mask
19190 && optimize_size
19191 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19192 && !crtl->calls_eh_return))
19193 save_reg_mask |= 1 << LR_REGNUM;
19195 if (cfun->machine->lr_save_eliminated)
19196 save_reg_mask &= ~ (1 << LR_REGNUM);
19198 if (TARGET_REALLY_IWMMXT
19199 && ((bit_count (save_reg_mask)
19200 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19201 arm_compute_static_chain_stack_bytes())
19202 ) % 2) != 0)
19204 /* The total number of registers that are going to be pushed
19205 onto the stack is odd. We need to ensure that the stack
19206 is 64-bit aligned before we start to save iWMMXt registers,
19207 and also before we start to create locals. (A local variable
19208 might be a double or long long which we will load/store using
19209 an iWMMXt instruction). Therefore we need to push another
19210 ARM register, so that the stack will be 64-bit aligned. We
19211 try to avoid using the arg registers (r0 -r3) as they might be
19212 used to pass values in a tail call. */
19213 for (reg = 4; reg <= 12; reg++)
19214 if ((save_reg_mask & (1 << reg)) == 0)
19215 break;
19217 if (reg <= 12)
19218 save_reg_mask |= (1 << reg);
19219 else
19221 cfun->machine->sibcall_blocked = 1;
19222 save_reg_mask |= (1 << 3);
19226 /* We may need to push an additional register for use initializing the
19227 PIC base register. */
19228 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19229 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19231 reg = thumb_find_work_register (1 << 4);
19232 if (!call_used_regs[reg])
19233 save_reg_mask |= (1 << reg);
19236 return save_reg_mask;
19240 /* Compute a bit mask of which registers need to be
19241 saved on the stack for the current function. */
19242 static unsigned long
19243 thumb1_compute_save_reg_mask (void)
19245 unsigned long mask;
19246 unsigned reg;
19248 mask = 0;
19249 for (reg = 0; reg < 12; reg ++)
19250 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19251 mask |= 1 << reg;
19253 if (flag_pic
19254 && !TARGET_SINGLE_PIC_BASE
19255 && arm_pic_register != INVALID_REGNUM
19256 && crtl->uses_pic_offset_table)
19257 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19259 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19260 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19261 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19263 /* LR will also be pushed if any lo regs are pushed. */
19264 if (mask & 0xff || thumb_force_lr_save ())
19265 mask |= (1 << LR_REGNUM);
19267 /* Make sure we have a low work register if we need one.
19268 We will need one if we are going to push a high register,
19269 but we are not currently intending to push a low register. */
19270 if ((mask & 0xff) == 0
19271 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19273 /* Use thumb_find_work_register to choose which register
19274 we will use. If the register is live then we will
19275 have to push it. Use LAST_LO_REGNUM as our fallback
19276 choice for the register to select. */
19277 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19278 /* Make sure the register returned by thumb_find_work_register is
19279 not part of the return value. */
19280 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19281 reg = LAST_LO_REGNUM;
19283 if (! call_used_regs[reg])
19284 mask |= 1 << reg;
19287 /* The 504 below is 8 bytes less than 512 because there are two possible
19288 alignment words. We can't tell here if they will be present or not, so we
19289 have to play it safe and assume that they are. */
19290 if ((CALLER_INTERWORKING_SLOT_SIZE +
19291 ROUND_UP_WORD (get_frame_size ()) +
19292 crtl->outgoing_args_size) >= 504)
19294 /* This is the same as the code in thumb1_expand_prologue() which
19295 determines which register to use for stack decrement. */
19296 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19297 if (mask & (1 << reg))
19298 break;
19300 if (reg > LAST_LO_REGNUM)
19302 /* Make sure we have a register available for stack decrement. */
19303 mask |= 1 << LAST_LO_REGNUM;
19307 return mask;
19311 /* Return the number of bytes required to save VFP registers. */
19312 static int
19313 arm_get_vfp_saved_size (void)
19315 unsigned int regno;
19316 int count;
19317 int saved;
19319 saved = 0;
19320 /* Space for saved VFP registers. */
19321 if (TARGET_HARD_FLOAT && TARGET_VFP)
19323 count = 0;
19324 for (regno = FIRST_VFP_REGNUM;
19325 regno < LAST_VFP_REGNUM;
19326 regno += 2)
19328 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19329 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19331 if (count > 0)
19333 /* Work around the ARM10 VFPr1 bug. */
19334 if (count == 2 && !arm_arch6)
19335 count++;
19336 saved += count * 8;
19338 count = 0;
19340 else
19341 count++;
19343 if (count > 0)
19345 if (count == 2 && !arm_arch6)
19346 count++;
19347 saved += count * 8;
19350 return saved;
19354 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19355 everything bar the final return instruction. If SIMPLE_RETURN is true,
19356 then do not output the epilogue, because it has already been emitted in RTL. */
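/* Illustrative output (not from the original source): for a function that
   saved {r4, r5, lr} and has no special exit requirements, a real return is
   emitted as

       ldmfd   sp!, {r4, r5, pc}   @ "pop {r4, r5, pc}" with unified syntax

   loading the saved return address straight into the PC.  */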
19357 const char *
19358 output_return_instruction (rtx operand, bool really_return, bool reverse,
19359 bool simple_return)
19361 char conditional[10];
19362 char instr[100];
19363 unsigned reg;
19364 unsigned long live_regs_mask;
19365 unsigned long func_type;
19366 arm_stack_offsets *offsets;
19368 func_type = arm_current_func_type ();
19370 if (IS_NAKED (func_type))
19371 return "";
19373 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19375 /* If this function was declared non-returning, and we have
19376 found a tail call, then we have to trust that the called
19377 function won't return. */
19378 if (really_return)
19380 rtx ops[2];
19382 /* Otherwise, trap an attempted return by aborting. */
19383 ops[0] = operand;
19384 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19385 : "abort");
19386 assemble_external_libcall (ops[1]);
19387 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19390 return "";
19393 gcc_assert (!cfun->calls_alloca || really_return);
19395 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19397 cfun->machine->return_used_this_function = 1;
19399 offsets = arm_get_frame_offsets ();
19400 live_regs_mask = offsets->saved_regs_mask;
19402 if (!simple_return && live_regs_mask)
19404 const char * return_reg;
19406 /* If we do not have any special requirements for function exit
19407 (e.g. interworking) then we can load the return address
19408 directly into the PC. Otherwise we must load it into LR. */
19409 if (really_return
19410 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19411 return_reg = reg_names[PC_REGNUM];
19412 else
19413 return_reg = reg_names[LR_REGNUM];
19415 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19417 /* There are three possible reasons for the IP register
19418 being saved: 1) a stack frame was created, in which case
19419 IP contains the old stack pointer, or 2) an ISR routine
19420 corrupted it, or 3) it was saved to align the stack on
19421 iWMMXt. In case 1, restore IP into SP, otherwise just
19422 restore IP. */
19423 if (frame_pointer_needed)
19425 live_regs_mask &= ~ (1 << IP_REGNUM);
19426 live_regs_mask |= (1 << SP_REGNUM);
19428 else
19429 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19432 /* On some ARM architectures it is faster to use LDR rather than
19433 LDM to load a single register. On other architectures, the
19434 cost is the same. In 26 bit mode, or for exception handlers,
19435 we have to use LDM to load the PC so that the CPSR is also
19436 restored. */
19437 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19438 if (live_regs_mask == (1U << reg))
19439 break;
19441 if (reg <= LAST_ARM_REGNUM
19442 && (reg != LR_REGNUM
19443 || ! really_return
19444 || ! IS_INTERRUPT (func_type)))
19446 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19447 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19449 else
19451 char *p;
19452 int first = 1;
19454 /* Generate the load multiple instruction to restore the
19455 registers. Note we can get here, even if
19456 frame_pointer_needed is true, but only if sp already
19457 points to the base of the saved core registers. */
19458 if (live_regs_mask & (1 << SP_REGNUM))
19460 unsigned HOST_WIDE_INT stack_adjust;
19462 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19463 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19465 if (stack_adjust && arm_arch5 && TARGET_ARM)
19466 if (TARGET_UNIFIED_ASM)
19467 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19468 else
19469 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19470 else
19472 /* If we can't use ldmib (SA110 bug),
19473 then try to pop r3 instead. */
19474 if (stack_adjust)
19475 live_regs_mask |= 1 << 3;
19477 if (TARGET_UNIFIED_ASM)
19478 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19479 else
19480 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19483 else
19484 if (TARGET_UNIFIED_ASM)
19485 sprintf (instr, "pop%s\t{", conditional);
19486 else
19487 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19489 p = instr + strlen (instr);
19491 for (reg = 0; reg <= SP_REGNUM; reg++)
19492 if (live_regs_mask & (1 << reg))
19494 int l = strlen (reg_names[reg]);
19496 if (first)
19497 first = 0;
19498 else
19500 memcpy (p, ", ", 2);
19501 p += 2;
19504 memcpy (p, "%|", 2);
19505 memcpy (p + 2, reg_names[reg], l);
19506 p += l + 2;
19509 if (live_regs_mask & (1 << LR_REGNUM))
19511 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19512 /* If returning from an interrupt, restore the CPSR. */
19513 if (IS_INTERRUPT (func_type))
19514 strcat (p, "^");
19516 else
19517 strcpy (p, "}");
19520 output_asm_insn (instr, & operand);
19522 /* See if we need to generate an extra instruction to
19523 perform the actual function return. */
19524 if (really_return
19525 && func_type != ARM_FT_INTERWORKED
19526 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19528 /* The return has already been handled
19529 by loading the LR into the PC. */
19530 return "";
19534 if (really_return)
19536 switch ((int) ARM_FUNC_TYPE (func_type))
19538 case ARM_FT_ISR:
19539 case ARM_FT_FIQ:
19540 /* ??? This is wrong for unified assembly syntax. */
19541 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19542 break;
19544 case ARM_FT_INTERWORKED:
19545 sprintf (instr, "bx%s\t%%|lr", conditional);
19546 break;
19548 case ARM_FT_EXCEPTION:
19549 /* ??? This is wrong for unified assembly syntax. */
19550 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19551 break;
19553 default:
19554 /* Use bx if it's available. */
19555 if (arm_arch5 || arm_arch4t)
19556 sprintf (instr, "bx%s\t%%|lr", conditional);
19557 else
19558 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19559 break;
19562 output_asm_insn (instr, & operand);
19565 return "";
19568 /* Write the function name into the code section, directly preceding
19569 the function prologue.
19571 Code will be output similar to this:
19573 .ascii "arm_poke_function_name", 0
19574 .align
19576 .word 0xff000000 + (t1 - t0)
19577 arm_poke_function_name
19578 mov ip, sp
19579 stmfd sp!, {fp, ip, lr, pc}
19580 sub fp, ip, #4
19582 When performing a stack backtrace, code can inspect the value
19583 of 'pc' stored at 'fp' + 0. If the trace function then looks
19584 at location pc - 12 and the top 8 bits are set, then we know
19585 that there is a function name embedded immediately preceding this
19586 location, and that its length is ((pc[-3]) & ~0xff000000).
19588 We assume that pc is declared as a pointer to an unsigned long.
19590 It is of no benefit to output the function name if we are assembling
19591 a leaf function. These function types will not contain a stack
19592 backtrace structure, therefore it is not possible to determine the
19593 function name. */
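/* A minimal, editorial sketch (not part of GCC) of how a backtracing
   routine might recover the embedded name.  The helper name is
   hypothetical; SAVED_PC is the 'pc' value described above, read from
   the frame and treated as a pointer to 32-bit unsigned longs.  If the
   top eight bits of the marker word are not all set there is no name;
   otherwise its low 24 bits give the word-aligned length of the name
   block, which ends immediately before the marker:

     static const char *
     arm_find_poked_name (const unsigned long *saved_pc)
     {
       unsigned long marker = saved_pc[-3];

       if ((marker & 0xff000000) != 0xff000000)
         return 0;

       return (const char *) (saved_pc - 3) - (marker & 0x00ffffff);
     }
*/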
19594 void
19595 arm_poke_function_name (FILE *stream, const char *name)
19597 unsigned long alignlength;
19598 unsigned long length;
19599 rtx x;
19601 length = strlen (name) + 1;
19602 alignlength = ROUND_UP_WORD (length);
19604 ASM_OUTPUT_ASCII (stream, name, length);
19605 ASM_OUTPUT_ALIGN (stream, 2);
19606 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19607 assemble_aligned_integer (UNITS_PER_WORD, x);
19610 /* Place some comments into the assembler stream
19611 describing the current function. */
19612 static void
19613 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19615 unsigned long func_type;
19617 /* ??? Do we want to print some of the below anyway? */
19618 if (TARGET_THUMB1)
19619 return;
19621 /* Sanity check. */
19622 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19624 func_type = arm_current_func_type ();
19626 switch ((int) ARM_FUNC_TYPE (func_type))
19628 default:
19629 case ARM_FT_NORMAL:
19630 break;
19631 case ARM_FT_INTERWORKED:
19632 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19633 break;
19634 case ARM_FT_ISR:
19635 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19636 break;
19637 case ARM_FT_FIQ:
19638 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19639 break;
19640 case ARM_FT_EXCEPTION:
19641 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19642 break;
19645 if (IS_NAKED (func_type))
19646 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19648 if (IS_VOLATILE (func_type))
19649 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19651 if (IS_NESTED (func_type))
19652 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19653 if (IS_STACKALIGN (func_type))
19654 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19656 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19657 crtl->args.size,
19658 crtl->args.pretend_args_size, frame_size);
19660 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19661 frame_pointer_needed,
19662 cfun->machine->uses_anonymous_args);
19664 if (cfun->machine->lr_save_eliminated)
19665 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19667 if (crtl->calls_eh_return)
19668 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19672 static void
19673 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19674 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19676 arm_stack_offsets *offsets;
19678 if (TARGET_THUMB1)
19680 int regno;
19682 /* Emit any call-via-reg trampolines that are needed for v4t support
19683 of call_reg and call_value_reg type insns. */
19684 for (regno = 0; regno < LR_REGNUM; regno++)
19686 rtx label = cfun->machine->call_via[regno];
19688 if (label != NULL)
19690 switch_to_section (function_section (current_function_decl));
19691 targetm.asm_out.internal_label (asm_out_file, "L",
19692 CODE_LABEL_NUMBER (label));
19693 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19697 /* ??? Probably not safe to set this here, since it assumes that a
19698 function will be emitted as assembly immediately after we generate
19699 RTL for it. This does not happen for inline functions. */
19700 cfun->machine->return_used_this_function = 0;
19702 else /* TARGET_32BIT */
19704 /* We need to take into account any stack-frame rounding. */
19705 offsets = arm_get_frame_offsets ();
19707 gcc_assert (!use_return_insn (FALSE, NULL)
19708 || (cfun->machine->return_used_this_function != 0)
19709 || offsets->saved_regs == offsets->outgoing_args
19710 || frame_pointer_needed);
19714 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19715 STR and STRD. If an even number of registers is being pushed, one
19716 STRD pattern is created for each register pair. If an
19717 odd number of registers is pushed, emit an initial STR followed by
19718 as many STRD instructions as are needed. This works best when the
19719 stack is initially 64-bit aligned (the normal case), since it
19720 ensures that each STRD is also 64-bit aligned. */
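/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r6, r7, lr} the code below
   emits roughly

       str   r4, [sp, #-20]!
       strd  r5, r6, [sp, #4]
       strd  r7, lr, [sp, #12]

   i.e. the odd register goes out first with writeback, and the pairs
   (which need not be consecutive registers in Thumb-2) follow at fixed
   offsets.  */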
19721 static void
19722 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19724 int num_regs = 0;
19725 int i;
19726 int regno;
19727 rtx par = NULL_RTX;
19728 rtx dwarf = NULL_RTX;
19729 rtx tmp;
19730 bool first = true;
19732 num_regs = bit_count (saved_regs_mask);
19734 /* Must be at least one register to save, and can't save SP or PC. */
19735 gcc_assert (num_regs > 0 && num_regs <= 14);
19736 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19737 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19739 /* Create sequence for DWARF info. All the frame-related data for
19740 debugging is held in this wrapper. */
19741 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19743 /* Describe the stack adjustment. */
19744 tmp = gen_rtx_SET (VOIDmode,
19745 stack_pointer_rtx,
19746 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 XVECEXP (dwarf, 0, 0) = tmp;
19750 /* Find the first register. */
19751 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19754 i = 0;
19756 /* If there's an odd number of registers to push, start off by
19757 pushing a single register. This ensures that subsequent strd
19758 operations are dword aligned (assuming that SP was originally
19759 64-bit aligned). */
19760 if ((num_regs & 1) != 0)
19762 rtx reg, mem, insn;
19764 reg = gen_rtx_REG (SImode, regno);
19765 if (num_regs == 1)
19766 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19767 stack_pointer_rtx));
19768 else
19769 mem = gen_frame_mem (Pmode,
19770 gen_rtx_PRE_MODIFY
19771 (Pmode, stack_pointer_rtx,
19772 plus_constant (Pmode, stack_pointer_rtx,
19773 -4 * num_regs)));
19775 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19776 RTX_FRAME_RELATED_P (tmp) = 1;
19777 insn = emit_insn (tmp);
19778 RTX_FRAME_RELATED_P (insn) = 1;
19779 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19780 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19781 reg);
19782 RTX_FRAME_RELATED_P (tmp) = 1;
19783 i++;
19784 regno++;
19785 XVECEXP (dwarf, 0, i) = tmp;
19786 first = false;
19789 while (i < num_regs)
19790 if (saved_regs_mask & (1 << regno))
19792 rtx reg1, reg2, mem1, mem2;
19793 rtx tmp0, tmp1, tmp2;
19794 int regno2;
19796 /* Find the register to pair with this one. */
19797 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19798 regno2++)
19801 reg1 = gen_rtx_REG (SImode, regno);
19802 reg2 = gen_rtx_REG (SImode, regno2);
19804 if (first)
19806 rtx insn;
19808 first = false;
19809 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19810 stack_pointer_rtx,
19811 -4 * num_regs));
19812 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19813 stack_pointer_rtx,
19814 -4 * (num_regs - 1)));
19815 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19816 plus_constant (Pmode, stack_pointer_rtx,
19817 -4 * (num_regs)));
19818 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19819 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19820 RTX_FRAME_RELATED_P (tmp0) = 1;
19821 RTX_FRAME_RELATED_P (tmp1) = 1;
19822 RTX_FRAME_RELATED_P (tmp2) = 1;
19823 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19824 XVECEXP (par, 0, 0) = tmp0;
19825 XVECEXP (par, 0, 1) = tmp1;
19826 XVECEXP (par, 0, 2) = tmp2;
19827 insn = emit_insn (par);
19828 RTX_FRAME_RELATED_P (insn) = 1;
19829 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19831 else
19833 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19834 stack_pointer_rtx,
19835 4 * i));
19836 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19837 stack_pointer_rtx,
19838 4 * (i + 1)));
19839 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19840 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19841 RTX_FRAME_RELATED_P (tmp1) = 1;
19842 RTX_FRAME_RELATED_P (tmp2) = 1;
19843 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19844 XVECEXP (par, 0, 0) = tmp1;
19845 XVECEXP (par, 0, 1) = tmp2;
19846 emit_insn (par);
19849 /* Create unwind information. This is an approximation. */
19850 tmp1 = gen_rtx_SET (VOIDmode,
19851 gen_frame_mem (Pmode,
19852 plus_constant (Pmode,
19853 stack_pointer_rtx,
19854 4 * i)),
19855 reg1);
19856 tmp2 = gen_rtx_SET (VOIDmode,
19857 gen_frame_mem (Pmode,
19858 plus_constant (Pmode,
19859 stack_pointer_rtx,
19860 4 * (i + 1))),
19861 reg2);
19863 RTX_FRAME_RELATED_P (tmp1) = 1;
19864 RTX_FRAME_RELATED_P (tmp2) = 1;
19865 XVECEXP (dwarf, 0, i + 1) = tmp1;
19866 XVECEXP (dwarf, 0, i + 2) = tmp2;
19867 i += 2;
19868 regno = regno2 + 1;
19870 else
19871 regno++;
19873 return;
19876 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19877 whenever possible, otherwise it emits single-word stores. The first store
19878 also allocates stack space for all saved registers, using pre-indexed
19879 addressing with writeback. All other stores use offset addressing. If no STRD
19880 can be emitted, this function emits a sequence of single-word stores,
19881 and not an STM as before, because single-word stores provide more freedom
19882 for scheduling and can be turned into an STM by peephole optimizations. */
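/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r7} the code below emits roughly

       strd  r4, r5, [sp, #-12]!
       str   r7, [sp, #8]

   while a mask such as {r5, r7}, which contains no even/odd pair, falls
   back to single-word stores only.  */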
19883 static void
19884 arm_emit_strd_push (unsigned long saved_regs_mask)
19886 int num_regs = 0;
19887 int i, j, dwarf_index = 0;
19888 int offset = 0;
19889 rtx dwarf = NULL_RTX;
19890 rtx insn = NULL_RTX;
19891 rtx tmp, mem;
19893 /* TODO: More efficient code could be emitted by changing the
19894 layout, e.g., first push all pairs that can use STRD to keep the
19895 stack aligned, and then push all other registers. */
19896 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19897 if (saved_regs_mask & (1 << i))
19898 num_regs++;
19900 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19901 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19902 gcc_assert (num_regs > 0);
19904 /* Create sequence for DWARF info. */
19905 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19907 /* For dwarf info, we generate explicit stack update. */
19908 tmp = gen_rtx_SET (VOIDmode,
19909 stack_pointer_rtx,
19910 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19911 RTX_FRAME_RELATED_P (tmp) = 1;
19912 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19914 /* Save registers. */
19915 offset = - 4 * num_regs;
19916 j = 0;
19917 while (j <= LAST_ARM_REGNUM)
19918 if (saved_regs_mask & (1 << j))
19920 if ((j % 2 == 0)
19921 && (saved_regs_mask & (1 << (j + 1))))
19923 /* The current register and the next register form a register pair for
19924 which STRD can be generated. */
19925 if (offset < 0)
19927 /* Allocate stack space for all saved registers. */
19928 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19929 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19930 mem = gen_frame_mem (DImode, tmp);
19931 offset = 0;
19933 else if (offset > 0)
19934 mem = gen_frame_mem (DImode,
19935 plus_constant (Pmode,
19936 stack_pointer_rtx,
19937 offset));
19938 else
19939 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19941 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19942 RTX_FRAME_RELATED_P (tmp) = 1;
19943 tmp = emit_insn (tmp);
19945 /* Record the first store insn. */
19946 if (dwarf_index == 1)
19947 insn = tmp;
19949 /* Generate dwarf info. */
19950 mem = gen_frame_mem (SImode,
19951 plus_constant (Pmode,
19952 stack_pointer_rtx,
19953 offset));
19954 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19955 RTX_FRAME_RELATED_P (tmp) = 1;
19956 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19958 mem = gen_frame_mem (SImode,
19959 plus_constant (Pmode,
19960 stack_pointer_rtx,
19961 offset + 4));
19962 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19963 RTX_FRAME_RELATED_P (tmp) = 1;
19964 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19966 offset += 8;
19967 j += 2;
19969 else
19971 /* Emit a single word store. */
19972 if (offset < 0)
19974 /* Allocate stack space for all saved registers. */
19975 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19976 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19977 mem = gen_frame_mem (SImode, tmp);
19978 offset = 0;
19980 else if (offset > 0)
19981 mem = gen_frame_mem (SImode,
19982 plus_constant (Pmode,
19983 stack_pointer_rtx,
19984 offset));
19985 else
19986 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19988 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19989 RTX_FRAME_RELATED_P (tmp) = 1;
19990 tmp = emit_insn (tmp);
19992 /* Record the first store insn. */
19993 if (dwarf_index == 1)
19994 insn = tmp;
19996 /* Generate dwarf info. */
19997 mem = gen_frame_mem (SImode,
19998 plus_constant(Pmode,
19999 stack_pointer_rtx,
20000 offset));
20001 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20002 RTX_FRAME_RELATED_P (tmp) = 1;
20003 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20005 offset += 4;
20006 j += 1;
20009 else
20010 j++;
20012 /* Attach dwarf info to the first insn we generate. */
20013 gcc_assert (insn != NULL_RTX);
20014 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20015 RTX_FRAME_RELATED_P (insn) = 1;
20018 /* Generate and emit an insn that we will recognize as a push_multi.
20019 Unfortunately, since this insn does not reflect very well the actual
20020 semantics of the operation, we need to annotate the insn for the benefit
20021 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20022 MASK for registers that should be annotated for DWARF2 frame unwind
20023 information. */
20024 static rtx
20025 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20027 int num_regs = 0;
20028 int num_dwarf_regs = 0;
20029 int i, j;
20030 rtx par;
20031 rtx dwarf;
20032 int dwarf_par_index;
20033 rtx tmp, reg;
20035 /* We don't record the PC in the dwarf frame information. */
20036 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20038 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20040 if (mask & (1 << i))
20041 num_regs++;
20042 if (dwarf_regs_mask & (1 << i))
20043 num_dwarf_regs++;
20046 gcc_assert (num_regs && num_regs <= 16);
20047 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20049 /* For the body of the insn we are going to generate an UNSPEC in
20050 parallel with several USEs. This allows the insn to be recognized
20051 by the push_multi pattern in the arm.md file.
20053 The body of the insn looks something like this:
20055 (parallel [
20056 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20057 (const_int:SI <num>)))
20058 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20059 (use (reg:SI XX))
20060 (use (reg:SI YY))
20064 For the frame note however, we try to be more explicit and actually
20065 show each register being stored into the stack frame, plus a (single)
20066 decrement of the stack pointer. We do it this way in order to be
20067 friendly to the stack unwinding code, which only wants to see a single
20068 stack decrement per instruction. The RTL we generate for the note looks
20069 something like this:
20071 (sequence [
20072 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20073 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20074 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20075 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20079 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20080 instead we'd have a parallel expression detailing all
20081 the stores to the various memory addresses so that debug
20082 information is more up-to-date. Remember however while writing
20083 this to take care of the constraints with the push instruction.
20085 Note also that this has to be taken care of for the VFP registers.
20087 For more see PR43399. */
20089 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20090 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20091 dwarf_par_index = 1;
20093 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20095 if (mask & (1 << i))
20097 reg = gen_rtx_REG (SImode, i);
20099 XVECEXP (par, 0, 0)
20100 = gen_rtx_SET (VOIDmode,
20101 gen_frame_mem
20102 (BLKmode,
20103 gen_rtx_PRE_MODIFY (Pmode,
20104 stack_pointer_rtx,
20105 plus_constant
20106 (Pmode, stack_pointer_rtx,
20107 -4 * num_regs))
20109 gen_rtx_UNSPEC (BLKmode,
20110 gen_rtvec (1, reg),
20111 UNSPEC_PUSH_MULT));
20113 if (dwarf_regs_mask & (1 << i))
20115 tmp = gen_rtx_SET (VOIDmode,
20116 gen_frame_mem (SImode, stack_pointer_rtx),
20117 reg);
20118 RTX_FRAME_RELATED_P (tmp) = 1;
20119 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20122 break;
20126 for (j = 1, i++; j < num_regs; i++)
20128 if (mask & (1 << i))
20130 reg = gen_rtx_REG (SImode, i);
20132 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20134 if (dwarf_regs_mask & (1 << i))
20137 = gen_rtx_SET (VOIDmode,
20138 gen_frame_mem
20139 (SImode,
20140 plus_constant (Pmode, stack_pointer_rtx,
20141 4 * j)),
20142 reg);
20143 RTX_FRAME_RELATED_P (tmp) = 1;
20144 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20147 j++;
20151 par = emit_insn (par);
20153 tmp = gen_rtx_SET (VOIDmode,
20154 stack_pointer_rtx,
20155 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20156 RTX_FRAME_RELATED_P (tmp) = 1;
20157 XVECEXP (dwarf, 0, 0) = tmp;
20159 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20161 return par;
20164 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20165 SIZE is the offset to be adjusted.
20166 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20167 static void
20168 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20170 rtx dwarf;
20172 RTX_FRAME_RELATED_P (insn) = 1;
20173 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20174 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20177 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20178 SAVED_REGS_MASK shows which registers need to be restored.
20180 Unfortunately, since this insn does not reflect very well the actual
20181 semantics of the operation, we need to annotate the insn for the benefit
20182 of DWARF2 frame unwind information. */
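/* For illustration (a sketch, not normative): with SAVED_REGS_MASK
   covering {r4, r5, pc}, the PARALLEL built below holds a return, the
   SP adjustment by 12 and one SET per register, and is printed by the
   pop_multi pattern as something like "pop {r4, r5, pc}".  */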
20183 static void
20184 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20186 int num_regs = 0;
20187 int i, j;
20188 rtx par;
20189 rtx dwarf = NULL_RTX;
20190 rtx tmp, reg;
20191 bool return_in_pc;
20192 int offset_adj;
20193 int emit_update;
20195 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20196 offset_adj = return_in_pc ? 1 : 0;
20197 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20198 if (saved_regs_mask & (1 << i))
20199 num_regs++;
20201 gcc_assert (num_regs && num_regs <= 16);
20203 /* If SP is in reglist, then we don't emit SP update insn. */
20204 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20206 /* The parallel needs to hold num_regs SETs
20207 and one SET for the stack update. */
20208 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20210 if (return_in_pc)
20212 tmp = ret_rtx;
20213 XVECEXP (par, 0, 0) = tmp;
20216 if (emit_update)
20218 /* Increment the stack pointer, based on there being
20219 num_regs 4-byte registers to restore. */
20220 tmp = gen_rtx_SET (VOIDmode,
20221 stack_pointer_rtx,
20222 plus_constant (Pmode,
20223 stack_pointer_rtx,
20224 4 * num_regs));
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (par, 0, offset_adj) = tmp;
20229 /* Now restore every reg, which may include PC. */
20230 for (j = 0, i = 0; j < num_regs; i++)
20231 if (saved_regs_mask & (1 << i))
20233 reg = gen_rtx_REG (SImode, i);
20234 if ((num_regs == 1) && emit_update && !return_in_pc)
20236 /* Emit single load with writeback. */
20237 tmp = gen_frame_mem (SImode,
20238 gen_rtx_POST_INC (Pmode,
20239 stack_pointer_rtx));
20240 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20241 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20242 return;
20245 tmp = gen_rtx_SET (VOIDmode,
20246 reg,
20247 gen_frame_mem
20248 (SImode,
20249 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20250 RTX_FRAME_RELATED_P (tmp) = 1;
20251 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20253 /* We need to maintain a sequence for DWARF info too. As dwarf info
20254 should not have PC, skip PC. */
20255 if (i != PC_REGNUM)
20256 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20258 j++;
20261 if (return_in_pc)
20262 par = emit_jump_insn (par);
20263 else
20264 par = emit_insn (par);
20266 REG_NOTES (par) = dwarf;
20267 if (!return_in_pc)
20268 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20269 stack_pointer_rtx, stack_pointer_rtx);
20272 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20273 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20275 Unfortunately, since this insn does not reflect very well the actual
20276 semantics of the operation, we need to annotate the insn for the benefit
20277 of DWARF2 frame unwind information. */
20278 static void
20279 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20281 int i, j;
20282 rtx par;
20283 rtx dwarf = NULL_RTX;
20284 rtx tmp, reg;
20286 gcc_assert (num_regs && num_regs <= 32);
20288 /* Workaround ARM10 VFPr1 bug. */
20289 if (num_regs == 2 && !arm_arch6)
20291 if (first_reg == 15)
20292 first_reg--;
20294 num_regs++;
20297 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20298 there could be up to 32 D-registers to restore.
20299 If there are more than 16 D-registers, make two recursive calls,
20300 each of which emits one pop_multi instruction. */
20301 if (num_regs > 16)
20303 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20304 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20305 return;
20308 /* The parallel needs to hold num_regs SETs
20309 and one SET for the stack update. */
20310 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20312 /* Increment the stack pointer, based on there being
20313 num_regs 8-byte registers to restore. */
20314 tmp = gen_rtx_SET (VOIDmode,
20315 base_reg,
20316 plus_constant (Pmode, base_reg, 8 * num_regs));
20317 RTX_FRAME_RELATED_P (tmp) = 1;
20318 XVECEXP (par, 0, 0) = tmp;
20320 /* Now show every reg that will be restored, using a SET for each. */
20321 for (j = 0, i=first_reg; j < num_regs; i += 2)
20323 reg = gen_rtx_REG (DFmode, i);
20325 tmp = gen_rtx_SET (VOIDmode,
20326 reg,
20327 gen_frame_mem
20328 (DFmode,
20329 plus_constant (Pmode, base_reg, 8 * j)));
20330 RTX_FRAME_RELATED_P (tmp) = 1;
20331 XVECEXP (par, 0, j + 1) = tmp;
20333 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20335 j++;
20338 par = emit_insn (par);
20339 REG_NOTES (par) = dwarf;
20341 /* Make sure the CFA doesn't remain based on IP_REGNUM, to allow unwinding from FP. */
20342 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20344 RTX_FRAME_RELATED_P (par) = 1;
20345 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20347 else
20348 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20349 base_reg, base_reg);
20352 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20353 even number of registers is being popped, multiple LDRD patterns are created
20354 for all register pairs. If an odd number of registers is popped, the last
20355 register is loaded using an LDR pattern. */
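/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r6, r7, pc} the code below
   emits roughly

       ldrd  r4, r5, [sp]
       ldrd  r6, r7, [sp, #8]
       add   sp, sp, #16
       ldr   pc, [sp], #4

   with the final load doubling as the function return.  */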
20356 static void
20357 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20359 int num_regs = 0;
20360 int i, j;
20361 rtx par = NULL_RTX;
20362 rtx dwarf = NULL_RTX;
20363 rtx tmp, reg, tmp1;
20364 bool return_in_pc;
20366 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20367 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20368 if (saved_regs_mask & (1 << i))
20369 num_regs++;
20371 gcc_assert (num_regs && num_regs <= 16);
20373 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20374 to be popped. So, if num_regs is even, now it will become odd,
20375 and we can generate pop with PC. If num_regs is odd, it will be
20376 even now, and ldr with return can be generated for PC. */
20377 if (return_in_pc)
20378 num_regs--;
20380 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20382 /* Var j iterates over all the registers to gather all the registers in
20383 saved_regs_mask. Var i gives index of saved registers in stack frame.
20384 A PARALLEL RTX of register-pair is created here, so that pattern for
20385 LDRD can be matched. As PC is always last register to be popped, and
20386 we have already decremented num_regs if PC, we don't have to worry
20387 about PC in this loop. */
20388 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20389 if (saved_regs_mask & (1 << j))
20391 /* Create RTX for memory load. */
20392 reg = gen_rtx_REG (SImode, j);
20393 tmp = gen_rtx_SET (SImode,
20394 reg,
20395 gen_frame_mem (SImode,
20396 plus_constant (Pmode,
20397 stack_pointer_rtx, 4 * i)));
20398 RTX_FRAME_RELATED_P (tmp) = 1;
20400 if (i % 2 == 0)
20402 /* When saved-register index (i) is even, the RTX to be emitted is
20403 yet to be created. Hence create it first. The LDRD pattern we
20404 are generating is :
20405 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20406 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20407 where target registers need not be consecutive. */
20408 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20409 dwarf = NULL_RTX;
20412 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20413 added as 0th element and if i is odd, reg_i is added as 1st element
20414 of LDRD pattern shown above. */
20415 XVECEXP (par, 0, (i % 2)) = tmp;
20416 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20418 if ((i % 2) == 1)
20420 /* When saved-register index (i) is odd, RTXs for both the registers
20421 to be loaded are generated in above given LDRD pattern, and the
20422 pattern can be emitted now. */
20423 par = emit_insn (par);
20424 REG_NOTES (par) = dwarf;
20425 RTX_FRAME_RELATED_P (par) = 1;
20428 i++;
20431 /* If the number of registers popped is odd AND return_in_pc is false, OR the
20432 number of registers is even AND return_in_pc is true, the last register is
20433 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20434 then LDR with post increment. */
20436 /* Increment the stack pointer, based on there being
20437 num_regs 4-byte registers to restore. */
20438 tmp = gen_rtx_SET (VOIDmode,
20439 stack_pointer_rtx,
20440 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20441 RTX_FRAME_RELATED_P (tmp) = 1;
20442 tmp = emit_insn (tmp);
20443 if (!return_in_pc)
20445 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20446 stack_pointer_rtx, stack_pointer_rtx);
20449 dwarf = NULL_RTX;
20451 if (((num_regs % 2) == 1 && !return_in_pc)
20452 || ((num_regs % 2) == 0 && return_in_pc))
20454 /* Scan for the single register to be popped. Skip until the saved
20455 register is found. */
20456 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20458 /* Gen LDR with post increment here. */
20459 tmp1 = gen_rtx_MEM (SImode,
20460 gen_rtx_POST_INC (SImode,
20461 stack_pointer_rtx));
20462 set_mem_alias_set (tmp1, get_frame_alias_set ());
20464 reg = gen_rtx_REG (SImode, j);
20465 tmp = gen_rtx_SET (SImode, reg, tmp1);
20466 RTX_FRAME_RELATED_P (tmp) = 1;
20467 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20469 if (return_in_pc)
20471 /* If return_in_pc, j must be PC_REGNUM. */
20472 gcc_assert (j == PC_REGNUM);
20473 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20474 XVECEXP (par, 0, 0) = ret_rtx;
20475 XVECEXP (par, 0, 1) = tmp;
20476 par = emit_jump_insn (par);
20478 else
20480 par = emit_insn (tmp);
20481 REG_NOTES (par) = dwarf;
20482 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20483 stack_pointer_rtx, stack_pointer_rtx);
20487 else if ((num_regs % 2) == 1 && return_in_pc)
20489 /* There are 2 registers to be popped. So, generate the pattern
20490 pop_multiple_with_stack_update_and_return to pop in PC. */
20491 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20494 return;
20497 /* LDRD in ARM mode needs consecutive registers as operands. This function
20498 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20499 offset addressing and then generates one separate stack update. This provides
20500 more scheduling freedom, compared to writeback on every load. However,
20501 if the function returns using load into PC directly
20502 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20503 before the last load. TODO: Add a peephole optimization to recognize
20504 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20505 peephole optimization to merge the load at stack-offset zero
20506 with the stack update instruction using load with writeback
20507 in post-index addressing mode. */
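/* For illustration (a hypothetical mask; the assembly is only a sketch):
   with SAVED_REGS_MASK covering {r4, r5, r6, pc} the code below emits
   roughly

       ldrd  r4, r5, [sp]
       ldr   r6, [sp, #8]
       add   sp, sp, #12
       ldr   pc, [sp], #4

   since ARM-mode LDRD needs an even/odd pair of consecutive registers,
   r6 is loaded on its own.  */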
20508 static void
20509 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20511 int j = 0;
20512 int offset = 0;
20513 rtx par = NULL_RTX;
20514 rtx dwarf = NULL_RTX;
20515 rtx tmp, mem;
20517 /* Restore saved registers. */
20518 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20519 j = 0;
20520 while (j <= LAST_ARM_REGNUM)
20521 if (saved_regs_mask & (1 << j))
20523 if ((j % 2) == 0
20524 && (saved_regs_mask & (1 << (j + 1)))
20525 && (j + 1) != PC_REGNUM)
20527 /* Current register and next register form register pair for which
20528 LDRD can be generated. PC is always the last register popped, and
20529 we handle it separately. */
20530 if (offset > 0)
20531 mem = gen_frame_mem (DImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx,
20534 offset));
20535 else
20536 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20538 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20539 tmp = emit_insn (tmp);
20540 RTX_FRAME_RELATED_P (tmp) = 1;
20542 /* Generate dwarf info. */
20544 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20545 gen_rtx_REG (SImode, j),
20546 NULL_RTX);
20547 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20548 gen_rtx_REG (SImode, j + 1),
20549 dwarf);
20551 REG_NOTES (tmp) = dwarf;
20553 offset += 8;
20554 j += 2;
20556 else if (j != PC_REGNUM)
20558 /* Emit a single word load. */
20559 if (offset > 0)
20560 mem = gen_frame_mem (SImode,
20561 plus_constant (Pmode,
20562 stack_pointer_rtx,
20563 offset));
20564 else
20565 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20567 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20568 tmp = emit_insn (tmp);
20569 RTX_FRAME_RELATED_P (tmp) = 1;
20571 /* Generate dwarf info. */
20572 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20573 gen_rtx_REG (SImode, j),
20574 NULL_RTX);
20576 offset += 4;
20577 j += 1;
20579 else /* j == PC_REGNUM */
20580 j++;
20582 else
20583 j++;
20585 /* Update the stack. */
20586 if (offset > 0)
20588 tmp = gen_rtx_SET (Pmode,
20589 stack_pointer_rtx,
20590 plus_constant (Pmode,
20591 stack_pointer_rtx,
20592 offset));
20593 tmp = emit_insn (tmp);
20594 arm_add_cfa_adjust_cfa_note (tmp, offset,
20595 stack_pointer_rtx, stack_pointer_rtx);
20596 offset = 0;
20599 if (saved_regs_mask & (1 << PC_REGNUM))
20601 /* Only PC is to be popped. */
20602 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20603 XVECEXP (par, 0, 0) = ret_rtx;
20604 tmp = gen_rtx_SET (SImode,
20605 gen_rtx_REG (SImode, PC_REGNUM),
20606 gen_frame_mem (SImode,
20607 gen_rtx_POST_INC (SImode,
20608 stack_pointer_rtx)));
20609 RTX_FRAME_RELATED_P (tmp) = 1;
20610 XVECEXP (par, 0, 1) = tmp;
20611 par = emit_jump_insn (par);
20613 /* Generate dwarf info. */
20614 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20615 gen_rtx_REG (SImode, PC_REGNUM),
20616 NULL_RTX);
20617 REG_NOTES (par) = dwarf;
20618 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20619 stack_pointer_rtx, stack_pointer_rtx);
20623 /* Calculate the size of the return value that is passed in registers. */
20624 static unsigned
20625 arm_size_return_regs (void)
20627 machine_mode mode;
20629 if (crtl->return_rtx != 0)
20630 mode = GET_MODE (crtl->return_rtx);
20631 else
20632 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20634 return GET_MODE_SIZE (mode);
20637 /* Return true if the current function needs to save/restore LR. */
20638 static bool
20639 thumb_force_lr_save (void)
20641 return !cfun->machine->lr_save_eliminated
20642 && (!leaf_function_p ()
20643 || thumb_far_jump_used_p ()
20644 || df_regs_ever_live_p (LR_REGNUM));
20647 /* Return true if CALL is an indirect tail call. If it is, we do
20648 not know whether r3 will be available, because an indirect tail
20649 call may need r3 to hold its target address. */
20650 static bool
20651 is_indirect_tailcall_p (rtx call)
20653 rtx pat = PATTERN (call);
20655 /* Indirect tail call. */
20656 pat = XVECEXP (pat, 0, 0);
20657 if (GET_CODE (pat) == SET)
20658 pat = SET_SRC (pat);
20660 pat = XEXP (XEXP (pat, 0), 0);
20661 return REG_P (pat);
20664 /* Return true if r3 is used by any of the tail call insns in the
20665 current function. */
20666 static bool
20667 any_sibcall_could_use_r3 (void)
20669 edge_iterator ei;
20670 edge e;
20672 if (!crtl->tail_call_emit)
20673 return false;
20674 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20675 if (e->flags & EDGE_SIBCALL)
20677 rtx call = BB_END (e->src);
20678 if (!CALL_P (call))
20679 call = prev_nonnote_nondebug_insn (call);
20680 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20681 if (find_regno_fusage (call, USE, 3)
20682 || is_indirect_tailcall_p (call))
20683 return true;
20685 return false;
20689 /* Compute the distance from register FROM to register TO.
20690 These can be the arg pointer (26), the soft frame pointer (25),
20691 the stack pointer (13) or the hard frame pointer (11).
20692 In Thumb mode r7 is used as the hard frame pointer, if needed.
20693 Typical stack layout looks like this:
20695 old stack pointer -> | |
20696 ----
20697 | | \
20698 | | saved arguments for
20699 | | vararg functions
20700 | | /
20702 hard FP & arg pointer -> | | \
20703 | | stack
20704 | | frame
20705 | | /
20707 | | \
20708 | | call saved
20709 | | registers
20710 soft frame pointer -> | | /
20712 | | \
20713 | | local
20714 | | variables
20715 locals base pointer -> | | /
20717 | | \
20718 | | outgoing
20719 | | arguments
20720 current stack pointer -> | | /
20723 For a given function some or all of these stack components
20724 may not be needed, giving rise to the possibility of
20725 eliminating some of the registers.
20727 The values returned by this function must reflect the behavior
20728 of arm_expand_prologue() and arm_compute_save_reg_mask().
20730 The sign of the number returned reflects the direction of stack
20731 growth, so the values are positive for all eliminations except
20732 from the soft frame pointer to the hard frame pointer.
20734 SFP may point just inside the local variables block to ensure correct
20735 alignment. */
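/* As an editorial illustration (the switch in
   arm_compute_initial_elimination_offset below is the authority), the
   eliminations reduce to simple differences of the cached offsets:

     ARG_POINTER   -> FRAME_POINTER (soft FP) : soft_frame - saved_args
     ARG_POINTER   -> ARM hard FP             : frame - saved_args - 4
     ARG_POINTER   -> STACK_POINTER           : outgoing_args - (saved_args + 4)
     FRAME_POINTER -> ARM hard FP             : frame - soft_frame
     FRAME_POINTER -> STACK_POINTER           : outgoing_args - soft_frame  */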
20738 /* Calculate stack offsets. These are used to calculate register elimination
20739 offsets and in prologue/epilogue code. Also calculates which registers
20740 should be saved. */
20742 static arm_stack_offsets *
20743 arm_get_frame_offsets (void)
20745 struct arm_stack_offsets *offsets;
20746 unsigned long func_type;
20747 int leaf;
20748 int saved;
20749 int core_saved;
20750 HOST_WIDE_INT frame_size;
20751 int i;
20753 offsets = &cfun->machine->stack_offsets;
20755 /* We need to know if we are a leaf function. Unfortunately, it
20756 is possible to be called after start_sequence has been called,
20757 which causes get_insns to return the insns for the sequence,
20758 not the function, which will cause leaf_function_p to return
20759 the incorrect result.
20761 However, we only need to know about leaf functions once reload has completed, and the
20762 frame size cannot be changed after that time, so we can safely
20763 use the cached value. */
20765 if (reload_completed)
20766 return offsets;
20768 /* Initially this is the size of the local variables. It will be translated
20769 into an offset once we have determined the size of preceding data. */
20770 frame_size = ROUND_UP_WORD (get_frame_size ());
20772 leaf = leaf_function_p ();
20774 /* Space for variadic functions. */
20775 offsets->saved_args = crtl->args.pretend_args_size;
20777 /* In Thumb mode this is incorrect, but never used. */
20778 offsets->frame
20779 = (offsets->saved_args
20780 + arm_compute_static_chain_stack_bytes ()
20781 + (frame_pointer_needed ? 4 : 0));
20783 if (TARGET_32BIT)
20785 unsigned int regno;
20787 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20788 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20789 saved = core_saved;
20791 /* We know that SP will be doubleword aligned on entry, and we must
20792 preserve that condition at any subroutine call. We also require the
20793 soft frame pointer to be doubleword aligned. */
20795 if (TARGET_REALLY_IWMMXT)
20797 /* Check for the call-saved iWMMXt registers. */
20798 for (regno = FIRST_IWMMXT_REGNUM;
20799 regno <= LAST_IWMMXT_REGNUM;
20800 regno++)
20801 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20802 saved += 8;
20805 func_type = arm_current_func_type ();
20806 /* Space for saved VFP registers. */
20807 if (! IS_VOLATILE (func_type)
20808 && TARGET_HARD_FLOAT && TARGET_VFP)
20809 saved += arm_get_vfp_saved_size ();
20811 else /* TARGET_THUMB1 */
20813 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20814 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20815 saved = core_saved;
20816 if (TARGET_BACKTRACE)
20817 saved += 16;
20820 /* Saved registers include the stack frame. */
20821 offsets->saved_regs
20822 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20823 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20825 /* A leaf function does not need any stack alignment if it has nothing
20826 on the stack. */
20827 if (leaf && frame_size == 0
20828 /* However if it calls alloca(), we have a dynamically allocated
20829 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20830 && ! cfun->calls_alloca)
20832 offsets->outgoing_args = offsets->soft_frame;
20833 offsets->locals_base = offsets->soft_frame;
20834 return offsets;
20837 /* Ensure SFP has the correct alignment. */
20838 if (ARM_DOUBLEWORD_ALIGN
20839 && (offsets->soft_frame & 7))
20841 offsets->soft_frame += 4;
20842 /* Try to align stack by pushing an extra reg. Don't bother doing this
20843 when there is a stack frame as the alignment will be rolled into
20844 the normal stack adjustment. */
20845 if (frame_size + crtl->outgoing_args_size == 0)
20847 int reg = -1;
20849 /* Register r3 is caller-saved. Normally it does not need to be
20850 saved on entry by the prologue. However if we choose to save
20851 it for padding then we may confuse the compiler into thinking
20852 a prologue sequence is required when in fact it is not. This
20853 will occur when shrink-wrapping if r3 is used as a scratch
20854 register and there are no other callee-saved writes.
20856 This situation can be avoided when other callee-saved registers
20857 are available: r3 is not mandatory, and we can choose a
20858 callee-saved register for the padding instead. */
20859 bool prefer_callee_reg_p = false;
20861 /* If it is safe to use r3, then do so. This sometimes
20862 generates better code on Thumb-2 by avoiding the need to
20863 use 32-bit push/pop instructions. */
20864 if (! any_sibcall_could_use_r3 ()
20865 && arm_size_return_regs () <= 12
20866 && (offsets->saved_regs_mask & (1 << 3)) == 0
20867 && (TARGET_THUMB2
20868 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20870 reg = 3;
20871 if (!TARGET_THUMB2)
20872 prefer_callee_reg_p = true;
20874 if (reg == -1
20875 || prefer_callee_reg_p)
20877 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20879 /* Avoid fixed registers; they may be changed at
20880 arbitrary times so it's unsafe to restore them
20881 during the epilogue. */
20882 if (!fixed_regs[i]
20883 && (offsets->saved_regs_mask & (1 << i)) == 0)
20885 reg = i;
20886 break;
20891 if (reg != -1)
20893 offsets->saved_regs += 4;
20894 offsets->saved_regs_mask |= (1 << reg);
20899 offsets->locals_base = offsets->soft_frame + frame_size;
20900 offsets->outgoing_args = (offsets->locals_base
20901 + crtl->outgoing_args_size);
20903 if (ARM_DOUBLEWORD_ALIGN)
20905 /* Ensure SP remains doubleword aligned. */
20906 if (offsets->outgoing_args & 7)
20907 offsets->outgoing_args += 4;
20908 gcc_assert (!(offsets->outgoing_args & 7));
20911 return offsets;
20915 /* Calculate the relative offsets for the different stack pointers. Positive
20916 offsets are in the direction of stack growth. */
20918 HOST_WIDE_INT
20919 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20921 arm_stack_offsets *offsets;
20923 offsets = arm_get_frame_offsets ();
20925 /* OK, now we have enough information to compute the distances.
20926 There must be an entry in these switch tables for each pair
20927 of registers in ELIMINABLE_REGS, even if some of the entries
20928 seem to be redundant or useless. */
20929 switch (from)
20931 case ARG_POINTER_REGNUM:
20932 switch (to)
20934 case THUMB_HARD_FRAME_POINTER_REGNUM:
20935 return 0;
20937 case FRAME_POINTER_REGNUM:
20938 /* This is the reverse of the soft frame pointer
20939 to hard frame pointer elimination below. */
20940 return offsets->soft_frame - offsets->saved_args;
20942 case ARM_HARD_FRAME_POINTER_REGNUM:
20943 /* This is only non-zero in the case where the static chain register
20944 is stored above the frame. */
20945 return offsets->frame - offsets->saved_args - 4;
20947 case STACK_POINTER_REGNUM:
20948 /* If nothing has been pushed on the stack at all
20949 then this will return -4. This *is* correct! */
20950 return offsets->outgoing_args - (offsets->saved_args + 4);
20952 default:
20953 gcc_unreachable ();
20955 gcc_unreachable ();
20957 case FRAME_POINTER_REGNUM:
20958 switch (to)
20960 case THUMB_HARD_FRAME_POINTER_REGNUM:
20961 return 0;
20963 case ARM_HARD_FRAME_POINTER_REGNUM:
20964 /* The hard frame pointer points to the top entry in the
20965 stack frame. The soft frame pointer to the bottom entry
20966 in the stack frame. If there is no stack frame at all,
20967 then they are identical. */
20969 return offsets->frame - offsets->soft_frame;
20971 case STACK_POINTER_REGNUM:
20972 return offsets->outgoing_args - offsets->soft_frame;
20974 default:
20975 gcc_unreachable ();
20977 gcc_unreachable ();
20979 default:
20980 /* You cannot eliminate from the stack pointer.
20981 In theory you could eliminate from the hard frame
20982 pointer to the stack pointer, but this will never
20983 happen, since if a stack frame is not needed the
20984 hard frame pointer will never be used. */
20985 gcc_unreachable ();
20989 /* Given FROM and TO register numbers, say whether this elimination is
20990 allowed. Frame pointer elimination is automatically handled.
20992 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20993 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20994 pointer, we must eliminate FRAME_POINTER_REGNUM into
20995 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20996 ARG_POINTER_REGNUM. */
20998 bool
20999 arm_can_eliminate (const int from, const int to)
21001 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21002 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21003 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21004 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21005 true);
21008 /* Emit RTL to save coprocessor registers on function entry. Returns the
21009 number of bytes pushed. */
21011 static int
21012 arm_save_coproc_regs(void)
21014 int saved_size = 0;
21015 unsigned reg;
21016 unsigned start_reg;
21017 rtx insn;
21019 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21020 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21022 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21023 insn = gen_rtx_MEM (V2SImode, insn);
21024 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21025 RTX_FRAME_RELATED_P (insn) = 1;
21026 saved_size += 8;
21029 if (TARGET_HARD_FLOAT && TARGET_VFP)
21031 start_reg = FIRST_VFP_REGNUM;
21033 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21035 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21036 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21038 if (start_reg != reg)
21039 saved_size += vfp_emit_fstmd (start_reg,
21040 (reg - start_reg) / 2);
21041 start_reg = reg + 2;
21044 if (start_reg != reg)
21045 saved_size += vfp_emit_fstmd (start_reg,
21046 (reg - start_reg) / 2);
21048 return saved_size;
21052 /* Set the Thumb frame pointer from the stack pointer. */
21054 static void
21055 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21057 HOST_WIDE_INT amount;
21058 rtx insn, dwarf;
21060 amount = offsets->outgoing_args - offsets->locals_base;
21061 if (amount < 1024)
21062 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21063 stack_pointer_rtx, GEN_INT (amount)));
21064 else
21066 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21067 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21068 expects the first two operands to be the same. */
21069 if (TARGET_THUMB2)
21071 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21072 stack_pointer_rtx,
21073 hard_frame_pointer_rtx));
21075 else
21077 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21078 hard_frame_pointer_rtx,
21079 stack_pointer_rtx));
21081 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21082 plus_constant (Pmode, stack_pointer_rtx, amount));
21083 RTX_FRAME_RELATED_P (dwarf) = 1;
21084 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21087 RTX_FRAME_RELATED_P (insn) = 1;
21090 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21091 function. */
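/* An editorial outline of the code below (not a specification): handle a
   mis-aligned SP for IS_STACKALIGN functions, stash IP when an APCS frame
   is being built, push the pretend args, push the core registers (as a
   push_multi or as STR/STRD sequences), save the coprocessor registers,
   establish the frame pointer, and finally drop SP to the outgoing-args
   area.  */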
21092 void
21093 arm_expand_prologue (void)
21095 rtx amount;
21096 rtx insn;
21097 rtx ip_rtx;
21098 unsigned long live_regs_mask;
21099 unsigned long func_type;
21100 int fp_offset = 0;
21101 int saved_pretend_args = 0;
21102 int saved_regs = 0;
21103 unsigned HOST_WIDE_INT args_to_push;
21104 arm_stack_offsets *offsets;
21106 func_type = arm_current_func_type ();
21108 /* Naked functions don't have prologues. */
21109 if (IS_NAKED (func_type))
21110 return;
21112 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21113 args_to_push = crtl->args.pretend_args_size;
21115 /* Compute which register we will have to save onto the stack. */
21116 offsets = arm_get_frame_offsets ();
21117 live_regs_mask = offsets->saved_regs_mask;
21119 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21121 if (IS_STACKALIGN (func_type))
21123 rtx r0, r1;
21125 /* Handle a word-aligned stack pointer. We generate the following:
21127 mov r0, sp
21128 bic r1, r0, #7
21129 mov sp, r1
21130 <save and restore r0 in normal prologue/epilogue>
21131 mov sp, r0
21132 bx lr
21134 The unwinder doesn't need to know about the stack realignment.
21135 Just tell it we saved SP in r0. */
21136 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21138 r0 = gen_rtx_REG (SImode, 0);
21139 r1 = gen_rtx_REG (SImode, 1);
21141 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21142 RTX_FRAME_RELATED_P (insn) = 1;
21143 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21145 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21147 /* ??? The CFA changes here, which may cause GDB to conclude that it
21148 has entered a different function. That said, the unwind info is
21149 correct, individually, before and after this instruction because
21150 we've described the save of SP, which will override the default
21151 handling of SP as restoring from the CFA. */
21152 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21155 /* For APCS frames, if IP register is clobbered
21156 when creating frame, save that register in a special
21157 way. */
21158 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21160 if (IS_INTERRUPT (func_type))
21162 /* Interrupt functions must not corrupt any registers.
21163 Creating a frame pointer however, corrupts the IP
21164 register, so we must push it first. */
21165 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21167 /* Do not set RTX_FRAME_RELATED_P on this insn.
21168 The dwarf stack unwinding code only wants to see one
21169 stack decrement per function, and this is not it. If
21170 this instruction is labeled as being part of the frame
21171 creation sequence then dwarf2out_frame_debug_expr will
21172 die when it encounters the assignment of IP to FP
21173 later on, since the use of SP here establishes SP as
21174 the CFA register and not IP.
21176 Anyway this instruction is not really part of the stack
21177 frame creation although it is part of the prologue. */
21179 else if (IS_NESTED (func_type))
21181 /* The static chain register is the same as the IP register
21182 used as a scratch register during stack frame creation.
21183 To get around this need to find somewhere to store IP
21184 whilst the frame is being created. We try the following
21185 places in order:
21187 1. The last argument register r3 if it is available.
21188 2. A slot on the stack above the frame if there are no
21189 arguments to push onto the stack.
21190 3. Register r3 again, after pushing the argument registers
21191 onto the stack, if this is a varargs function.
21192 4. The last slot on the stack created for the arguments to
21193 push, if this isn't a varargs function.
21195 Note - we only need to tell the dwarf2 backend about the SP
21196 adjustment in the second variant; the static chain register
21197 doesn't need to be unwound, as it doesn't contain a value
21198 inherited from the caller. */
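/* For reference (illustrative arithmetic only): the expression
   (0xf0 >> (args_to_push / 4)) & 0xf used below selects the highest
   argument registers, e.g. args_to_push == 4 gives 0x8 ({r3}),
   8 gives 0xc ({r2, r3}), 12 gives 0xe ({r1-r3}) and 16 gives 0xf
   ({r0-r3}).  */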
21200 if (!arm_r3_live_at_start_p ())
21201 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21202 else if (args_to_push == 0)
21204 rtx addr, dwarf;
21206 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21207 saved_regs += 4;
21209 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21210 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21211 fp_offset = 4;
21213 /* Just tell the dwarf backend that we adjusted SP. */
21214 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21215 plus_constant (Pmode, stack_pointer_rtx,
21216 -fp_offset));
21217 RTX_FRAME_RELATED_P (insn) = 1;
21218 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21220 else
21222 /* Store the args on the stack. */
21223 if (cfun->machine->uses_anonymous_args)
21225 insn
21226 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21227 (0xf0 >> (args_to_push / 4)) & 0xf);
21228 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21229 saved_pretend_args = 1;
21231 else
21233 rtx addr, dwarf;
21235 if (args_to_push == 4)
21236 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21237 else
21238 addr
21239 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21240 plus_constant (Pmode,
21241 stack_pointer_rtx,
21242 -args_to_push));
21244 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21246 /* Just tell the dwarf backend that we adjusted SP. */
21247 dwarf
21248 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21249 plus_constant (Pmode, stack_pointer_rtx,
21250 -args_to_push));
21251 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21254 RTX_FRAME_RELATED_P (insn) = 1;
21255 fp_offset = args_to_push;
21256 args_to_push = 0;
21260 insn = emit_set_insn (ip_rtx,
21261 plus_constant (Pmode, stack_pointer_rtx,
21262 fp_offset));
21263 RTX_FRAME_RELATED_P (insn) = 1;
21266 if (args_to_push)
21268 /* Push the argument registers, or reserve space for them. */
21269 if (cfun->machine->uses_anonymous_args)
21270 insn = emit_multi_reg_push
21271 ((0xf0 >> (args_to_push / 4)) & 0xf,
21272 (0xf0 >> (args_to_push / 4)) & 0xf);
21273 else
21274 insn = emit_insn
21275 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21276 GEN_INT (- args_to_push)));
21277 RTX_FRAME_RELATED_P (insn) = 1;
21280 /* If this is an interrupt service routine, and the link register
21281 is going to be pushed, and we're not generating extra
21282 push of IP (needed when a frame is needed and the frame layout is APCS),
21283 subtracting four from LR now will mean that the function return
21284 can be done with a single instruction. */
21285 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21286 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21287 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21288 && TARGET_ARM)
21290 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21292 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21295 if (live_regs_mask)
21297 unsigned long dwarf_regs_mask = live_regs_mask;
21299 saved_regs += bit_count (live_regs_mask) * 4;
21300 if (optimize_size && !frame_pointer_needed
21301 && saved_regs == offsets->saved_regs - offsets->saved_args)
21303 /* If no coprocessor registers are being pushed and we don't have
21304 to worry about a frame pointer then push extra registers to
21305 create the stack frame. This is done in a way that does not
21306 alter the frame layout, so is independent of the epilogue. */
21307 int n;
21308 int frame;
21309 n = 0;
21310 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21311 n++;
21312 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21313 if (frame && n * 4 >= frame)
21315 n = frame / 4;
21316 live_regs_mask |= (1 << n) - 1;
21317 saved_regs += frame;
21321 if (TARGET_LDRD
21322 && current_tune->prefer_ldrd_strd
21323 && !optimize_function_for_size_p (cfun))
21325 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21326 if (TARGET_THUMB2)
21327 thumb2_emit_strd_push (live_regs_mask);
21328 else if (TARGET_ARM
21329 && !TARGET_APCS_FRAME
21330 && !IS_INTERRUPT (func_type))
21331 arm_emit_strd_push (live_regs_mask);
21332 else
21334 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21335 RTX_FRAME_RELATED_P (insn) = 1;
21338 else
21340 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21341 RTX_FRAME_RELATED_P (insn) = 1;
21345 if (! IS_VOLATILE (func_type))
21346 saved_regs += arm_save_coproc_regs ();
21348 if (frame_pointer_needed && TARGET_ARM)
21350 /* Create the new frame pointer. */
21351 if (TARGET_APCS_FRAME)
21353 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21354 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21355 RTX_FRAME_RELATED_P (insn) = 1;
21357 if (IS_NESTED (func_type))
21359 /* Recover the static chain register. */
21360 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21361 insn = gen_rtx_REG (SImode, 3);
21362 else
21364 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21365 insn = gen_frame_mem (SImode, insn);
21367 emit_set_insn (ip_rtx, insn);
21368 /* Add a USE to stop propagate_one_insn() from barfing. */
21369 emit_insn (gen_force_register_use (ip_rtx));
21372 else
21374 insn = GEN_INT (saved_regs - 4);
21375 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21376 stack_pointer_rtx, insn));
21377 RTX_FRAME_RELATED_P (insn) = 1;
21381 if (flag_stack_usage_info)
21382 current_function_static_stack_size
21383 = offsets->outgoing_args - offsets->saved_args;
21385 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21387 /* This add can produce multiple insns for a large constant, so we
21388 need to get tricky. */
21389 rtx_insn *last = get_last_insn ();
21391 amount = GEN_INT (offsets->saved_args + saved_regs
21392 - offsets->outgoing_args);
21394 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21395 amount));
21398 last = last ? NEXT_INSN (last) : get_insns ();
21399 RTX_FRAME_RELATED_P (last) = 1;
21401 while (last != insn);
21403 /* If the frame pointer is needed, emit a special barrier that
21404 will prevent the scheduler from moving stores to the frame
21405 before the stack adjustment. */
21406 if (frame_pointer_needed)
21407 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21408 hard_frame_pointer_rtx));
21412 if (frame_pointer_needed && TARGET_THUMB2)
21413 thumb_set_frame_pointer (offsets);
21415 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21417 unsigned long mask;
21419 mask = live_regs_mask;
21420 mask &= THUMB2_WORK_REGS;
21421 if (!IS_NESTED (func_type))
21422 mask |= (1 << IP_REGNUM);
21423 arm_load_pic_register (mask);
21426 /* If we are profiling, make sure no instructions are scheduled before
21427 the call to mcount. Similarly if the user has requested no
21428 scheduling in the prolog. Similarly if we want non-call exceptions
21429 using the EABI unwinder, to prevent faulting instructions from being
21430 swapped with a stack adjustment. */
21431 if (crtl->profile || !TARGET_SCHED_PROLOG
21432 || (arm_except_unwind_info (&global_options) == UI_TARGET
21433 && cfun->can_throw_non_call_exceptions))
21434 emit_insn (gen_blockage ());
21436 /* If the link register is being kept alive, with the return address in it,
21437 then make sure that it does not get reused by the ce2 pass. */
21438 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21439 cfun->machine->lr_save_eliminated = 1;
21442 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21443 static void
21444 arm_print_condition (FILE *stream)
21446 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21448 /* Branch conversion is not implemented for Thumb-2. */
21449 if (TARGET_THUMB)
21451 output_operand_lossage ("predicated Thumb instruction");
21452 return;
21454 if (current_insn_predicate != NULL)
21456 output_operand_lossage
21457 ("predicated instruction in conditional sequence");
21458 return;
21461 fputs (arm_condition_codes[arm_current_cc], stream);
21463 else if (current_insn_predicate)
21465 enum arm_cond_code code;
21467 if (TARGET_THUMB1)
21469 output_operand_lossage ("predicated Thumb instruction");
21470 return;
21473 code = get_arm_condition_code (current_insn_predicate);
21474 fputs (arm_condition_codes[code], stream);
21479 /* Globally reserved letters: acln
21480 Punctuation letters currently used: @_|?().!#
21481 Lower case letters currently used: bcdefhimpqtvwxyz
21482 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21483 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21485 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21487 If CODE is 'd', then the X is a condition operand and the instruction
21488 should only be executed if the condition is true.
21489 If CODE is 'D', then the X is a condition operand and the instruction
21490 should only be executed if the condition is false: however, if the mode
21491 of the comparison is CCFPEmode, then always execute the instruction -- we
21492 do this because in these circumstances !GE does not necessarily imply LT;
21493 in these cases the instruction pattern will take care to make sure that
21494 an instruction containing %d will follow, thereby undoing the effects of
21495 doing this instruction unconditionally.
21496 If CODE is 'N' then X is a floating point operand that must be negated
21497 before output.
21498 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21499 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
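/* A worked illustration (the template is hypothetical, not taken from
   arm.md): if operand 3 is the comparison (eq (reg:CC CC_REGNUM)
   (const_int 0)), then "%d3" prints "eq" and "%D3" prints the inverse
   "ne", so an output template such as "mov%D3\t%0, #0" would emit
   "movne r0, #0" when operand 0 is r0.  */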
21500 static void
21501 arm_print_operand (FILE *stream, rtx x, int code)
21503 switch (code)
21505 case '@':
21506 fputs (ASM_COMMENT_START, stream);
21507 return;
21509 case '_':
21510 fputs (user_label_prefix, stream);
21511 return;
21513 case '|':
21514 fputs (REGISTER_PREFIX, stream);
21515 return;
21517 case '?':
21518 arm_print_condition (stream);
21519 return;
21521 case '(':
21522 /* Nothing in unified syntax, otherwise the current condition code. */
21523 if (!TARGET_UNIFIED_ASM)
21524 arm_print_condition (stream);
21525 break;
21527 case ')':
21528 /* The current condition code in unified syntax, otherwise nothing. */
21529 if (TARGET_UNIFIED_ASM)
21530 arm_print_condition (stream);
21531 break;
21533 case '.':
21534 /* The current condition code for a condition code setting instruction.
21535 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21536 if (TARGET_UNIFIED_ASM)
21538 fputc('s', stream);
21539 arm_print_condition (stream);
21541 else
21543 arm_print_condition (stream);
21544 fputc('s', stream);
21546 return;
21548 case '!':
21549 /* If the instruction is conditionally executed then print
21550 the current condition code, otherwise print 's'. */
21551 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21552 if (current_insn_predicate)
21553 arm_print_condition (stream);
21554 else
21555 fputc('s', stream);
21556 break;
21558 /* %# is a "break" sequence. It doesn't output anything, but is used to
21559 separate e.g. operand numbers from following text, if that text consists
21560 of further digits which we don't want to be part of the operand
21561 number. */
21562 case '#':
21563 return;
21565 case 'N':
21567 REAL_VALUE_TYPE r;
21568 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21569 r = real_value_negate (&r);
21570 fprintf (stream, "%s", fp_const_from_val (&r));
21572 return;
21574 /* An integer or symbol address without a preceding # sign. */
21575 case 'c':
21576 switch (GET_CODE (x))
21578 case CONST_INT:
21579 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21580 break;
21582 case SYMBOL_REF:
21583 output_addr_const (stream, x);
21584 break;
21586 case CONST:
21587 if (GET_CODE (XEXP (x, 0)) == PLUS
21588 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21590 output_addr_const (stream, x);
21591 break;
21593 /* Fall through. */
21595 default:
21596 output_operand_lossage ("Unsupported operand for code '%c'", code);
21598 return;
21600 /* An integer that we want to print in HEX. */
21601 case 'x':
21602 switch (GET_CODE (x))
21604 case CONST_INT:
21605 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21606 break;
21608 default:
21609 output_operand_lossage ("Unsupported operand for code '%c'", code);
21611 return;
21613 case 'B':
21614 if (CONST_INT_P (x))
21616 HOST_WIDE_INT val;
21617 val = ARM_SIGN_EXTEND (~INTVAL (x));
21618 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21620 else
21622 putc ('~', stream);
21623 output_addr_const (stream, x);
21625 return;
21627 case 'b':
21628 /* Print the log2 of a CONST_INT. */
21630 HOST_WIDE_INT val;
21632 if (!CONST_INT_P (x)
21633 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21634 output_operand_lossage ("Unsupported operand for code '%c'", code);
21635 else
21636 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21638 return;
21640 case 'L':
21641 /* The low 16 bits of an immediate constant. */
21642 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21643 return;
21645 case 'i':
21646 fprintf (stream, "%s", arithmetic_instr (x, 1));
21647 return;
21649 case 'I':
21650 fprintf (stream, "%s", arithmetic_instr (x, 0));
21651 return;
21653 case 'S':
21655 HOST_WIDE_INT val;
21656 const char *shift;
21658 shift = shift_op (x, &val);
21660 if (shift)
21662 fprintf (stream, ", %s ", shift);
21663 if (val == -1)
21664 arm_print_operand (stream, XEXP (x, 1), 0);
21665 else
21666 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21669 return;
21671 /* An explanation of the 'Q', 'R' and 'H' register operands:
21673 In a pair of registers containing a DI or DF value the 'Q'
21674 operand returns the register number of the register containing
21675 the least significant part of the value. The 'R' operand returns
21676 the register number of the register containing the most
21677 significant part of the value.
21679 The 'H' operand returns the higher of the two register numbers.
21680 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21681 same as the 'Q' operand, since the most significant part of the
21682 value is held in the lower number register. The reverse is true
21683 on systems where WORDS_BIG_ENDIAN is false.
21685 The purpose of these operands is to distinguish between cases
21686 where the endian-ness of the values is important (for example
21687 when they are added together), and cases where the endian-ness
21688 is irrelevant, but the order of register operations is important.
21689 For example when loading a value from memory into a register
21690 pair, the endian-ness does not matter. Provided that the value
21691 from the lower memory address is put into the lower numbered
21692 register, and the value from the higher address is put into the
21693 higher numbered register, the load will work regardless of whether
21694 the value being loaded is big-wordian or little-wordian. The
21695 order of the two register loads can matter however, if the address
21696 of the memory location is actually held in one of the registers
21697 being overwritten by the load.
21699 The 'Q' and 'R' constraints are also available for 64-bit
21700 constants. */
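/* A concrete illustration (register pair chosen arbitrarily): for a
   DImode value held in r4/r5, %Q prints r4 and %R prints r5 when
   WORDS_BIG_ENDIAN is false, while %Q prints r5 and %R prints r4 when
   it is true; %H prints r5 in both cases.  */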
21701 case 'Q':
21702 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21704 rtx part = gen_lowpart (SImode, x);
21705 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21706 return;
21709 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21711 output_operand_lossage ("invalid operand for code '%c'", code);
21712 return;
21715 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21716 return;
21718 case 'R':
21719 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21721 machine_mode mode = GET_MODE (x);
21722 rtx part;
21724 if (mode == VOIDmode)
21725 mode = DImode;
21726 part = gen_highpart_mode (SImode, mode, x);
21727 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21728 return;
21731 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21733 output_operand_lossage ("invalid operand for code '%c'", code);
21734 return;
21737 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21738 return;
21740 case 'H':
21741 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21743 output_operand_lossage ("invalid operand for code '%c'", code);
21744 return;
21747 asm_fprintf (stream, "%r", REGNO (x) + 1);
21748 return;
21750 case 'J':
21751 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21753 output_operand_lossage ("invalid operand for code '%c'", code);
21754 return;
21757 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21758 return;
21760 case 'K':
21761 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21763 output_operand_lossage ("invalid operand for code '%c'", code);
21764 return;
21767 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21768 return;
21770 case 'm':
21771 asm_fprintf (stream, "%r",
21772 REG_P (XEXP (x, 0))
21773 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21774 return;
21776 case 'M':
21777 asm_fprintf (stream, "{%r-%r}",
21778 REGNO (x),
21779 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21780 return;
21782 /* Like 'M', but writing doubleword vector registers, for use by Neon
21783 insns. */
21784 case 'h':
21786 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21787 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21788 if (numregs == 1)
21789 asm_fprintf (stream, "{d%d}", regno);
21790 else
21791 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21793 return;
21795 case 'd':
21796 /* CONST_TRUE_RTX means always -- that's the default. */
21797 if (x == const_true_rtx)
21798 return;
21800 if (!COMPARISON_P (x))
21802 output_operand_lossage ("invalid operand for code '%c'", code);
21803 return;
21806 fputs (arm_condition_codes[get_arm_condition_code (x)],
21807 stream);
21808 return;
21810 case 'D':
21811 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21812 want to do that. */
21813 if (x == const_true_rtx)
21815 output_operand_lossage ("instruction never executed");
21816 return;
21818 if (!COMPARISON_P (x))
21820 output_operand_lossage ("invalid operand for code '%c'", code);
21821 return;
21824 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21825 (get_arm_condition_code (x))],
21826 stream);
21827 return;
21829 case 's':
21830 case 'V':
21831 case 'W':
21832 case 'X':
21833 case 'Y':
21834 case 'Z':
21835 /* Former Maverick support, removed after GCC-4.7. */
21836 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21837 return;
21839 case 'U':
21840 if (!REG_P (x)
21841 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21842 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21843 /* Bad value for wCG register number. */
21845 output_operand_lossage ("invalid operand for code '%c'", code);
21846 return;
21849 else
21850 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21851 return;
21853 /* Print an iWMMXt control register name. */
21854 case 'w':
21855 if (!CONST_INT_P (x)
21856 || INTVAL (x) < 0
21857 || INTVAL (x) >= 16)
21858 /* Bad value for wC register number. */
21860 output_operand_lossage ("invalid operand for code '%c'", code);
21861 return;
21864 else
21866 static const char * wc_reg_names [16] =
21868 "wCID", "wCon", "wCSSF", "wCASF",
21869 "wC4", "wC5", "wC6", "wC7",
21870 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21871 "wC12", "wC13", "wC14", "wC15"
21874 fputs (wc_reg_names [INTVAL (x)], stream);
21876 return;
21878 /* Print the high single-precision register of a VFP double-precision
21879 register. */
21880 case 'p':
21882 machine_mode mode = GET_MODE (x);
21883 int regno;
21885 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21887 output_operand_lossage ("invalid operand for code '%c'", code);
21888 return;
21891 regno = REGNO (x);
21892 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21894 output_operand_lossage ("invalid operand for code '%c'", code);
21895 return;
21898 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21900 return;
21902 /* Print a VFP/Neon double precision or quad precision register name. */
21903 case 'P':
21904 case 'q':
21906 machine_mode mode = GET_MODE (x);
21907 int is_quad = (code == 'q');
21908 int regno;
21910 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21912 output_operand_lossage ("invalid operand for code '%c'", code);
21913 return;
21916 if (!REG_P (x)
21917 || !IS_VFP_REGNUM (REGNO (x)))
21919 output_operand_lossage ("invalid operand for code '%c'", code);
21920 return;
21923 regno = REGNO (x);
21924 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21925 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21927 output_operand_lossage ("invalid operand for code '%c'", code);
21928 return;
21931 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21932 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21934 return;
21936 /* These two codes print the low/high doubleword register of a Neon quad
21937 register, respectively. For pair-structure types, can also print
21938 low/high quadword registers. */
21939 case 'e':
21940 case 'f':
21942 machine_mode mode = GET_MODE (x);
21943 int regno;
21945 if ((GET_MODE_SIZE (mode) != 16
21946 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21948 output_operand_lossage ("invalid operand for code '%c'", code);
21949 return;
21952 regno = REGNO (x);
21953 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21955 output_operand_lossage ("invalid operand for code '%c'", code);
21956 return;
21959 if (GET_MODE_SIZE (mode) == 16)
21960 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21961 + (code == 'f' ? 1 : 0));
21962 else
21963 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21964 + (code == 'f' ? 1 : 0));
21966 return;
21968 /* Print a VFPv3 floating-point constant, represented as an integer
21969 index. */
21970 case 'G':
21972 int index = vfp3_const_double_index (x);
21973 gcc_assert (index != -1);
21974 fprintf (stream, "%d", index);
21976 return;
21978 /* Print bits representing opcode features for Neon.
21980 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21981 and polynomials as unsigned.
21983 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21985 Bit 2 is 1 for rounding functions, 0 otherwise. */
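/* A worked example of the encoding (value chosen for illustration):
   for bits = 5 (binary 101), bit 0 says signed, bit 1 says ordinary
   integer and bit 2 says rounding, so %T prints 's', %F prints 'i',
   %t prints 's' and %O prints 'r'.  */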
21987 /* Identify the type as 's', 'u', 'p' or 'f'. */
21988 case 'T':
21990 HOST_WIDE_INT bits = INTVAL (x);
21991 fputc ("uspf"[bits & 3], stream);
21993 return;
21995 /* Likewise, but signed and unsigned integers are both 'i'. */
21996 case 'F':
21998 HOST_WIDE_INT bits = INTVAL (x);
21999 fputc ("iipf"[bits & 3], stream);
22001 return;
22003 /* As for 'T', but emit 'u' instead of 'p'. */
22004 case 't':
22006 HOST_WIDE_INT bits = INTVAL (x);
22007 fputc ("usuf"[bits & 3], stream);
22009 return;
22011 /* Bit 2: rounding (vs none). */
22012 case 'O':
22014 HOST_WIDE_INT bits = INTVAL (x);
22015 fputs ((bits & 4) != 0 ? "r" : "", stream);
22017 return;
22019 /* Memory operand for vld1/vst1 instruction. */
22020 case 'A':
22022 rtx addr;
22023 bool postinc = FALSE;
22024 rtx postinc_reg = NULL;
22025 unsigned align, memsize, align_bits;
22027 gcc_assert (MEM_P (x));
22028 addr = XEXP (x, 0);
22029 if (GET_CODE (addr) == POST_INC)
22031 postinc = 1;
22032 addr = XEXP (addr, 0);
22034 if (GET_CODE (addr) == POST_MODIFY)
22036 postinc_reg = XEXP( XEXP (addr, 1), 1);
22037 addr = XEXP (addr, 0);
22039 asm_fprintf (stream, "[%r", REGNO (addr));
22041 /* We know the alignment of this access, so we can emit a hint in the
22042 instruction (for some alignments) as an aid to the memory subsystem
22043 of the target. */
22044 align = MEM_ALIGN (x) >> 3;
22045 memsize = MEM_SIZE (x);
22047 /* Only certain alignment specifiers are supported by the hardware. */
22048 if (memsize == 32 && (align % 32) == 0)
22049 align_bits = 256;
22050 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22051 align_bits = 128;
22052 else if (memsize >= 8 && (align % 8) == 0)
22053 align_bits = 64;
22054 else
22055 align_bits = 0;
22057 if (align_bits != 0)
22058 asm_fprintf (stream, ":%d", align_bits);
22060 asm_fprintf (stream, "]");
22062 if (postinc)
22063 fputs("!", stream);
22064 if (postinc_reg)
22065 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22067 return;
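/* In the 'A' case above, for example, a 16-byte access whose address is
   known to be 16-byte aligned is printed with a 128-bit alignment hint,
   giving an operand such as "[r0:128]" (register chosen arbitrarily).  */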
22069 case 'C':
22071 rtx addr;
22073 gcc_assert (MEM_P (x));
22074 addr = XEXP (x, 0);
22075 gcc_assert (REG_P (addr));
22076 asm_fprintf (stream, "[%r]", REGNO (addr));
22078 return;
22080 /* Translate an S register number into a D register number and element index. */
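/* For example, the single-precision register s5 occupies the upper half
   of d2, so it is printed here as "d2[1]".  */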
22081 case 'y':
22083 machine_mode mode = GET_MODE (x);
22084 int regno;
22086 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22088 output_operand_lossage ("invalid operand for code '%c'", code);
22089 return;
22092 regno = REGNO (x);
22093 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22095 output_operand_lossage ("invalid operand for code '%c'", code);
22096 return;
22099 regno = regno - FIRST_VFP_REGNUM;
22100 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22102 return;
22104 case 'v':
22105 gcc_assert (CONST_DOUBLE_P (x));
22106 int result;
22107 result = vfp3_const_double_for_fract_bits (x);
22108 if (result == 0)
22109 result = vfp3_const_double_for_bits (x);
22110 fprintf (stream, "#%d", result);
22111 return;
22113 /* Register specifier for vld1.16/vst1.16. Translate the S register
22114 number into a D register number and element index. */
22115 case 'z':
22117 machine_mode mode = GET_MODE (x);
22118 int regno;
22120 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22122 output_operand_lossage ("invalid operand for code '%c'", code);
22123 return;
22126 regno = REGNO (x);
22127 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22129 output_operand_lossage ("invalid operand for code '%c'", code);
22130 return;
22133 regno = regno - FIRST_VFP_REGNUM;
22134 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22136 return;
22138 default:
22139 if (x == 0)
22141 output_operand_lossage ("missing operand");
22142 return;
22145 switch (GET_CODE (x))
22147 case REG:
22148 asm_fprintf (stream, "%r", REGNO (x));
22149 break;
22151 case MEM:
22152 output_memory_reference_mode = GET_MODE (x);
22153 output_address (XEXP (x, 0));
22154 break;
22156 case CONST_DOUBLE:
22158 char fpstr[20];
22159 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22160 sizeof (fpstr), 0, 1);
22161 fprintf (stream, "#%s", fpstr);
22163 break;
22165 default:
22166 gcc_assert (GET_CODE (x) != NEG);
22167 fputc ('#', stream);
22168 if (GET_CODE (x) == HIGH)
22170 fputs (":lower16:", stream);
22171 x = XEXP (x, 0);
22174 output_addr_const (stream, x);
22175 break;
22180 /* Target hook for printing a memory address. */
22181 static void
22182 arm_print_operand_address (FILE *stream, rtx x)
22184 if (TARGET_32BIT)
22186 int is_minus = GET_CODE (x) == MINUS;
22188 if (REG_P (x))
22189 asm_fprintf (stream, "[%r]", REGNO (x));
22190 else if (GET_CODE (x) == PLUS || is_minus)
22192 rtx base = XEXP (x, 0);
22193 rtx index = XEXP (x, 1);
22194 HOST_WIDE_INT offset = 0;
22195 if (!REG_P (base)
22196 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22198 /* Ensure that BASE is a register. */
22199 /* (one of them must be). */
22200 /* Also ensure the SP is not used as an index register. */
22201 rtx temp = base;
22202 base = index;
22203 index = temp;
22205 switch (GET_CODE (index))
22207 case CONST_INT:
22208 offset = INTVAL (index);
22209 if (is_minus)
22210 offset = -offset;
22211 asm_fprintf (stream, "[%r, #%wd]",
22212 REGNO (base), offset);
22213 break;
22215 case REG:
22216 asm_fprintf (stream, "[%r, %s%r]",
22217 REGNO (base), is_minus ? "-" : "",
22218 REGNO (index));
22219 break;
22221 case MULT:
22222 case ASHIFTRT:
22223 case LSHIFTRT:
22224 case ASHIFT:
22225 case ROTATERT:
22227 asm_fprintf (stream, "[%r, %s%r",
22228 REGNO (base), is_minus ? "-" : "",
22229 REGNO (XEXP (index, 0)));
22230 arm_print_operand (stream, index, 'S');
22231 fputs ("]", stream);
22232 break;
22235 default:
22236 gcc_unreachable ();
22239 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22240 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22242 extern machine_mode output_memory_reference_mode;
22244 gcc_assert (REG_P (XEXP (x, 0)));
22246 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22247 asm_fprintf (stream, "[%r, #%s%d]!",
22248 REGNO (XEXP (x, 0)),
22249 GET_CODE (x) == PRE_DEC ? "-" : "",
22250 GET_MODE_SIZE (output_memory_reference_mode));
22251 else
22252 asm_fprintf (stream, "[%r], #%s%d",
22253 REGNO (XEXP (x, 0)),
22254 GET_CODE (x) == POST_DEC ? "-" : "",
22255 GET_MODE_SIZE (output_memory_reference_mode));
22257 else if (GET_CODE (x) == PRE_MODIFY)
22259 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22260 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22261 asm_fprintf (stream, "#%wd]!",
22262 INTVAL (XEXP (XEXP (x, 1), 1)));
22263 else
22264 asm_fprintf (stream, "%r]!",
22265 REGNO (XEXP (XEXP (x, 1), 1)));
22267 else if (GET_CODE (x) == POST_MODIFY)
22269 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22270 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22271 asm_fprintf (stream, "#%wd",
22272 INTVAL (XEXP (XEXP (x, 1), 1)));
22273 else
22274 asm_fprintf (stream, "%r",
22275 REGNO (XEXP (XEXP (x, 1), 1)));
22277 else output_addr_const (stream, x);
22279 else
22281 if (REG_P (x))
22282 asm_fprintf (stream, "[%r]", REGNO (x));
22283 else if (GET_CODE (x) == POST_INC)
22284 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22285 else if (GET_CODE (x) == PLUS)
22287 gcc_assert (REG_P (XEXP (x, 0)));
22288 if (CONST_INT_P (XEXP (x, 1)))
22289 asm_fprintf (stream, "[%r, #%wd]",
22290 REGNO (XEXP (x, 0)),
22291 INTVAL (XEXP (x, 1)));
22292 else
22293 asm_fprintf (stream, "[%r, %r]",
22294 REGNO (XEXP (x, 0)),
22295 REGNO (XEXP (x, 1)));
22297 else
22298 output_addr_const (stream, x);
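/* Some illustrative 32-bit outputs of this hook (register numbers and
   offsets invented): a bare register prints as "[r0]", register plus
   constant as "[r0, #4]", register plus register as "[r0, r1]",
   PRE_MODIFY by a constant as "[r0, #8]!" and POST_INC of an SImode
   reference as "[r0], #4"; a shifted index additionally prints the
   shift operator and amount via the 'S' operand code.  */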
22302 /* Target hook for indicating whether a punctuation character for
22303 TARGET_PRINT_OPERAND is valid. */
22304 static bool
22305 arm_print_operand_punct_valid_p (unsigned char code)
22307 return (code == '@' || code == '|' || code == '.'
22308 || code == '(' || code == ')' || code == '#'
22309 || (TARGET_32BIT && (code == '?'))
22310 || (TARGET_THUMB2 && (code == '!'))
22311 || (TARGET_THUMB && (code == '_')));
22314 /* Target hook for assembling integer objects. The ARM version needs to
22315 handle word-sized values specially. */
22316 static bool
22317 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22319 machine_mode mode;
22321 if (size == UNITS_PER_WORD && aligned_p)
22323 fputs ("\t.word\t", asm_out_file);
22324 output_addr_const (asm_out_file, x);
22326 /* Mark symbols as position independent. We only do this in the
22327 .text segment, not in the .data segment. */
22328 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22329 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22331 /* See legitimize_pic_address for an explanation of the
22332 TARGET_VXWORKS_RTP check. */
22333 if (!arm_pic_data_is_text_relative
22334 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22335 fputs ("(GOT)", asm_out_file);
22336 else
22337 fputs ("(GOTOFF)", asm_out_file);
22339 fputc ('\n', asm_out_file);
22340 return true;
22343 mode = GET_MODE (x);
22345 if (arm_vector_mode_supported_p (mode))
22347 int i, units;
22349 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22351 units = CONST_VECTOR_NUNITS (x);
22352 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22354 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22355 for (i = 0; i < units; i++)
22357 rtx elt = CONST_VECTOR_ELT (x, i);
22358 assemble_integer
22359 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22361 else
22362 for (i = 0; i < units; i++)
22364 rtx elt = CONST_VECTOR_ELT (x, i);
22365 REAL_VALUE_TYPE rval;
22367 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22369 assemble_real
22370 (rval, GET_MODE_INNER (mode),
22371 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22374 return true;
22377 return default_assemble_integer (x, size, aligned_p);
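/* For the word-sized PIC case handled above (symbol name invented): a
   constant-table reference to a global symbol "foo" that is not known
   to be local is emitted as "\t.word\tfoo(GOT)", whereas a local,
   text-relative symbol is emitted as "\t.word\tfoo(GOTOFF)".  */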
22380 static void
22381 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22383 section *s;
22385 if (!TARGET_AAPCS_BASED)
22387 (is_ctor ?
22388 default_named_section_asm_out_constructor
22389 : default_named_section_asm_out_destructor) (symbol, priority);
22390 return;
22393 /* Put these in the .init_array section, using a special relocation. */
22394 if (priority != DEFAULT_INIT_PRIORITY)
22396 char buf[18];
22397 sprintf (buf, "%s.%.5u",
22398 is_ctor ? ".init_array" : ".fini_array",
22399 priority);
22400 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22402 else if (is_ctor)
22403 s = ctors_section;
22404 else
22405 s = dtors_section;
22407 switch_to_section (s);
22408 assemble_align (POINTER_SIZE);
22409 fputs ("\t.word\t", asm_out_file);
22410 output_addr_const (asm_out_file, symbol);
22411 fputs ("(target1)\n", asm_out_file);
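/* For illustration (symbol and priority invented): on an AAPCS target a
   constructor "foo" registered with priority 101 is placed in a section
   named ".init_array.00101" and emitted as "\t.word\tfoo(target1)", the
   (target1) annotation selecting the R_ARM_TARGET1 relocation used for
   these tables.  */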
22414 /* Add a function to the list of static constructors. */
22416 static void
22417 arm_elf_asm_constructor (rtx symbol, int priority)
22419 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22422 /* Add a function to the list of static destructors. */
22424 static void
22425 arm_elf_asm_destructor (rtx symbol, int priority)
22427 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22430 /* A finite state machine takes care of noticing whether or not instructions
22431 can be conditionally executed, and thus decrease execution time and code
22432 size by deleting branch instructions. The fsm is controlled by
22433 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22435 /* The state of the fsm controlling condition codes is:
22436 0: normal, do nothing special
22437 1: make ASM_OUTPUT_OPCODE not output this instruction
22438 2: make ASM_OUTPUT_OPCODE not output this instruction
22439 3: make instructions conditional
22440 4: make instructions conditional
22442 State transitions (state->state by whom under condition):
22443 0 -> 1 final_prescan_insn if the `target' is a label
22444 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22445 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22446 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22447 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22448 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22449 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22450 (the target insn is arm_target_insn).
22452 If the jump clobbers the conditions then we use states 2 and 4.
22454 A similar thing can be done with conditional return insns.
22456 XXX In case the `target' is an unconditional branch, this conditionalising
22457 of the instructions always reduces code size, but not always execution
22458 time. But then, I want to reduce the code size to somewhere near what
22459 /bin/cc produces. */
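/* An illustrative transformation (registers and label invented): the
   sequence

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
   .L1:

   becomes

       cmp   r0, #0
       addne r1, r1, #1

   The branch is deleted and the skipped instruction is executed only
   when the inverse of the branch condition holds.  */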
22461 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22462 instructions. When a COND_EXEC instruction is seen the subsequent
22463 instructions are scanned so that multiple conditional instructions can be
22464 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22465 specify the length and true/false mask for the IT block. These will be
22466 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
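/* As a Thumb-2 illustration: three consecutive COND_EXEC instructions
   with arm_condexec_mask equal to 3 (binary 011) and a base condition
   of EQ cause thumb2_asm_output_opcode to emit "itte eq", so the first
   two instructions execute when EQ holds and the third when it does
   not.  */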
22468 /* Returns the index of the ARM condition code string in
22469 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22470 COMPARISON should be an rtx like `(eq (...) (...))'. */
22472 enum arm_cond_code
22473 maybe_get_arm_condition_code (rtx comparison)
22475 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22476 enum arm_cond_code code;
22477 enum rtx_code comp_code = GET_CODE (comparison);
22479 if (GET_MODE_CLASS (mode) != MODE_CC)
22480 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22481 XEXP (comparison, 1));
22483 switch (mode)
22485 case CC_DNEmode: code = ARM_NE; goto dominance;
22486 case CC_DEQmode: code = ARM_EQ; goto dominance;
22487 case CC_DGEmode: code = ARM_GE; goto dominance;
22488 case CC_DGTmode: code = ARM_GT; goto dominance;
22489 case CC_DLEmode: code = ARM_LE; goto dominance;
22490 case CC_DLTmode: code = ARM_LT; goto dominance;
22491 case CC_DGEUmode: code = ARM_CS; goto dominance;
22492 case CC_DGTUmode: code = ARM_HI; goto dominance;
22493 case CC_DLEUmode: code = ARM_LS; goto dominance;
22494 case CC_DLTUmode: code = ARM_CC;
22496 dominance:
22497 if (comp_code == EQ)
22498 return ARM_INVERSE_CONDITION_CODE (code);
22499 if (comp_code == NE)
22500 return code;
22501 return ARM_NV;
22503 case CC_NOOVmode:
22504 switch (comp_code)
22506 case NE: return ARM_NE;
22507 case EQ: return ARM_EQ;
22508 case GE: return ARM_PL;
22509 case LT: return ARM_MI;
22510 default: return ARM_NV;
22513 case CC_Zmode:
22514 switch (comp_code)
22516 case NE: return ARM_NE;
22517 case EQ: return ARM_EQ;
22518 default: return ARM_NV;
22521 case CC_Nmode:
22522 switch (comp_code)
22524 case NE: return ARM_MI;
22525 case EQ: return ARM_PL;
22526 default: return ARM_NV;
22529 case CCFPEmode:
22530 case CCFPmode:
22531 /* We can handle all cases except UNEQ and LTGT. */
22532 switch (comp_code)
22534 case GE: return ARM_GE;
22535 case GT: return ARM_GT;
22536 case LE: return ARM_LS;
22537 case LT: return ARM_MI;
22538 case NE: return ARM_NE;
22539 case EQ: return ARM_EQ;
22540 case ORDERED: return ARM_VC;
22541 case UNORDERED: return ARM_VS;
22542 case UNLT: return ARM_LT;
22543 case UNLE: return ARM_LE;
22544 case UNGT: return ARM_HI;
22545 case UNGE: return ARM_PL;
22546 /* UNEQ and LTGT do not have a representation. */
22547 case UNEQ: /* Fall through. */
22548 case LTGT: /* Fall through. */
22549 default: return ARM_NV;
22552 case CC_SWPmode:
22553 switch (comp_code)
22555 case NE: return ARM_NE;
22556 case EQ: return ARM_EQ;
22557 case GE: return ARM_LE;
22558 case GT: return ARM_LT;
22559 case LE: return ARM_GE;
22560 case LT: return ARM_GT;
22561 case GEU: return ARM_LS;
22562 case GTU: return ARM_CC;
22563 case LEU: return ARM_CS;
22564 case LTU: return ARM_HI;
22565 default: return ARM_NV;
22568 case CC_Cmode:
22569 switch (comp_code)
22571 case LTU: return ARM_CS;
22572 case GEU: return ARM_CC;
22573 default: return ARM_NV;
22576 case CC_CZmode:
22577 switch (comp_code)
22579 case NE: return ARM_NE;
22580 case EQ: return ARM_EQ;
22581 case GEU: return ARM_CS;
22582 case GTU: return ARM_HI;
22583 case LEU: return ARM_LS;
22584 case LTU: return ARM_CC;
22585 default: return ARM_NV;
22588 case CC_NCVmode:
22589 switch (comp_code)
22591 case GE: return ARM_GE;
22592 case LT: return ARM_LT;
22593 case GEU: return ARM_CS;
22594 case LTU: return ARM_CC;
22595 default: return ARM_NV;
22598 case CCmode:
22599 switch (comp_code)
22601 case NE: return ARM_NE;
22602 case EQ: return ARM_EQ;
22603 case GE: return ARM_GE;
22604 case GT: return ARM_GT;
22605 case LE: return ARM_LE;
22606 case LT: return ARM_LT;
22607 case GEU: return ARM_CS;
22608 case GTU: return ARM_HI;
22609 case LEU: return ARM_LS;
22610 case LTU: return ARM_CC;
22611 default: return ARM_NV;
22614 default: gcc_unreachable ();
22618 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22619 static enum arm_cond_code
22620 get_arm_condition_code (rtx comparison)
22622 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22623 gcc_assert (code != ARM_NV);
22624 return code;
22627 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22628 instructions. */
22629 void
22630 thumb2_final_prescan_insn (rtx_insn *insn)
22632 rtx_insn *first_insn = insn;
22633 rtx body = PATTERN (insn);
22634 rtx predicate;
22635 enum arm_cond_code code;
22636 int n;
22637 int mask;
22638 int max;
22640 /* max_insns_skipped in the tune was already taken into account in the
22641 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22642 just emit the IT blocks as we can. It does not make sense to split
22643 the IT blocks. */
22644 max = MAX_INSN_PER_IT_BLOCK;
22646 /* Remove the previous insn from the count of insns to be output. */
22647 if (arm_condexec_count)
22648 arm_condexec_count--;
22650 /* Nothing to do if we are already inside a conditional block. */
22651 if (arm_condexec_count)
22652 return;
22654 if (GET_CODE (body) != COND_EXEC)
22655 return;
22657 /* Conditional jumps are implemented directly. */
22658 if (JUMP_P (insn))
22659 return;
22661 predicate = COND_EXEC_TEST (body);
22662 arm_current_cc = get_arm_condition_code (predicate);
22664 n = get_attr_ce_count (insn);
22665 arm_condexec_count = 1;
22666 arm_condexec_mask = (1 << n) - 1;
22667 arm_condexec_masklen = n;
22668 /* See if subsequent instructions can be combined into the same block. */
22669 for (;;)
22671 insn = next_nonnote_insn (insn);
22673 /* Jumping into the middle of an IT block is illegal, so a label or
22674 barrier terminates the block. */
22675 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22676 break;
22678 body = PATTERN (insn);
22679 /* USE and CLOBBER aren't really insns, so just skip them. */
22680 if (GET_CODE (body) == USE
22681 || GET_CODE (body) == CLOBBER)
22682 continue;
22684 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22685 if (GET_CODE (body) != COND_EXEC)
22686 break;
22687 /* Maximum number of conditionally executed instructions in a block. */
22688 n = get_attr_ce_count (insn);
22689 if (arm_condexec_masklen + n > max)
22690 break;
22692 predicate = COND_EXEC_TEST (body);
22693 code = get_arm_condition_code (predicate);
22694 mask = (1 << n) - 1;
22695 if (arm_current_cc == code)
22696 arm_condexec_mask |= (mask << arm_condexec_masklen);
22697 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22698 break;
22700 arm_condexec_count++;
22701 arm_condexec_masklen += n;
22703 /* A jump must be the last instruction in a conditional block. */
22704 if (JUMP_P (insn))
22705 break;
22707 /* Restore recog_data (getting the attributes of other insns can
22708 destroy this array, but final.c assumes that it remains intact
22709 across this call). */
22710 extract_constrain_insn_cached (first_insn);
22713 void
22714 arm_final_prescan_insn (rtx_insn *insn)
22716 /* BODY will hold the body of INSN. */
22717 rtx body = PATTERN (insn);
22719 /* This will be 1 if trying to repeat the trick, and things need to be
22720 reversed if it appears to fail. */
22721 int reverse = 0;
22723 /* If we start with a return insn, we only succeed if we find another one. */
22724 int seeking_return = 0;
22725 enum rtx_code return_code = UNKNOWN;
22727 /* START_INSN will hold the insn from where we start looking. This is the
22728 first insn after the following code_label if REVERSE is true. */
22729 rtx_insn *start_insn = insn;
22731 /* If in state 4, check if the target branch is reached, in order to
22732 change back to state 0. */
22733 if (arm_ccfsm_state == 4)
22735 if (insn == arm_target_insn)
22737 arm_target_insn = NULL;
22738 arm_ccfsm_state = 0;
22740 return;
22743 /* If in state 3, it is possible to repeat the trick, if this insn is an
22744 unconditional branch to a label, and immediately following this branch
22745 is the previous target label which is only used once, and the label this
22746 branch jumps to is not too far off. */
22747 if (arm_ccfsm_state == 3)
22749 if (simplejump_p (insn))
22751 start_insn = next_nonnote_insn (start_insn);
22752 if (BARRIER_P (start_insn))
22754 /* XXX Isn't this always a barrier? */
22755 start_insn = next_nonnote_insn (start_insn);
22757 if (LABEL_P (start_insn)
22758 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22759 && LABEL_NUSES (start_insn) == 1)
22760 reverse = TRUE;
22761 else
22762 return;
22764 else if (ANY_RETURN_P (body))
22766 start_insn = next_nonnote_insn (start_insn);
22767 if (BARRIER_P (start_insn))
22768 start_insn = next_nonnote_insn (start_insn);
22769 if (LABEL_P (start_insn)
22770 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22771 && LABEL_NUSES (start_insn) == 1)
22773 reverse = TRUE;
22774 seeking_return = 1;
22775 return_code = GET_CODE (body);
22777 else
22778 return;
22780 else
22781 return;
22784 gcc_assert (!arm_ccfsm_state || reverse);
22785 if (!JUMP_P (insn))
22786 return;
22788 /* This jump might be paralleled with a clobber of the condition codes;
22789 the jump should always come first.  */
22790 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22791 body = XVECEXP (body, 0, 0);
22793 if (reverse
22794 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22795 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22797 int insns_skipped;
22798 int fail = FALSE, succeed = FALSE;
22799 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22800 int then_not_else = TRUE;
22801 rtx_insn *this_insn = start_insn;
22802 rtx label = 0;
22804 /* Register the insn jumped to. */
22805 if (reverse)
22807 if (!seeking_return)
22808 label = XEXP (SET_SRC (body), 0);
22810 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22811 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22812 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22814 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22815 then_not_else = FALSE;
22817 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22819 seeking_return = 1;
22820 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22822 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22824 seeking_return = 1;
22825 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22826 then_not_else = FALSE;
22828 else
22829 gcc_unreachable ();
22831 /* See how many insns this branch skips, and what kind of insns. If all
22832 insns are okay, and the label or unconditional branch to the same
22833 label is not too far away, succeed. */
22834 for (insns_skipped = 0;
22835 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22837 rtx scanbody;
22839 this_insn = next_nonnote_insn (this_insn);
22840 if (!this_insn)
22841 break;
22843 switch (GET_CODE (this_insn))
22845 case CODE_LABEL:
22846 /* Succeed if it is the target label, otherwise fail since
22847 control falls in from somewhere else. */
22848 if (this_insn == label)
22850 arm_ccfsm_state = 1;
22851 succeed = TRUE;
22853 else
22854 fail = TRUE;
22855 break;
22857 case BARRIER:
22858 /* Succeed if the following insn is the target label.
22859 Otherwise fail.
22860 If return insns are used then the last insn in a function
22861 will be a barrier. */
22862 this_insn = next_nonnote_insn (this_insn);
22863 if (this_insn && this_insn == label)
22865 arm_ccfsm_state = 1;
22866 succeed = TRUE;
22868 else
22869 fail = TRUE;
22870 break;
22872 case CALL_INSN:
22873 /* The AAPCS says that conditional calls should not be
22874 used since they make interworking inefficient (the
22875 linker can't transform BL<cond> into BLX). That's
22876 only a problem if the machine has BLX. */
22877 if (arm_arch5)
22879 fail = TRUE;
22880 break;
22883 /* Succeed if the following insn is the target label, or
22884 if the following two insns are a barrier and the
22885 target label. */
22886 this_insn = next_nonnote_insn (this_insn);
22887 if (this_insn && BARRIER_P (this_insn))
22888 this_insn = next_nonnote_insn (this_insn);
22890 if (this_insn && this_insn == label
22891 && insns_skipped < max_insns_skipped)
22893 arm_ccfsm_state = 1;
22894 succeed = TRUE;
22896 else
22897 fail = TRUE;
22898 break;
22900 case JUMP_INSN:
22901 /* If this is an unconditional branch to the same label, succeed.
22902 If it is to another label, do nothing. If it is conditional,
22903 fail. */
22904 /* XXX Probably, the tests for SET and the PC are
22905 unnecessary. */
22907 scanbody = PATTERN (this_insn);
22908 if (GET_CODE (scanbody) == SET
22909 && GET_CODE (SET_DEST (scanbody)) == PC)
22911 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22912 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22914 arm_ccfsm_state = 2;
22915 succeed = TRUE;
22917 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22918 fail = TRUE;
22920 /* Fail if a conditional return is undesirable (e.g. on a
22921 StrongARM), but still allow this if optimizing for size. */
22922 else if (GET_CODE (scanbody) == return_code
22923 && !use_return_insn (TRUE, NULL)
22924 && !optimize_size)
22925 fail = TRUE;
22926 else if (GET_CODE (scanbody) == return_code)
22928 arm_ccfsm_state = 2;
22929 succeed = TRUE;
22931 else if (GET_CODE (scanbody) == PARALLEL)
22933 switch (get_attr_conds (this_insn))
22935 case CONDS_NOCOND:
22936 break;
22937 default:
22938 fail = TRUE;
22939 break;
22942 else
22943 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22945 break;
22947 case INSN:
22948 /* Instructions using or affecting the condition codes make it
22949 fail. */
22950 scanbody = PATTERN (this_insn);
22951 if (!(GET_CODE (scanbody) == SET
22952 || GET_CODE (scanbody) == PARALLEL)
22953 || get_attr_conds (this_insn) != CONDS_NOCOND)
22954 fail = TRUE;
22955 break;
22957 default:
22958 break;
22961 if (succeed)
22963 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22964 arm_target_label = CODE_LABEL_NUMBER (label);
22965 else
22967 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22969 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22971 this_insn = next_nonnote_insn (this_insn);
22972 gcc_assert (!this_insn
22973 || (!BARRIER_P (this_insn)
22974 && !LABEL_P (this_insn)));
22976 if (!this_insn)
22978 /* Oh, dear! We ran off the end... give up. */
22979 extract_constrain_insn_cached (insn);
22980 arm_ccfsm_state = 0;
22981 arm_target_insn = NULL;
22982 return;
22984 arm_target_insn = this_insn;
22987 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22988 what it was. */
22989 if (!reverse)
22990 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22992 if (reverse || then_not_else)
22993 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22996 /* Restore recog_data (getting the attributes of other insns can
22997 destroy this array, but final.c assumes that it remains intact
22998 across this call).  */
22999 extract_constrain_insn_cached (insn);
23003 /* Output IT instructions. */
23004 void
23005 thumb2_asm_output_opcode (FILE * stream)
23007 char buff[5];
23008 int n;
23010 if (arm_condexec_mask)
23012 for (n = 0; n < arm_condexec_masklen; n++)
23013 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23014 buff[n] = 0;
23015 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23016 arm_condition_codes[arm_current_cc]);
23017 arm_condexec_mask = 0;
23021 /* Returns true if REGNO is a valid register
23022 for holding a quantity of type MODE. */
23023 int
23024 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23026 if (GET_MODE_CLASS (mode) == MODE_CC)
23027 return (regno == CC_REGNUM
23028 || (TARGET_HARD_FLOAT && TARGET_VFP
23029 && regno == VFPCC_REGNUM));
23031 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23032 return false;
23034 if (TARGET_THUMB1)
23035 /* For the Thumb we only allow values bigger than SImode in
23036 registers 0 - 6, so that there is always a second low
23037 register available to hold the upper part of the value.
23038 We probably ought to ensure that the register is the
23039 start of an even numbered register pair. */
23040 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23042 if (TARGET_HARD_FLOAT && TARGET_VFP
23043 && IS_VFP_REGNUM (regno))
23045 if (mode == SFmode || mode == SImode)
23046 return VFP_REGNO_OK_FOR_SINGLE (regno);
23048 if (mode == DFmode)
23049 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23051 /* VFP registers can hold HFmode values, but there is no point in
23052 putting them there unless we have hardware conversion insns. */
23053 if (mode == HFmode)
23054 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23056 if (TARGET_NEON)
23057 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23058 || (VALID_NEON_QREG_MODE (mode)
23059 && NEON_REGNO_OK_FOR_QUAD (regno))
23060 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23061 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23062 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23063 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23064 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23066 return FALSE;
23069 if (TARGET_REALLY_IWMMXT)
23071 if (IS_IWMMXT_GR_REGNUM (regno))
23072 return mode == SImode;
23074 if (IS_IWMMXT_REGNUM (regno))
23075 return VALID_IWMMXT_REG_MODE (mode);
23078 /* We allow almost any value to be stored in the general registers.
23079 Restrict doubleword quantities to even register pairs in ARM state
23080 so that we can use ldrd. Do not allow very large Neon structure
23081 opaque modes in general registers; they would use too many. */
23082 if (regno <= LAST_ARM_REGNUM)
23084 if (ARM_NUM_REGS (mode) > 4)
23085 return FALSE;
23087 if (TARGET_THUMB2)
23088 return TRUE;
23090 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23093 if (regno == FRAME_POINTER_REGNUM
23094 || regno == ARG_POINTER_REGNUM)
23095 /* We only allow integers in the fake hard registers. */
23096 return GET_MODE_CLASS (mode) == MODE_INT;
23098 return FALSE;
23101 /* Implement MODES_TIEABLE_P. */
23103 bool
23104 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23106 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23107 return true;
23109 /* We specifically want to allow elements of "structure" modes to
23110 be tieable to the structure. This more general condition allows
23111 other rarer situations too. */
23112 if (TARGET_NEON
23113 && (VALID_NEON_DREG_MODE (mode1)
23114 || VALID_NEON_QREG_MODE (mode1)
23115 || VALID_NEON_STRUCT_MODE (mode1))
23116 && (VALID_NEON_DREG_MODE (mode2)
23117 || VALID_NEON_QREG_MODE (mode2)
23118 || VALID_NEON_STRUCT_MODE (mode2)))
23119 return true;
23121 return false;
23124 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23125 not used in arm mode. */
23127 enum reg_class
23128 arm_regno_class (int regno)
23130 if (regno == PC_REGNUM)
23131 return NO_REGS;
23133 if (TARGET_THUMB1)
23135 if (regno == STACK_POINTER_REGNUM)
23136 return STACK_REG;
23137 if (regno == CC_REGNUM)
23138 return CC_REG;
23139 if (regno < 8)
23140 return LO_REGS;
23141 return HI_REGS;
23144 if (TARGET_THUMB2 && regno < 8)
23145 return LO_REGS;
23147 if ( regno <= LAST_ARM_REGNUM
23148 || regno == FRAME_POINTER_REGNUM
23149 || regno == ARG_POINTER_REGNUM)
23150 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23152 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23153 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23155 if (IS_VFP_REGNUM (regno))
23157 if (regno <= D7_VFP_REGNUM)
23158 return VFP_D0_D7_REGS;
23159 else if (regno <= LAST_LO_VFP_REGNUM)
23160 return VFP_LO_REGS;
23161 else
23162 return VFP_HI_REGS;
23165 if (IS_IWMMXT_REGNUM (regno))
23166 return IWMMXT_REGS;
23168 if (IS_IWMMXT_GR_REGNUM (regno))
23169 return IWMMXT_GR_REGS;
23171 return NO_REGS;
23174 /* Handle a special case when computing the offset
23175 of an argument from the frame pointer. */
23176 int
23177 arm_debugger_arg_offset (int value, rtx addr)
23179 rtx_insn *insn;
23181 /* We are only interested if dbxout_parms() failed to compute the offset. */
23182 if (value != 0)
23183 return 0;
23185 /* We can only cope with the case where the address is held in a register. */
23186 if (!REG_P (addr))
23187 return 0;
23189 /* If we are using the frame pointer to point at the argument, then
23190 an offset of 0 is correct. */
23191 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23192 return 0;
23194 /* If we are using the stack pointer to point at the
23195 argument, then an offset of 0 is correct. */
23196 /* ??? Check this is consistent with thumb2 frame layout. */
23197 if ((TARGET_THUMB || !frame_pointer_needed)
23198 && REGNO (addr) == SP_REGNUM)
23199 return 0;
23201 /* Oh dear. The argument is pointed to by a register rather
23202 than being held in a register, or being stored at a known
23203 offset from the frame pointer. Since GDB only understands
23204 those two kinds of argument we must translate the address
23205 held in the register into an offset from the frame pointer.
23206 We do this by searching through the insns for the function
23207 looking to see where this register gets its value. If the
23208 register is initialized from the frame pointer plus an offset
23209 then we are in luck and we can continue, otherwise we give up.
23211 This code is exercised by producing debugging information
23212 for a function with arguments like this:
23214 double func (double a, double b, int c, double d) {return d;}
23216 Without this code the stab for parameter 'd' will be set to
23217 an offset of 0 from the frame pointer, rather than 8. */
23219 /* The if() statement says:
23221 If the insn is a normal instruction
23222 and if the insn is setting the value in a register
23223 and if the register being set is the register holding the address of the argument
23224 and if the address is computed by an addition
23225 that involves adding to a register
23226 which is the frame pointer
23227 a constant integer
23229 then... */
23231 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23233 if ( NONJUMP_INSN_P (insn)
23234 && GET_CODE (PATTERN (insn)) == SET
23235 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23236 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23237 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23238 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23239 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23242 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23244 break;
23248 if (value == 0)
23250 debug_rtx (addr);
23251 warning (0, "unable to compute real location of stacked parameter");
23252 value = 8; /* XXX magic hack */
23255 return value;
23258 typedef enum {
23259 T_V8QI,
23260 T_V4HI,
23261 T_V4HF,
23262 T_V2SI,
23263 T_V2SF,
23264 T_DI,
23265 T_V16QI,
23266 T_V8HI,
23267 T_V4SI,
23268 T_V4SF,
23269 T_V2DI,
23270 T_TI,
23271 T_EI,
23272 T_OI,
23273 T_MAX /* Size of enum. Keep last. */
23274 } neon_builtin_type_mode;
23276 #define TYPE_MODE_BIT(X) (1 << (X))
23278 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23279 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23280 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23281 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23282 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23283 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23285 #define v8qi_UP T_V8QI
23286 #define v4hi_UP T_V4HI
23287 #define v4hf_UP T_V4HF
23288 #define v2si_UP T_V2SI
23289 #define v2sf_UP T_V2SF
23290 #define di_UP T_DI
23291 #define v16qi_UP T_V16QI
23292 #define v8hi_UP T_V8HI
23293 #define v4si_UP T_V4SI
23294 #define v4sf_UP T_V4SF
23295 #define v2di_UP T_V2DI
23296 #define ti_UP T_TI
23297 #define ei_UP T_EI
23298 #define oi_UP T_OI
23300 #define UP(X) X##_UP
23302 typedef enum {
23303 NEON_BINOP,
23304 NEON_TERNOP,
23305 NEON_UNOP,
23306 NEON_BSWAP,
23307 NEON_GETLANE,
23308 NEON_SETLANE,
23309 NEON_CREATE,
23310 NEON_RINT,
23311 NEON_COPYSIGNF,
23312 NEON_DUP,
23313 NEON_DUPLANE,
23314 NEON_COMBINE,
23315 NEON_SPLIT,
23316 NEON_LANEMUL,
23317 NEON_LANEMULL,
23318 NEON_LANEMULH,
23319 NEON_LANEMAC,
23320 NEON_SCALARMUL,
23321 NEON_SCALARMULL,
23322 NEON_SCALARMULH,
23323 NEON_SCALARMAC,
23324 NEON_CONVERT,
23325 NEON_FLOAT_WIDEN,
23326 NEON_FLOAT_NARROW,
23327 NEON_FIXCONV,
23328 NEON_SELECT,
23329 NEON_REINTERP,
23330 NEON_VTBL,
23331 NEON_VTBX,
23332 NEON_LOAD1,
23333 NEON_LOAD1LANE,
23334 NEON_STORE1,
23335 NEON_STORE1LANE,
23336 NEON_LOADSTRUCT,
23337 NEON_LOADSTRUCTLANE,
23338 NEON_STORESTRUCT,
23339 NEON_STORESTRUCTLANE,
23340 NEON_LOGICBINOP,
23341 NEON_SHIFTINSERT,
23342 NEON_SHIFTIMM,
23343 NEON_SHIFTACC
23344 } neon_itype;
23346 typedef struct {
23347 const char *name;
23348 const neon_itype itype;
23349 const neon_builtin_type_mode mode;
23350 const enum insn_code code;
23351 unsigned int fcode;
23352 } neon_builtin_datum;
23354 #define CF(N,X) CODE_FOR_neon_##N##X
23356 #define VAR1(T, N, A) \
23357 {#N, NEON_##T, UP (A), CF (N, A), 0}
23358 #define VAR2(T, N, A, B) \
23359 VAR1 (T, N, A), \
23360 {#N, NEON_##T, UP (B), CF (N, B), 0}
23361 #define VAR3(T, N, A, B, C) \
23362 VAR2 (T, N, A, B), \
23363 {#N, NEON_##T, UP (C), CF (N, C), 0}
23364 #define VAR4(T, N, A, B, C, D) \
23365 VAR3 (T, N, A, B, C), \
23366 {#N, NEON_##T, UP (D), CF (N, D), 0}
23367 #define VAR5(T, N, A, B, C, D, E) \
23368 VAR4 (T, N, A, B, C, D), \
23369 {#N, NEON_##T, UP (E), CF (N, E), 0}
23370 #define VAR6(T, N, A, B, C, D, E, F) \
23371 VAR5 (T, N, A, B, C, D, E), \
23372 {#N, NEON_##T, UP (F), CF (N, F), 0}
23373 #define VAR7(T, N, A, B, C, D, E, F, G) \
23374 VAR6 (T, N, A, B, C, D, E, F), \
23375 {#N, NEON_##T, UP (G), CF (N, G), 0}
23376 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23377 VAR7 (T, N, A, B, C, D, E, F, G), \
23378 {#N, NEON_##T, UP (H), CF (N, H), 0}
23379 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23380 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23381 {#N, NEON_##T, UP (I), CF (N, I), 0}
23382 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23383 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23384 {#N, NEON_##T, UP (J), CF (N, J), 0}
23386 /* The NEON builtin data can be found in arm_neon_builtins.def.
23387 The mode entries in the following table correspond to the "key" type of the
23388 instruction variant, i.e. equivalent to that which would be specified after
23389 the assembler mnemonic, which usually refers to the last vector operand.
23390 (Signed/unsigned/polynomial types are not differentiated between though, and
23391 are all mapped onto the same mode for a given element size.) The modes
23392 listed per instruction should be the same as those defined for that
23393 instruction's pattern in neon.md. */
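/* As an illustration of the VARn/CF/UP macros above (the entry is chosen
   for the example; the real entries live in arm_neon_builtins.def):
   VAR2 (BINOP, vadd, v2sf, v4sf) expands to
   { "vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0 } and
   { "vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0 }, the final
   0 initializing the fcode field.  */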
23395 static neon_builtin_datum neon_builtin_data[] =
23397 #include "arm_neon_builtins.def"
23400 #undef CF
23401 #undef VAR1
23402 #undef VAR2
23403 #undef VAR3
23404 #undef VAR4
23405 #undef VAR5
23406 #undef VAR6
23407 #undef VAR7
23408 #undef VAR8
23409 #undef VAR9
23410 #undef VAR10
23412 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23413 #define VAR1(T, N, A) \
23414 CF (N, A)
23415 #define VAR2(T, N, A, B) \
23416 VAR1 (T, N, A), \
23417 CF (N, B)
23418 #define VAR3(T, N, A, B, C) \
23419 VAR2 (T, N, A, B), \
23420 CF (N, C)
23421 #define VAR4(T, N, A, B, C, D) \
23422 VAR3 (T, N, A, B, C), \
23423 CF (N, D)
23424 #define VAR5(T, N, A, B, C, D, E) \
23425 VAR4 (T, N, A, B, C, D), \
23426 CF (N, E)
23427 #define VAR6(T, N, A, B, C, D, E, F) \
23428 VAR5 (T, N, A, B, C, D, E), \
23429 CF (N, F)
23430 #define VAR7(T, N, A, B, C, D, E, F, G) \
23431 VAR6 (T, N, A, B, C, D, E, F), \
23432 CF (N, G)
23433 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23434 VAR7 (T, N, A, B, C, D, E, F, G), \
23435 CF (N, H)
23436 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23437 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23438 CF (N, I)
23439 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23440 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23441 CF (N, J)
23442 enum arm_builtins
23444 ARM_BUILTIN_GETWCGR0,
23445 ARM_BUILTIN_GETWCGR1,
23446 ARM_BUILTIN_GETWCGR2,
23447 ARM_BUILTIN_GETWCGR3,
23449 ARM_BUILTIN_SETWCGR0,
23450 ARM_BUILTIN_SETWCGR1,
23451 ARM_BUILTIN_SETWCGR2,
23452 ARM_BUILTIN_SETWCGR3,
23454 ARM_BUILTIN_WZERO,
23456 ARM_BUILTIN_WAVG2BR,
23457 ARM_BUILTIN_WAVG2HR,
23458 ARM_BUILTIN_WAVG2B,
23459 ARM_BUILTIN_WAVG2H,
23461 ARM_BUILTIN_WACCB,
23462 ARM_BUILTIN_WACCH,
23463 ARM_BUILTIN_WACCW,
23465 ARM_BUILTIN_WMACS,
23466 ARM_BUILTIN_WMACSZ,
23467 ARM_BUILTIN_WMACU,
23468 ARM_BUILTIN_WMACUZ,
23470 ARM_BUILTIN_WSADB,
23471 ARM_BUILTIN_WSADBZ,
23472 ARM_BUILTIN_WSADH,
23473 ARM_BUILTIN_WSADHZ,
23475 ARM_BUILTIN_WALIGNI,
23476 ARM_BUILTIN_WALIGNR0,
23477 ARM_BUILTIN_WALIGNR1,
23478 ARM_BUILTIN_WALIGNR2,
23479 ARM_BUILTIN_WALIGNR3,
23481 ARM_BUILTIN_TMIA,
23482 ARM_BUILTIN_TMIAPH,
23483 ARM_BUILTIN_TMIABB,
23484 ARM_BUILTIN_TMIABT,
23485 ARM_BUILTIN_TMIATB,
23486 ARM_BUILTIN_TMIATT,
23488 ARM_BUILTIN_TMOVMSKB,
23489 ARM_BUILTIN_TMOVMSKH,
23490 ARM_BUILTIN_TMOVMSKW,
23492 ARM_BUILTIN_TBCSTB,
23493 ARM_BUILTIN_TBCSTH,
23494 ARM_BUILTIN_TBCSTW,
23496 ARM_BUILTIN_WMADDS,
23497 ARM_BUILTIN_WMADDU,
23499 ARM_BUILTIN_WPACKHSS,
23500 ARM_BUILTIN_WPACKWSS,
23501 ARM_BUILTIN_WPACKDSS,
23502 ARM_BUILTIN_WPACKHUS,
23503 ARM_BUILTIN_WPACKWUS,
23504 ARM_BUILTIN_WPACKDUS,
23506 ARM_BUILTIN_WADDB,
23507 ARM_BUILTIN_WADDH,
23508 ARM_BUILTIN_WADDW,
23509 ARM_BUILTIN_WADDSSB,
23510 ARM_BUILTIN_WADDSSH,
23511 ARM_BUILTIN_WADDSSW,
23512 ARM_BUILTIN_WADDUSB,
23513 ARM_BUILTIN_WADDUSH,
23514 ARM_BUILTIN_WADDUSW,
23515 ARM_BUILTIN_WSUBB,
23516 ARM_BUILTIN_WSUBH,
23517 ARM_BUILTIN_WSUBW,
23518 ARM_BUILTIN_WSUBSSB,
23519 ARM_BUILTIN_WSUBSSH,
23520 ARM_BUILTIN_WSUBSSW,
23521 ARM_BUILTIN_WSUBUSB,
23522 ARM_BUILTIN_WSUBUSH,
23523 ARM_BUILTIN_WSUBUSW,
23525 ARM_BUILTIN_WAND,
23526 ARM_BUILTIN_WANDN,
23527 ARM_BUILTIN_WOR,
23528 ARM_BUILTIN_WXOR,
23530 ARM_BUILTIN_WCMPEQB,
23531 ARM_BUILTIN_WCMPEQH,
23532 ARM_BUILTIN_WCMPEQW,
23533 ARM_BUILTIN_WCMPGTUB,
23534 ARM_BUILTIN_WCMPGTUH,
23535 ARM_BUILTIN_WCMPGTUW,
23536 ARM_BUILTIN_WCMPGTSB,
23537 ARM_BUILTIN_WCMPGTSH,
23538 ARM_BUILTIN_WCMPGTSW,
23540 ARM_BUILTIN_TEXTRMSB,
23541 ARM_BUILTIN_TEXTRMSH,
23542 ARM_BUILTIN_TEXTRMSW,
23543 ARM_BUILTIN_TEXTRMUB,
23544 ARM_BUILTIN_TEXTRMUH,
23545 ARM_BUILTIN_TEXTRMUW,
23546 ARM_BUILTIN_TINSRB,
23547 ARM_BUILTIN_TINSRH,
23548 ARM_BUILTIN_TINSRW,
23550 ARM_BUILTIN_WMAXSW,
23551 ARM_BUILTIN_WMAXSH,
23552 ARM_BUILTIN_WMAXSB,
23553 ARM_BUILTIN_WMAXUW,
23554 ARM_BUILTIN_WMAXUH,
23555 ARM_BUILTIN_WMAXUB,
23556 ARM_BUILTIN_WMINSW,
23557 ARM_BUILTIN_WMINSH,
23558 ARM_BUILTIN_WMINSB,
23559 ARM_BUILTIN_WMINUW,
23560 ARM_BUILTIN_WMINUH,
23561 ARM_BUILTIN_WMINUB,
23563 ARM_BUILTIN_WMULUM,
23564 ARM_BUILTIN_WMULSM,
23565 ARM_BUILTIN_WMULUL,
23567 ARM_BUILTIN_PSADBH,
23568 ARM_BUILTIN_WSHUFH,
23570 ARM_BUILTIN_WSLLH,
23571 ARM_BUILTIN_WSLLW,
23572 ARM_BUILTIN_WSLLD,
23573 ARM_BUILTIN_WSRAH,
23574 ARM_BUILTIN_WSRAW,
23575 ARM_BUILTIN_WSRAD,
23576 ARM_BUILTIN_WSRLH,
23577 ARM_BUILTIN_WSRLW,
23578 ARM_BUILTIN_WSRLD,
23579 ARM_BUILTIN_WRORH,
23580 ARM_BUILTIN_WRORW,
23581 ARM_BUILTIN_WRORD,
23582 ARM_BUILTIN_WSLLHI,
23583 ARM_BUILTIN_WSLLWI,
23584 ARM_BUILTIN_WSLLDI,
23585 ARM_BUILTIN_WSRAHI,
23586 ARM_BUILTIN_WSRAWI,
23587 ARM_BUILTIN_WSRADI,
23588 ARM_BUILTIN_WSRLHI,
23589 ARM_BUILTIN_WSRLWI,
23590 ARM_BUILTIN_WSRLDI,
23591 ARM_BUILTIN_WRORHI,
23592 ARM_BUILTIN_WRORWI,
23593 ARM_BUILTIN_WRORDI,
23595 ARM_BUILTIN_WUNPCKIHB,
23596 ARM_BUILTIN_WUNPCKIHH,
23597 ARM_BUILTIN_WUNPCKIHW,
23598 ARM_BUILTIN_WUNPCKILB,
23599 ARM_BUILTIN_WUNPCKILH,
23600 ARM_BUILTIN_WUNPCKILW,
23602 ARM_BUILTIN_WUNPCKEHSB,
23603 ARM_BUILTIN_WUNPCKEHSH,
23604 ARM_BUILTIN_WUNPCKEHSW,
23605 ARM_BUILTIN_WUNPCKEHUB,
23606 ARM_BUILTIN_WUNPCKEHUH,
23607 ARM_BUILTIN_WUNPCKEHUW,
23608 ARM_BUILTIN_WUNPCKELSB,
23609 ARM_BUILTIN_WUNPCKELSH,
23610 ARM_BUILTIN_WUNPCKELSW,
23611 ARM_BUILTIN_WUNPCKELUB,
23612 ARM_BUILTIN_WUNPCKELUH,
23613 ARM_BUILTIN_WUNPCKELUW,
23615 ARM_BUILTIN_WABSB,
23616 ARM_BUILTIN_WABSH,
23617 ARM_BUILTIN_WABSW,
23619 ARM_BUILTIN_WADDSUBHX,
23620 ARM_BUILTIN_WSUBADDHX,
23622 ARM_BUILTIN_WABSDIFFB,
23623 ARM_BUILTIN_WABSDIFFH,
23624 ARM_BUILTIN_WABSDIFFW,
23626 ARM_BUILTIN_WADDCH,
23627 ARM_BUILTIN_WADDCW,
23629 ARM_BUILTIN_WAVG4,
23630 ARM_BUILTIN_WAVG4R,
23632 ARM_BUILTIN_WMADDSX,
23633 ARM_BUILTIN_WMADDUX,
23635 ARM_BUILTIN_WMADDSN,
23636 ARM_BUILTIN_WMADDUN,
23638 ARM_BUILTIN_WMULWSM,
23639 ARM_BUILTIN_WMULWUM,
23641 ARM_BUILTIN_WMULWSMR,
23642 ARM_BUILTIN_WMULWUMR,
23644 ARM_BUILTIN_WMULWL,
23646 ARM_BUILTIN_WMULSMR,
23647 ARM_BUILTIN_WMULUMR,
23649 ARM_BUILTIN_WQMULM,
23650 ARM_BUILTIN_WQMULMR,
23652 ARM_BUILTIN_WQMULWM,
23653 ARM_BUILTIN_WQMULWMR,
23655 ARM_BUILTIN_WADDBHUSM,
23656 ARM_BUILTIN_WADDBHUSL,
23658 ARM_BUILTIN_WQMIABB,
23659 ARM_BUILTIN_WQMIABT,
23660 ARM_BUILTIN_WQMIATB,
23661 ARM_BUILTIN_WQMIATT,
23663 ARM_BUILTIN_WQMIABBN,
23664 ARM_BUILTIN_WQMIABTN,
23665 ARM_BUILTIN_WQMIATBN,
23666 ARM_BUILTIN_WQMIATTN,
23668 ARM_BUILTIN_WMIABB,
23669 ARM_BUILTIN_WMIABT,
23670 ARM_BUILTIN_WMIATB,
23671 ARM_BUILTIN_WMIATT,
23673 ARM_BUILTIN_WMIABBN,
23674 ARM_BUILTIN_WMIABTN,
23675 ARM_BUILTIN_WMIATBN,
23676 ARM_BUILTIN_WMIATTN,
23678 ARM_BUILTIN_WMIAWBB,
23679 ARM_BUILTIN_WMIAWBT,
23680 ARM_BUILTIN_WMIAWTB,
23681 ARM_BUILTIN_WMIAWTT,
23683 ARM_BUILTIN_WMIAWBBN,
23684 ARM_BUILTIN_WMIAWBTN,
23685 ARM_BUILTIN_WMIAWTBN,
23686 ARM_BUILTIN_WMIAWTTN,
23688 ARM_BUILTIN_WMERGE,
23690 ARM_BUILTIN_CRC32B,
23691 ARM_BUILTIN_CRC32H,
23692 ARM_BUILTIN_CRC32W,
23693 ARM_BUILTIN_CRC32CB,
23694 ARM_BUILTIN_CRC32CH,
23695 ARM_BUILTIN_CRC32CW,
23697 ARM_BUILTIN_GET_FPSCR,
23698 ARM_BUILTIN_SET_FPSCR,
23700 #undef CRYPTO1
23701 #undef CRYPTO2
23702 #undef CRYPTO3
23704 #define CRYPTO1(L, U, M1, M2) \
23705 ARM_BUILTIN_CRYPTO_##U,
23706 #define CRYPTO2(L, U, M1, M2, M3) \
23707 ARM_BUILTIN_CRYPTO_##U,
23708 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23709 ARM_BUILTIN_CRYPTO_##U,
23711 #include "crypto.def"
23713 #undef CRYPTO1
23714 #undef CRYPTO2
23715 #undef CRYPTO3
23717 #include "arm_neon_builtins.def"
23719 ,ARM_BUILTIN_MAX
23722 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23724 #undef CF
23725 #undef VAR1
23726 #undef VAR2
23727 #undef VAR3
23728 #undef VAR4
23729 #undef VAR5
23730 #undef VAR6
23731 #undef VAR7
23732 #undef VAR8
23733 #undef VAR9
23734 #undef VAR10
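/* Layout sketch: arm_neon_builtins.def populates both neon_builtin_data and
   the tail of enum arm_builtins via the two VARn/CF macro sets above, so
   entry i of the table is assigned function code ARM_BUILTIN_NEON_BASE + i
   when the builtins are registered in arm_init_neon_builtins below; the NEON
   codes therefore occupy the last ARRAY_SIZE (neon_builtin_data) slots
   before ARM_BUILTIN_MAX.  */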
23736 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23738 #define NUM_DREG_TYPES 5
23739 #define NUM_QREG_TYPES 6
23741 static void
23742 arm_init_neon_builtins (void)
23744 unsigned int i, fcode;
23745 tree decl;
23747 tree neon_intQI_type_node;
23748 tree neon_intHI_type_node;
23749 tree neon_floatHF_type_node;
23750 tree neon_polyQI_type_node;
23751 tree neon_polyHI_type_node;
23752 tree neon_intSI_type_node;
23753 tree neon_intDI_type_node;
23754 tree neon_intUTI_type_node;
23755 tree neon_float_type_node;
23757 tree intQI_pointer_node;
23758 tree intHI_pointer_node;
23759 tree intSI_pointer_node;
23760 tree intDI_pointer_node;
23761 tree float_pointer_node;
23763 tree const_intQI_node;
23764 tree const_intHI_node;
23765 tree const_intSI_node;
23766 tree const_intDI_node;
23767 tree const_float_node;
23769 tree const_intQI_pointer_node;
23770 tree const_intHI_pointer_node;
23771 tree const_intSI_pointer_node;
23772 tree const_intDI_pointer_node;
23773 tree const_float_pointer_node;
23775 tree V8QI_type_node;
23776 tree V4HI_type_node;
23777 tree V4UHI_type_node;
23778 tree V4HF_type_node;
23779 tree V2SI_type_node;
23780 tree V2USI_type_node;
23781 tree V2SF_type_node;
23782 tree V16QI_type_node;
23783 tree V8HI_type_node;
23784 tree V8UHI_type_node;
23785 tree V4SI_type_node;
23786 tree V4USI_type_node;
23787 tree V4SF_type_node;
23788 tree V2DI_type_node;
23789 tree V2UDI_type_node;
23791 tree intUQI_type_node;
23792 tree intUHI_type_node;
23793 tree intUSI_type_node;
23794 tree intUDI_type_node;
23796 tree intEI_type_node;
23797 tree intOI_type_node;
23798 tree intCI_type_node;
23799 tree intXI_type_node;
23801 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23802 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23803 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23805 /* Create distinguished type nodes for NEON vector element types,
23806 and pointers to values of such types, so we can detect them later. */
23807 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23808 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23809 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23810 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23811 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23812 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23813 neon_float_type_node = make_node (REAL_TYPE);
23814 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23815 layout_type (neon_float_type_node);
23816 neon_floatHF_type_node = make_node (REAL_TYPE);
23817 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23818 layout_type (neon_floatHF_type_node);
23820 /* Define typedefs which exactly correspond to the modes we are basing vector
23821 types on. If you change these names you'll need to change
23822 the table used by arm_mangle_type too. */
23823 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23824 "__builtin_neon_qi");
23825 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23826 "__builtin_neon_hi");
23827 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23828 "__builtin_neon_hf");
23829 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23830 "__builtin_neon_si");
23831 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23832 "__builtin_neon_sf");
23833 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23834 "__builtin_neon_di");
23835 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23836 "__builtin_neon_poly8");
23837 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23838 "__builtin_neon_poly16");
23840 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23841 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23842 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23843 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23844 float_pointer_node = build_pointer_type (neon_float_type_node);
23846 /* Next create constant-qualified versions of the above types. */
23847 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23848 TYPE_QUAL_CONST);
23849 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23850 TYPE_QUAL_CONST);
23851 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23852 TYPE_QUAL_CONST);
23853 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23854 TYPE_QUAL_CONST);
23855 const_float_node = build_qualified_type (neon_float_type_node,
23856 TYPE_QUAL_CONST);
23858 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23859 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23860 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23861 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23862 const_float_pointer_node = build_pointer_type (const_float_node);
23864 /* Unsigned integer types for various mode sizes. */
23865 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23866 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23867 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23868 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23869 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23870 /* Now create vector types based on our NEON element types. */
23871 /* 64-bit vectors. */
23872 V8QI_type_node =
23873 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23874 V4HI_type_node =
23875 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23876 V4UHI_type_node =
23877 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23878 V4HF_type_node =
23879 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23880 V2SI_type_node =
23881 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23882 V2USI_type_node =
23883 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23884 V2SF_type_node =
23885 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23886 /* 128-bit vectors. */
23887 V16QI_type_node =
23888 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23889 V8HI_type_node =
23890 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23891 V8UHI_type_node =
23892 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23893 V4SI_type_node =
23894 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23895 V4USI_type_node =
23896 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23897 V4SF_type_node =
23898 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23899 V2DI_type_node =
23900 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23901 V2UDI_type_node =
23902 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23905 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23906 "__builtin_neon_uqi");
23907 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23908 "__builtin_neon_uhi");
23909 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23910 "__builtin_neon_usi");
23911 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23912 "__builtin_neon_udi");
23913 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23914 "__builtin_neon_poly64");
23915 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23916 "__builtin_neon_poly128");
23918 /* Opaque integer types for structures of vectors. */
23919 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23920 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23921 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23922 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23924 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23925 "__builtin_neon_ti");
23926 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23927 "__builtin_neon_ei");
23928 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23929 "__builtin_neon_oi");
23930 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23931 "__builtin_neon_ci");
23932 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23933 "__builtin_neon_xi");
23935 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23938 tree V16UQI_type_node =
23939 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23941 tree v16uqi_ftype_v16uqi
23942 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23944 tree v16uqi_ftype_v16uqi_v16uqi
23945 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23946 V16UQI_type_node, NULL_TREE);
23948 tree v4usi_ftype_v4usi
23949 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23951 tree v4usi_ftype_v4usi_v4usi
23952 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23953 V4USI_type_node, NULL_TREE);
23955 tree v4usi_ftype_v4usi_v4usi_v4usi
23956 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23957 V4USI_type_node, V4USI_type_node, NULL_TREE);
23959 tree uti_ftype_udi_udi
23960 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23961 intUDI_type_node, NULL_TREE);
23963 #undef CRYPTO1
23964 #undef CRYPTO2
23965 #undef CRYPTO3
23966 #undef C
23967 #undef N
23968 #undef CF
23969 #undef FT1
23970 #undef FT2
23971 #undef FT3
23973 #define C(U) \
23974 ARM_BUILTIN_CRYPTO_##U
23975 #define N(L) \
23976 "__builtin_arm_crypto_"#L
23977 #define FT1(R, A) \
23978 R##_ftype_##A
23979 #define FT2(R, A1, A2) \
23980 R##_ftype_##A1##_##A2
23981 #define FT3(R, A1, A2, A3) \
23982 R##_ftype_##A1##_##A2##_##A3
23983 #define CRYPTO1(L, U, R, A) \
23984 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23985 C (U), BUILT_IN_MD, \
23986 NULL, NULL_TREE);
23987 #define CRYPTO2(L, U, R, A1, A2) \
23988 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23989 C (U), BUILT_IN_MD, \
23990 NULL, NULL_TREE);
23992 #define CRYPTO3(L, U, R, A1, A2, A3) \
23993 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23994 C (U), BUILT_IN_MD, \
23995 NULL, NULL_TREE);
23996 #include "crypto.def"
23998 #undef CRYPTO1
23999 #undef CRYPTO2
24000 #undef CRYPTO3
24001 #undef C
24002 #undef N
24003 #undef FT1
24004 #undef FT2
24005 #undef FT3
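/* Expansion sketch: a hypothetical crypto.def entry such as
     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)
   would be expanded by the C/N/FT2/CRYPTO2 definitions above into
     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);
   so each crypto builtin is registered under a "__builtin_arm_crypto_" name
   with a function type assembled from the *_ftype_* nodes built above.  */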
24007 dreg_types[0] = V8QI_type_node;
24008 dreg_types[1] = V4HI_type_node;
24009 dreg_types[2] = V2SI_type_node;
24010 dreg_types[3] = V2SF_type_node;
24011 dreg_types[4] = neon_intDI_type_node;
24013 qreg_types[0] = V16QI_type_node;
24014 qreg_types[1] = V8HI_type_node;
24015 qreg_types[2] = V4SI_type_node;
24016 qreg_types[3] = V4SF_type_node;
24017 qreg_types[4] = V2DI_type_node;
24018 qreg_types[5] = neon_intUTI_type_node;
24020 for (i = 0; i < NUM_QREG_TYPES; i++)
24022 int j;
24023 for (j = 0; j < NUM_QREG_TYPES; j++)
24025 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24026 reinterp_ftype_dreg[i][j]
24027 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24029 reinterp_ftype_qreg[i][j]
24030 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24034 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24035 i < ARRAY_SIZE (neon_builtin_data);
24036 i++, fcode++)
24038 neon_builtin_datum *d = &neon_builtin_data[i];
24040 const char* const modenames[] = {
24041 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24042 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24043 "ti", "ei", "oi"
24045 char namebuf[60];
24046 tree ftype = NULL;
24047 int is_load = 0, is_store = 0;
24049 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24051 d->fcode = fcode;
24053 switch (d->itype)
24055 case NEON_LOAD1:
24056 case NEON_LOAD1LANE:
24057 case NEON_LOADSTRUCT:
24058 case NEON_LOADSTRUCTLANE:
24059 is_load = 1;
24060 /* Fall through. */
24061 case NEON_STORE1:
24062 case NEON_STORE1LANE:
24063 case NEON_STORESTRUCT:
24064 case NEON_STORESTRUCTLANE:
24065 if (!is_load)
24066 is_store = 1;
24067 /* Fall through. */
24068 case NEON_UNOP:
24069 case NEON_RINT:
24070 case NEON_BINOP:
24071 case NEON_LOGICBINOP:
24072 case NEON_SHIFTINSERT:
24073 case NEON_TERNOP:
24074 case NEON_GETLANE:
24075 case NEON_SETLANE:
24076 case NEON_CREATE:
24077 case NEON_DUP:
24078 case NEON_DUPLANE:
24079 case NEON_SHIFTIMM:
24080 case NEON_SHIFTACC:
24081 case NEON_COMBINE:
24082 case NEON_SPLIT:
24083 case NEON_CONVERT:
24084 case NEON_FIXCONV:
24085 case NEON_LANEMUL:
24086 case NEON_LANEMULL:
24087 case NEON_LANEMULH:
24088 case NEON_LANEMAC:
24089 case NEON_SCALARMUL:
24090 case NEON_SCALARMULL:
24091 case NEON_SCALARMULH:
24092 case NEON_SCALARMAC:
24093 case NEON_SELECT:
24094 case NEON_VTBL:
24095 case NEON_VTBX:
24097 int k;
24098 tree return_type = void_type_node, args = void_list_node;
24100 /* Build a function type directly from the insn_data for
24101 this builtin. The build_function_type() function takes
24102 care of removing duplicates for us. */
24103 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24105 tree eltype;
24107 if (is_load && k == 1)
24109 /* Neon load patterns always have the memory
24110 operand in the operand 1 position. */
24111 gcc_assert (insn_data[d->code].operand[k].predicate
24112 == neon_struct_operand);
24114 switch (d->mode)
24116 case T_V8QI:
24117 case T_V16QI:
24118 eltype = const_intQI_pointer_node;
24119 break;
24121 case T_V4HI:
24122 case T_V8HI:
24123 eltype = const_intHI_pointer_node;
24124 break;
24126 case T_V2SI:
24127 case T_V4SI:
24128 eltype = const_intSI_pointer_node;
24129 break;
24131 case T_V2SF:
24132 case T_V4SF:
24133 eltype = const_float_pointer_node;
24134 break;
24136 case T_DI:
24137 case T_V2DI:
24138 eltype = const_intDI_pointer_node;
24139 break;
24141 default: gcc_unreachable ();
24144 else if (is_store && k == 0)
24146 /* Similarly, Neon store patterns use operand 0 as
24147 the memory location to store to. */
24148 gcc_assert (insn_data[d->code].operand[k].predicate
24149 == neon_struct_operand);
24151 switch (d->mode)
24153 case T_V8QI:
24154 case T_V16QI:
24155 eltype = intQI_pointer_node;
24156 break;
24158 case T_V4HI:
24159 case T_V8HI:
24160 eltype = intHI_pointer_node;
24161 break;
24163 case T_V2SI:
24164 case T_V4SI:
24165 eltype = intSI_pointer_node;
24166 break;
24168 case T_V2SF:
24169 case T_V4SF:
24170 eltype = float_pointer_node;
24171 break;
24173 case T_DI:
24174 case T_V2DI:
24175 eltype = intDI_pointer_node;
24176 break;
24178 default: gcc_unreachable ();
24181 else
24183 switch (insn_data[d->code].operand[k].mode)
24185 case VOIDmode: eltype = void_type_node; break;
24186 /* Scalars. */
24187 case QImode: eltype = neon_intQI_type_node; break;
24188 case HImode: eltype = neon_intHI_type_node; break;
24189 case SImode: eltype = neon_intSI_type_node; break;
24190 case SFmode: eltype = neon_float_type_node; break;
24191 case DImode: eltype = neon_intDI_type_node; break;
24192 case TImode: eltype = intTI_type_node; break;
24193 case EImode: eltype = intEI_type_node; break;
24194 case OImode: eltype = intOI_type_node; break;
24195 case CImode: eltype = intCI_type_node; break;
24196 case XImode: eltype = intXI_type_node; break;
24197 /* 64-bit vectors. */
24198 case V8QImode: eltype = V8QI_type_node; break;
24199 case V4HImode: eltype = V4HI_type_node; break;
24200 case V2SImode: eltype = V2SI_type_node; break;
24201 case V2SFmode: eltype = V2SF_type_node; break;
24202 /* 128-bit vectors. */
24203 case V16QImode: eltype = V16QI_type_node; break;
24204 case V8HImode: eltype = V8HI_type_node; break;
24205 case V4SImode: eltype = V4SI_type_node; break;
24206 case V4SFmode: eltype = V4SF_type_node; break;
24207 case V2DImode: eltype = V2DI_type_node; break;
24208 default: gcc_unreachable ();
24212 if (k == 0 && !is_store)
24213 return_type = eltype;
24214 else
24215 args = tree_cons (NULL_TREE, eltype, args);
24218 ftype = build_function_type (return_type, args);
24220 break;
24222 case NEON_REINTERP:
24224 /* We iterate over NUM_DREG_TYPES doubleword types,
24225 then NUM_QREG_TYPES quadword types.
24226 V4HF is not a type used in reinterpret, so we translate
24227 d->mode to the correct index in reinterp_ftype_dreg. */
24228 bool qreg_p
24229 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24230 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24231 % NUM_QREG_TYPES;
24232 switch (insn_data[d->code].operand[0].mode)
24234 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24235 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24236 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24237 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24238 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24239 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24240 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24241 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24242 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24243 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24244 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24245 default: gcc_unreachable ();
24248 break;
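/* Worked example of the index translation above, assuming the T_* values
   mirror the order of modenames[] earlier in this loop (v8qi, v4hi, v4hf,
   v2si, v2sf, di, v16qi, ..., so T_V2SF == 4 and T_V4SF == 9): for a
   doubleword variant with d->mode == T_V2SF, qreg_p is false and
   T_V2SF > T_V4HF, giving rhs = (4 - 1) % NUM_QREG_TYPES == 3, i.e.
   dreg_types[3] == V2SF_type_node; for a quadword variant with
   d->mode == T_V4SF, rhs = 9 % 6 == 3, i.e. qreg_types[3] == V4SF_type_node.  */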
24249 case NEON_FLOAT_WIDEN:
24251 tree eltype = NULL_TREE;
24252 tree return_type = NULL_TREE;
24254 switch (insn_data[d->code].operand[1].mode)
24256 case V4HFmode:
24257 eltype = V4HF_type_node;
24258 return_type = V4SF_type_node;
24259 break;
24260 default: gcc_unreachable ();
24262 ftype = build_function_type_list (return_type, eltype, NULL);
24263 break;
24265 case NEON_FLOAT_NARROW:
24267 tree eltype = NULL_TREE;
24268 tree return_type = NULL_TREE;
24270 switch (insn_data[d->code].operand[1].mode)
24272 case V4SFmode:
24273 eltype = V4SF_type_node;
24274 return_type = V4HF_type_node;
24275 break;
24276 default: gcc_unreachable ();
24278 ftype = build_function_type_list (return_type, eltype, NULL);
24279 break;
24281 case NEON_BSWAP:
24283 tree eltype = NULL_TREE;
24284 switch (insn_data[d->code].operand[1].mode)
24286 case V4HImode:
24287 eltype = V4UHI_type_node;
24288 break;
24289 case V8HImode:
24290 eltype = V8UHI_type_node;
24291 break;
24292 case V2SImode:
24293 eltype = V2USI_type_node;
24294 break;
24295 case V4SImode:
24296 eltype = V4USI_type_node;
24297 break;
24298 case V2DImode:
24299 eltype = V2UDI_type_node;
24300 break;
24301 default: gcc_unreachable ();
24303 ftype = build_function_type_list (eltype, eltype, NULL);
24304 break;
24306 case NEON_COPYSIGNF:
24308 tree eltype = NULL_TREE;
24309 switch (insn_data[d->code].operand[1].mode)
24311 case V2SFmode:
24312 eltype = V2SF_type_node;
24313 break;
24314 case V4SFmode:
24315 eltype = V4SF_type_node;
24316 break;
24317 default: gcc_unreachable ();
24319 ftype = build_function_type_list (eltype, eltype, NULL);
24320 break;
24322 default:
24323 gcc_unreachable ();
24326 gcc_assert (ftype != NULL);
24328 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24330 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24331 NULL_TREE);
24332 arm_builtin_decls[fcode] = decl;
24336 #undef NUM_DREG_TYPES
24337 #undef NUM_QREG_TYPES
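/* Naming sketch: the sprintf in the loop above forms each builtin name as
   "__builtin_neon_" + d->name + modenames[d->mode]; for example a table
   entry named "vadd" with key mode T_V8QI (a hypothetical entry here) would
   be registered as __builtin_neon_vaddv8qi, so the mode suffix of the
   builtin always matches the neon.md pattern name selected via the CF
   macro.  */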
24339 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24340 do \
24342 if ((MASK) & insn_flags) \
24344 tree bdecl; \
24345 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24346 BUILT_IN_MD, NULL, NULL_TREE); \
24347 arm_builtin_decls[CODE] = bdecl; \
24350 while (0)
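/* Expansion sketch: the iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO) use
   further below becomes
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
   so a builtin is registered (and recorded in arm_builtin_decls) only when
   its FL_* mask bit is present in insn_flags for the selected CPU.  */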
24352 struct builtin_description
24354 const unsigned int mask;
24355 const enum insn_code icode;
24356 const char * const name;
24357 const enum arm_builtins code;
24358 const enum rtx_code comparison;
24359 const unsigned int flag;
24362 static const struct builtin_description bdesc_2arg[] =
24364 #define IWMMXT_BUILTIN(code, string, builtin) \
24365 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24366 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24368 #define IWMMXT2_BUILTIN(code, string, builtin) \
24369 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24370 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
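/* Expansion sketch: the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   yields the initializer
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   i.e. each row ties an insn pattern to a builtin name and enum code.  */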
24372 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24373 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24374 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24375 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24376 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24377 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24378 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24379 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24380 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24381 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24382 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24383 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24384 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24385 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24386 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24387 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24388 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24389 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24390 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24391 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24392 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24393 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24394 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24395 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24396 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24397 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24398 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24399 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24400 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24401 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24402 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24403 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24404 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24405 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24406 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24407 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24408 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24409 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24410 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24411 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24412 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24413 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24414 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24415 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24416 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24417 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24418 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24419 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24420 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24421 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24422 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24423 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24424 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24425 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24426 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24427 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24428 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24429 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24430 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24431 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24432 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24433 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24434 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24435 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24436 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24437 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24438 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24439 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24440 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24441 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24442 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24443 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24444 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24445 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24446 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24447 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24448 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24449 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24451 #define IWMMXT_BUILTIN2(code, builtin) \
24452 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24454 #define IWMMXT2_BUILTIN2(code, builtin) \
24455 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24457 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24458 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24459 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24460 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24461 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24462 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24463 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24464 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24465 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24466 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24469 #define FP_BUILTIN(L, U) \
24470 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24471 UNKNOWN, 0},
24473 FP_BUILTIN (get_fpscr, GET_FPSCR)
24474 FP_BUILTIN (set_fpscr, SET_FPSCR)
24475 #undef FP_BUILTIN
24477 #define CRC32_BUILTIN(L, U) \
24478 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24479 UNKNOWN, 0},
24480 CRC32_BUILTIN (crc32b, CRC32B)
24481 CRC32_BUILTIN (crc32h, CRC32H)
24482 CRC32_BUILTIN (crc32w, CRC32W)
24483 CRC32_BUILTIN (crc32cb, CRC32CB)
24484 CRC32_BUILTIN (crc32ch, CRC32CH)
24485 CRC32_BUILTIN (crc32cw, CRC32CW)
24486 #undef CRC32_BUILTIN
24489 #define CRYPTO_BUILTIN(L, U) \
24490 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24491 UNKNOWN, 0},
24492 #undef CRYPTO1
24493 #undef CRYPTO2
24494 #undef CRYPTO3
24495 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24496 #define CRYPTO1(L, U, R, A)
24497 #define CRYPTO3(L, U, R, A1, A2, A3)
24498 #include "crypto.def"
24499 #undef CRYPTO1
24500 #undef CRYPTO2
24501 #undef CRYPTO3
24505 static const struct builtin_description bdesc_1arg[] =
24507 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24508 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24509 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24510 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24511 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24512 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24513 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24514 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24515 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24516 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24517 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24518 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24519 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24520 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24521 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24522 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24523 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24524 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24525 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24526 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24527 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24528 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24529 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24530 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24532 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24533 #define CRYPTO2(L, U, R, A1, A2)
24534 #define CRYPTO3(L, U, R, A1, A2, A3)
24535 #include "crypto.def"
24536 #undef CRYPTO1
24537 #undef CRYPTO2
24538 #undef CRYPTO3
24541 static const struct builtin_description bdesc_3arg[] =
24543 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24544 #define CRYPTO1(L, U, R, A)
24545 #define CRYPTO2(L, U, R, A1, A2)
24546 #include "crypto.def"
24547 #undef CRYPTO1
24548 #undef CRYPTO2
24549 #undef CRYPTO3
24551 #undef CRYPTO_BUILTIN
24553 /* Set up all the iWMMXt builtins. This is not called if
24554 TARGET_IWMMXT is zero. */
24556 static void
24557 arm_init_iwmmxt_builtins (void)
24559 const struct builtin_description * d;
24560 size_t i;
24562 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24563 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24564 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24566 tree v8qi_ftype_v8qi_v8qi_int
24567 = build_function_type_list (V8QI_type_node,
24568 V8QI_type_node, V8QI_type_node,
24569 integer_type_node, NULL_TREE);
24570 tree v4hi_ftype_v4hi_int
24571 = build_function_type_list (V4HI_type_node,
24572 V4HI_type_node, integer_type_node, NULL_TREE);
24573 tree v2si_ftype_v2si_int
24574 = build_function_type_list (V2SI_type_node,
24575 V2SI_type_node, integer_type_node, NULL_TREE);
24576 tree v2si_ftype_di_di
24577 = build_function_type_list (V2SI_type_node,
24578 long_long_integer_type_node,
24579 long_long_integer_type_node,
24580 NULL_TREE);
24581 tree di_ftype_di_int
24582 = build_function_type_list (long_long_integer_type_node,
24583 long_long_integer_type_node,
24584 integer_type_node, NULL_TREE);
24585 tree di_ftype_di_int_int
24586 = build_function_type_list (long_long_integer_type_node,
24587 long_long_integer_type_node,
24588 integer_type_node,
24589 integer_type_node, NULL_TREE);
24590 tree int_ftype_v8qi
24591 = build_function_type_list (integer_type_node,
24592 V8QI_type_node, NULL_TREE);
24593 tree int_ftype_v4hi
24594 = build_function_type_list (integer_type_node,
24595 V4HI_type_node, NULL_TREE);
24596 tree int_ftype_v2si
24597 = build_function_type_list (integer_type_node,
24598 V2SI_type_node, NULL_TREE);
24599 tree int_ftype_v8qi_int
24600 = build_function_type_list (integer_type_node,
24601 V8QI_type_node, integer_type_node, NULL_TREE);
24602 tree int_ftype_v4hi_int
24603 = build_function_type_list (integer_type_node,
24604 V4HI_type_node, integer_type_node, NULL_TREE);
24605 tree int_ftype_v2si_int
24606 = build_function_type_list (integer_type_node,
24607 V2SI_type_node, integer_type_node, NULL_TREE);
24608 tree v8qi_ftype_v8qi_int_int
24609 = build_function_type_list (V8QI_type_node,
24610 V8QI_type_node, integer_type_node,
24611 integer_type_node, NULL_TREE);
24612 tree v4hi_ftype_v4hi_int_int
24613 = build_function_type_list (V4HI_type_node,
24614 V4HI_type_node, integer_type_node,
24615 integer_type_node, NULL_TREE);
24616 tree v2si_ftype_v2si_int_int
24617 = build_function_type_list (V2SI_type_node,
24618 V2SI_type_node, integer_type_node,
24619 integer_type_node, NULL_TREE);
24620 /* Miscellaneous. */
24621 tree v8qi_ftype_v4hi_v4hi
24622 = build_function_type_list (V8QI_type_node,
24623 V4HI_type_node, V4HI_type_node, NULL_TREE);
24624 tree v4hi_ftype_v2si_v2si
24625 = build_function_type_list (V4HI_type_node,
24626 V2SI_type_node, V2SI_type_node, NULL_TREE);
24627 tree v8qi_ftype_v4hi_v8qi
24628 = build_function_type_list (V8QI_type_node,
24629 V4HI_type_node, V8QI_type_node, NULL_TREE);
24630 tree v2si_ftype_v4hi_v4hi
24631 = build_function_type_list (V2SI_type_node,
24632 V4HI_type_node, V4HI_type_node, NULL_TREE);
24633 tree v2si_ftype_v8qi_v8qi
24634 = build_function_type_list (V2SI_type_node,
24635 V8QI_type_node, V8QI_type_node, NULL_TREE);
24636 tree v4hi_ftype_v4hi_di
24637 = build_function_type_list (V4HI_type_node,
24638 V4HI_type_node, long_long_integer_type_node,
24639 NULL_TREE);
24640 tree v2si_ftype_v2si_di
24641 = build_function_type_list (V2SI_type_node,
24642 V2SI_type_node, long_long_integer_type_node,
24643 NULL_TREE);
24644 tree di_ftype_void
24645 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24646 tree int_ftype_void
24647 = build_function_type_list (integer_type_node, NULL_TREE);
24648 tree di_ftype_v8qi
24649 = build_function_type_list (long_long_integer_type_node,
24650 V8QI_type_node, NULL_TREE);
24651 tree di_ftype_v4hi
24652 = build_function_type_list (long_long_integer_type_node,
24653 V4HI_type_node, NULL_TREE);
24654 tree di_ftype_v2si
24655 = build_function_type_list (long_long_integer_type_node,
24656 V2SI_type_node, NULL_TREE);
24657 tree v2si_ftype_v4hi
24658 = build_function_type_list (V2SI_type_node,
24659 V4HI_type_node, NULL_TREE);
24660 tree v4hi_ftype_v8qi
24661 = build_function_type_list (V4HI_type_node,
24662 V8QI_type_node, NULL_TREE);
24663 tree v8qi_ftype_v8qi
24664 = build_function_type_list (V8QI_type_node,
24665 V8QI_type_node, NULL_TREE);
24666 tree v4hi_ftype_v4hi
24667 = build_function_type_list (V4HI_type_node,
24668 V4HI_type_node, NULL_TREE);
24669 tree v2si_ftype_v2si
24670 = build_function_type_list (V2SI_type_node,
24671 V2SI_type_node, NULL_TREE);
24673 tree di_ftype_di_v4hi_v4hi
24674 = build_function_type_list (long_long_unsigned_type_node,
24675 long_long_unsigned_type_node,
24676 V4HI_type_node, V4HI_type_node,
24677 NULL_TREE);
24679 tree di_ftype_v4hi_v4hi
24680 = build_function_type_list (long_long_unsigned_type_node,
24681 V4HI_type_node,V4HI_type_node,
24682 NULL_TREE);
24684 tree v2si_ftype_v2si_v4hi_v4hi
24685 = build_function_type_list (V2SI_type_node,
24686 V2SI_type_node, V4HI_type_node,
24687 V4HI_type_node, NULL_TREE);
24689 tree v2si_ftype_v2si_v8qi_v8qi
24690 = build_function_type_list (V2SI_type_node,
24691 V2SI_type_node, V8QI_type_node,
24692 V8QI_type_node, NULL_TREE);
24694 tree di_ftype_di_v2si_v2si
24695 = build_function_type_list (long_long_unsigned_type_node,
24696 long_long_unsigned_type_node,
24697 V2SI_type_node, V2SI_type_node,
24698 NULL_TREE);
24700 tree di_ftype_di_di_int
24701 = build_function_type_list (long_long_unsigned_type_node,
24702 long_long_unsigned_type_node,
24703 long_long_unsigned_type_node,
24704 integer_type_node, NULL_TREE);
24706 tree void_ftype_int
24707 = build_function_type_list (void_type_node,
24708 integer_type_node, NULL_TREE);
24710 tree v8qi_ftype_char
24711 = build_function_type_list (V8QI_type_node,
24712 signed_char_type_node, NULL_TREE);
24714 tree v4hi_ftype_short
24715 = build_function_type_list (V4HI_type_node,
24716 short_integer_type_node, NULL_TREE);
24718 tree v2si_ftype_int
24719 = build_function_type_list (V2SI_type_node,
24720 integer_type_node, NULL_TREE);
24722 /* Normal vector binops. */
24723 tree v8qi_ftype_v8qi_v8qi
24724 = build_function_type_list (V8QI_type_node,
24725 V8QI_type_node, V8QI_type_node, NULL_TREE);
24726 tree v4hi_ftype_v4hi_v4hi
24727 = build_function_type_list (V4HI_type_node,
24728 V4HI_type_node,V4HI_type_node, NULL_TREE);
24729 tree v2si_ftype_v2si_v2si
24730 = build_function_type_list (V2SI_type_node,
24731 V2SI_type_node, V2SI_type_node, NULL_TREE);
24732 tree di_ftype_di_di
24733 = build_function_type_list (long_long_unsigned_type_node,
24734 long_long_unsigned_type_node,
24735 long_long_unsigned_type_node,
24736 NULL_TREE);
24738 /* Add all builtins that are more or less simple operations on two
24739 operands. */
24740 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24742 /* Use one of the operands; the target can have a different mode for
24743 mask-generating compares. */
24744 machine_mode mode;
24745 tree type;
24747 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24748 continue;
24750 mode = insn_data[d->icode].operand[1].mode;
24752 switch (mode)
24754 case V8QImode:
24755 type = v8qi_ftype_v8qi_v8qi;
24756 break;
24757 case V4HImode:
24758 type = v4hi_ftype_v4hi_v4hi;
24759 break;
24760 case V2SImode:
24761 type = v2si_ftype_v2si_v2si;
24762 break;
24763 case DImode:
24764 type = di_ftype_di_di;
24765 break;
24767 default:
24768 gcc_unreachable ();
24771 def_mbuiltin (d->mask, d->name, type, d->code);
24774 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24775 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24776 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24777 ARM_BUILTIN_ ## CODE)
24779 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24780 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24781 ARM_BUILTIN_ ## CODE)
24783 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24784 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24785 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24786 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24787 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24788 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24789 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24790 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24791 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24793 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24794 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24795 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24796 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24797 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24798 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24800 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24801 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24802 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24803 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24804 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24805 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24807 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24808 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24809 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24810 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24811 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24812 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24814 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24815 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24816 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24817 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24818 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24819 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24821 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24823 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24824 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24825 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24826 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24827 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24828 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24829 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24830 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24831 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24832 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24834 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24835 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24836 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24837 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24838 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24839 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24840 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24841 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24842 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24844 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24845 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24846 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24848 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24849 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24850 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24852 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24853 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24855 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24856 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24857 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24858 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24859 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24860 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24862 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24863 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24864 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24865 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24866 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24867 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24868 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24869 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24870 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24871 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24872 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24873 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24875 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24876 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24877 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24878 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24880 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24881 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24882 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24883 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24884 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24885 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24886 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24888 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24889 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24890 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24892 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24893 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24894 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24895 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24897 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24898 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24899 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24900 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24902 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24903 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24904 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24905 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24907 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24908 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24909 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24910 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24912 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24913 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24914 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24915 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24917 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24918 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24919 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24920 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24922 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24924 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24925 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24926 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24928 #undef iwmmx_mbuiltin
24929 #undef iwmmx2_mbuiltin
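/* Minimal usage sketch, assuming a toolchain and CPU for which the iWMMXt
   registrations above actually run; the builtin names and signatures follow
   the def_mbuiltin calls above.  */
unsigned long long
iwmmxt_demo (int scalar)
{
  __builtin_arm_setwcgr0 (scalar);                 /* void (int)  */
  int g = __builtin_arm_getwcgr0 ();               /* int (void)  */
  /* wzero has type di_ftype_void: it returns a zeroed 64-bit value.  */
  return __builtin_arm_wzero () + (unsigned long long) g;
}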
24932 static void
24933 arm_init_fp16_builtins (void)
24935 tree fp16_type = make_node (REAL_TYPE);
24936 TYPE_PRECISION (fp16_type) = 16;
24937 layout_type (fp16_type);
24938 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24941 static void
24942 arm_init_crc32_builtins ()
24944 tree si_ftype_si_qi
24945 = build_function_type_list (unsigned_intSI_type_node,
24946 unsigned_intSI_type_node,
24947 unsigned_intQI_type_node, NULL_TREE);
24948 tree si_ftype_si_hi
24949 = build_function_type_list (unsigned_intSI_type_node,
24950 unsigned_intSI_type_node,
24951 unsigned_intHI_type_node, NULL_TREE);
24952 tree si_ftype_si_si
24953 = build_function_type_list (unsigned_intSI_type_node,
24954 unsigned_intSI_type_node,
24955 unsigned_intSI_type_node, NULL_TREE);
24957 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24958 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24959 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24960 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24961 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24962 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24963 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24964 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24965 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24966 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24967 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24968 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24969 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24970 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24971 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24972 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24973 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24974 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
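/* Minimal usage sketch, assuming a target where TARGET_CRC32 holds (for
   example, compiling with -march=armv8-a+crc); the builtin names and
   argument types mirror the registrations above.  */
static unsigned int
crc32_update (unsigned int crc, const unsigned char *buf, unsigned int len)
{
  while (len--)
    crc = __builtin_arm_crc32b (crc, *buf++);   /* unsigned SI <- (SI, QI)  */
  return crc;
}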
24977 static void
24978 arm_init_builtins (void)
24980 if (TARGET_REALLY_IWMMXT)
24981 arm_init_iwmmxt_builtins ();
24983 if (TARGET_NEON)
24984 arm_init_neon_builtins ();
24986 if (arm_fp16_format)
24987 arm_init_fp16_builtins ();
24989 if (TARGET_CRC32)
24990 arm_init_crc32_builtins ();
24992 if (TARGET_VFP && TARGET_HARD_FLOAT)
24994 tree ftype_set_fpscr
24995 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24996 tree ftype_get_fpscr
24997 = build_function_type_list (unsigned_type_node, NULL);
24999 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
25000 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
25001 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25002 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
25003 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
25004 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
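/* Usage sketch for the two FPSCR builtins registered above (available when
   TARGET_VFP && TARGET_HARD_FLOAT): a read-modify-write of the FPSCR.  The
   FZ (flush-to-zero) bit used here, bit 24, is shown purely for
   illustration.  */
static void
enable_flush_to_zero (void)
{
  unsigned int fpscr = __builtin_arm_ldfscr ();   /* unsigned (void)   */
  fpscr |= 1u << 24;
  __builtin_arm_stfscr (fpscr);                   /* void (unsigned)   */
}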
25008 /* Return the ARM builtin for CODE. */
25010 static tree
25011 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25013 if (code >= ARM_BUILTIN_MAX)
25014 return error_mark_node;
25016 return arm_builtin_decls[code];
25019 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25021 static const char *
25022 arm_invalid_parameter_type (const_tree t)
25024 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25025 return N_("function parameters cannot have __fp16 type");
25026 return NULL;
25029 /* Implement TARGET_INVALID_RETURN_TYPE. */
25031 static const char *
25032 arm_invalid_return_type (const_tree t)
25034 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25035 return N_("functions cannot return __fp16 type");
25036 return NULL;
25039 /* Implement TARGET_PROMOTED_TYPE. */
25041 static tree
25042 arm_promoted_type (const_tree t)
25044 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25045 return float_type_node;
25046 return NULL_TREE;
25049 /* Implement TARGET_CONVERT_TO_TYPE.
25050 Specifically, this hook implements the peculiarity of the ARM
25051 half-precision floating-point C semantics that requires conversions between
25052 __fp16 and double to go through an intermediate conversion to float. */
25054 static tree
25055 arm_convert_to_type (tree type, tree expr)
25057 tree fromtype = TREE_TYPE (expr);
25058 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25059 return NULL_TREE;
25060 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25061 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25062 return convert (type, convert (float_type_node, expr));
25063 return NULL_TREE;
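/* Illustration of the __fp16 semantics implemented by the hooks above,
   assuming the type is enabled (arm_fp16_format set, e.g. with
   -mfp16-format=ieee):  */
static double
fp16_demo (void)
{
  __fp16 h = (__fp16) 1.5f;
  double d = h;       /* performed as (double)(float) h (arm_convert_to_type)  */
  h = (__fp16) d;     /* performed as (__fp16)(float) d                        */
  return d + h;       /* h is promoted to float first (arm_promoted_type)      */
}
/* Note that __fp16 cannot be used as a parameter or return type; see
   arm_invalid_parameter_type and arm_invalid_return_type above.  */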
25066 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25067 This simply adds HFmode as a supported mode; even though we don't
25068 implement arithmetic on this type directly, it's supported by
25069 optabs conversions, much the way the double-word arithmetic is
25070 special-cased in the default hook. */
25072 static bool
25073 arm_scalar_mode_supported_p (machine_mode mode)
25075 if (mode == HFmode)
25076 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25077 else if (ALL_FIXED_POINT_MODE_P (mode))
25078 return true;
25079 else
25080 return default_scalar_mode_supported_p (mode);
25083 /* Errors in the source file can cause expand_expr to return const0_rtx
25084 where we expect a vector. To avoid crashing, use one of the vector
25085 clear instructions. */
25087 static rtx
25088 safe_vector_operand (rtx x, machine_mode mode)
25090 if (x != const0_rtx)
25091 return x;
25092 x = gen_reg_rtx (mode);
25094 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25095 : gen_rtx_SUBREG (DImode, x, 0)));
25096 return x;
25099 /* Function to expand ternary builtins. */
25100 static rtx
25101 arm_expand_ternop_builtin (enum insn_code icode,
25102 tree exp, rtx target)
25104 rtx pat;
25105 tree arg0 = CALL_EXPR_ARG (exp, 0);
25106 tree arg1 = CALL_EXPR_ARG (exp, 1);
25107 tree arg2 = CALL_EXPR_ARG (exp, 2);
25109 rtx op0 = expand_normal (arg0);
25110 rtx op1 = expand_normal (arg1);
25111 rtx op2 = expand_normal (arg2);
25112 rtx op3 = NULL_RTX;
25114 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25115 lane operand depending on endianness. */
25116 bool builtin_sha1cpm_p = false;
25118 if (insn_data[icode].n_operands == 5)
25120 gcc_assert (icode == CODE_FOR_crypto_sha1c
25121 || icode == CODE_FOR_crypto_sha1p
25122 || icode == CODE_FOR_crypto_sha1m);
25123 builtin_sha1cpm_p = true;
25125 machine_mode tmode = insn_data[icode].operand[0].mode;
25126 machine_mode mode0 = insn_data[icode].operand[1].mode;
25127 machine_mode mode1 = insn_data[icode].operand[2].mode;
25128 machine_mode mode2 = insn_data[icode].operand[3].mode;
25131 if (VECTOR_MODE_P (mode0))
25132 op0 = safe_vector_operand (op0, mode0);
25133 if (VECTOR_MODE_P (mode1))
25134 op1 = safe_vector_operand (op1, mode1);
25135 if (VECTOR_MODE_P (mode2))
25136 op2 = safe_vector_operand (op2, mode2);
25138 if (! target
25139 || GET_MODE (target) != tmode
25140 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25141 target = gen_reg_rtx (tmode);
25143 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25144 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25145 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25147 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25148 op0 = copy_to_mode_reg (mode0, op0);
25149 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25150 op1 = copy_to_mode_reg (mode1, op1);
25151 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25152 op2 = copy_to_mode_reg (mode2, op2);
25153 if (builtin_sha1cpm_p)
25154 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25156 if (builtin_sha1cpm_p)
25157 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25158 else
25159 pat = GEN_FCN (icode) (target, op0, op1, op2);
25160 if (! pat)
25161 return 0;
25162 emit_insn (pat);
25163 return target;
25166 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25168 static rtx
25169 arm_expand_binop_builtin (enum insn_code icode,
25170 tree exp, rtx target)
25172 rtx pat;
25173 tree arg0 = CALL_EXPR_ARG (exp, 0);
25174 tree arg1 = CALL_EXPR_ARG (exp, 1);
25175 rtx op0 = expand_normal (arg0);
25176 rtx op1 = expand_normal (arg1);
25177 machine_mode tmode = insn_data[icode].operand[0].mode;
25178 machine_mode mode0 = insn_data[icode].operand[1].mode;
25179 machine_mode mode1 = insn_data[icode].operand[2].mode;
25181 if (VECTOR_MODE_P (mode0))
25182 op0 = safe_vector_operand (op0, mode0);
25183 if (VECTOR_MODE_P (mode1))
25184 op1 = safe_vector_operand (op1, mode1);
25186 if (! target
25187 || GET_MODE (target) != tmode
25188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25189 target = gen_reg_rtx (tmode);
25191 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25192 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25194 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25195 op0 = copy_to_mode_reg (mode0, op0);
25196 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25197 op1 = copy_to_mode_reg (mode1, op1);
25199 pat = GEN_FCN (icode) (target, op0, op1);
25200 if (! pat)
25201 return 0;
25202 emit_insn (pat);
25203 return target;
25206 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25208 static rtx
25209 arm_expand_unop_builtin (enum insn_code icode,
25210 tree exp, rtx target, int do_load)
25212 rtx pat;
25213 tree arg0 = CALL_EXPR_ARG (exp, 0);
25214 rtx op0 = expand_normal (arg0);
25215 rtx op1 = NULL_RTX;
25216 machine_mode tmode = insn_data[icode].operand[0].mode;
25217 machine_mode mode0 = insn_data[icode].operand[1].mode;
25218 bool builtin_sha1h_p = false;
25220 if (insn_data[icode].n_operands == 3)
25222 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25223 builtin_sha1h_p = true;
25226 if (! target
25227 || GET_MODE (target) != tmode
25228 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25229 target = gen_reg_rtx (tmode);
25230 if (do_load)
25231 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25232 else
25234 if (VECTOR_MODE_P (mode0))
25235 op0 = safe_vector_operand (op0, mode0);
25237 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25238 op0 = copy_to_mode_reg (mode0, op0);
25240 if (builtin_sha1h_p)
25241 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25243 if (builtin_sha1h_p)
25244 pat = GEN_FCN (icode) (target, op0, op1);
25245 else
25246 pat = GEN_FCN (icode) (target, op0);
25247 if (! pat)
25248 return 0;
25249 emit_insn (pat);
25250 return target;
25253 typedef enum {
25254 NEON_ARG_COPY_TO_REG,
25255 NEON_ARG_CONSTANT,
25256 NEON_ARG_MEMORY,
25257 NEON_ARG_STOP
25258 } builtin_arg;
25260 #define NEON_MAX_BUILTIN_ARGS 5
25262 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25263 and return an expression for the accessed memory.
25265 The intrinsic function operates on a block of registers that has
25266 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25267 function references the memory at EXP of type TYPE and in mode
25268 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25269 available. */
25271 static tree
25272 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25273 machine_mode reg_mode,
25274 neon_builtin_type_mode type_mode)
25276 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25277 tree elem_type, upper_bound, array_type;
25279 /* Work out the size of the register block in bytes. */
25280 reg_size = GET_MODE_SIZE (reg_mode);
25282 /* Work out the size of each vector in bytes. */
25283 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25284 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25286 /* Work out how many vectors there are. */
25287 gcc_assert (reg_size % vector_size == 0);
25288 nvectors = reg_size / vector_size;
25290 /* Work out the type of each element. */
25291 gcc_assert (POINTER_TYPE_P (type));
25292 elem_type = TREE_TYPE (type);
25294 /* Work out how many elements are being loaded or stored.
25295 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25296 and memory elements; anything else implies a lane load or store. */
25297 if (mem_mode == reg_mode)
25298 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25299 else
25300 nelems = nvectors;
25302 /* Create a type that describes the full access. */
25303 upper_bound = build_int_cst (size_type_node, nelems - 1);
25304 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25306 /* Dereference EXP using that type. */
25307 return fold_build2 (MEM_REF, array_type, exp,
25308 build_int_cst (build_pointer_type (array_type), 0));
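/* Worked example (illustrative, assuming a vld2q-style load of 32-bit
   elements): REG_MODE then covers two quad registers, so reg_size is 32
   and vector_size is 16, giving nvectors == 2.  When MEM_MODE equals
   REG_MODE the access spans 32 / 4 == 8 uint32_t elements and the
   MEM_REF built above has type uint32_t[8]; a lane variant instead
   touches only nvectors elements, one per register.  */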
25311 /* Expand a Neon builtin. */
25312 static rtx
25313 arm_expand_neon_args (rtx target, int icode, int have_retval,
25314 neon_builtin_type_mode type_mode,
25315 tree exp, int fcode, ...)
25317 va_list ap;
25318 rtx pat;
25319 tree arg[NEON_MAX_BUILTIN_ARGS];
25320 rtx op[NEON_MAX_BUILTIN_ARGS];
25321 tree arg_type;
25322 tree formals;
25323 machine_mode tmode = insn_data[icode].operand[0].mode;
25324 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25325 machine_mode other_mode;
25326 int argc = 0;
25327 int opno;
25329 if (have_retval
25330 && (!target
25331 || GET_MODE (target) != tmode
25332 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25333 target = gen_reg_rtx (tmode);
25335 va_start (ap, fcode);
25337 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25339 for (;;)
25341 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25343 if (thisarg == NEON_ARG_STOP)
25344 break;
25345 else
25347 opno = argc + have_retval;
25348 mode[argc] = insn_data[icode].operand[opno].mode;
25349 arg[argc] = CALL_EXPR_ARG (exp, argc);
25350 arg_type = TREE_VALUE (formals);
25351 if (thisarg == NEON_ARG_MEMORY)
25353 other_mode = insn_data[icode].operand[1 - opno].mode;
25354 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25355 mode[argc], other_mode,
25356 type_mode);
25359 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
25360 is returned. */
25361 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25362 (thisarg == NEON_ARG_MEMORY
25363 ? EXPAND_MEMORY : EXPAND_NORMAL));
25365 switch (thisarg)
25367 case NEON_ARG_COPY_TO_REG:
25368 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25369 if (!(*insn_data[icode].operand[opno].predicate)
25370 (op[argc], mode[argc]))
25371 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25372 break;
25374 case NEON_ARG_CONSTANT:
25375 /* FIXME: This error message is somewhat unhelpful. */
25376 if (!(*insn_data[icode].operand[opno].predicate)
25377 (op[argc], mode[argc]))
25378 error ("argument must be a constant");
25379 break;
25381 case NEON_ARG_MEMORY:
25382 /* Check if expand failed. */
25383 if (op[argc] == const0_rtx)
25384 return 0;
25385 gcc_assert (MEM_P (op[argc]));
25386 PUT_MODE (op[argc], mode[argc]);
25387 /* ??? arm_neon.h uses the same built-in functions for signed
25388 and unsigned accesses, casting where necessary. This isn't
25389 alias safe. */
25390 set_mem_alias_set (op[argc], 0);
25391 if (!(*insn_data[icode].operand[opno].predicate)
25392 (op[argc], mode[argc]))
25393 op[argc] = (replace_equiv_address
25394 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25395 break;
25397 case NEON_ARG_STOP:
25398 gcc_unreachable ();
25401 argc++;
25402 formals = TREE_CHAIN (formals);
25406 va_end (ap);
25408 if (have_retval)
25409 switch (argc)
25411 case 1:
25412 pat = GEN_FCN (icode) (target, op[0]);
25413 break;
25415 case 2:
25416 pat = GEN_FCN (icode) (target, op[0], op[1]);
25417 break;
25419 case 3:
25420 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25421 break;
25423 case 4:
25424 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25425 break;
25427 case 5:
25428 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25429 break;
25431 default:
25432 gcc_unreachable ();
25434 else
25435 switch (argc)
25437 case 1:
25438 pat = GEN_FCN (icode) (op[0]);
25439 break;
25441 case 2:
25442 pat = GEN_FCN (icode) (op[0], op[1]);
25443 break;
25445 case 3:
25446 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25447 break;
25449 case 4:
25450 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25451 break;
25453 case 5:
25454 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25455 break;
25457 default:
25458 gcc_unreachable ();
25461 if (!pat)
25462 return 0;
25464 emit_insn (pat);
25466 return target;
25469 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25470 constants defined per-instruction or per instruction-variant. Instead, the
25471 required info is looked up in the table neon_builtin_data. */
25472 static rtx
25473 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25475 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25476 neon_itype itype = d->itype;
25477 enum insn_code icode = d->code;
25478 neon_builtin_type_mode type_mode = d->mode;
25480 switch (itype)
25482 case NEON_UNOP:
25483 case NEON_CONVERT:
25484 case NEON_DUPLANE:
25485 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25486 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25488 case NEON_BINOP:
25489 case NEON_SETLANE:
25490 case NEON_SCALARMUL:
25491 case NEON_SCALARMULL:
25492 case NEON_SCALARMULH:
25493 case NEON_SHIFTINSERT:
25494 case NEON_LOGICBINOP:
25495 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25496 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25497 NEON_ARG_STOP);
25499 case NEON_TERNOP:
25500 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25501 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25502 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25504 case NEON_GETLANE:
25505 case NEON_FIXCONV:
25506 case NEON_SHIFTIMM:
25507 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25508 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25509 NEON_ARG_STOP);
25511 case NEON_CREATE:
25512 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25513 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25515 case NEON_DUP:
25516 case NEON_RINT:
25517 case NEON_SPLIT:
25518 case NEON_FLOAT_WIDEN:
25519 case NEON_FLOAT_NARROW:
25520 case NEON_BSWAP:
25521 case NEON_REINTERP:
25522 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25523 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25525 case NEON_COPYSIGNF:
25526 case NEON_COMBINE:
25527 case NEON_VTBL:
25528 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25529 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25531 case NEON_LANEMUL:
25532 case NEON_LANEMULL:
25533 case NEON_LANEMULH:
25534 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25535 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25536 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25538 case NEON_LANEMAC:
25539 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25540 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25541 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25543 case NEON_SHIFTACC:
25544 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25545 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25546 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25548 case NEON_SCALARMAC:
25549 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25550 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25551 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25553 case NEON_SELECT:
25554 case NEON_VTBX:
25555 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25556 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25557 NEON_ARG_STOP);
25559 case NEON_LOAD1:
25560 case NEON_LOADSTRUCT:
25561 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25562 NEON_ARG_MEMORY, NEON_ARG_STOP);
25564 case NEON_LOAD1LANE:
25565 case NEON_LOADSTRUCTLANE:
25566 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25567 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25568 NEON_ARG_STOP);
25570 case NEON_STORE1:
25571 case NEON_STORESTRUCT:
25572 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25573 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25575 case NEON_STORE1LANE:
25576 case NEON_STORESTRUCTLANE:
25577 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25578 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25579 NEON_ARG_STOP);
25582 gcc_unreachable ();
25585 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25586 void
25587 neon_reinterpret (rtx dest, rtx src)
25589 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25592 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25593 not to early-clobber SRC registers in the process.
25595 We assume that the operands described by SRC and DEST represent a
25596 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25597 number of components into which the copy has been decomposed. */
25598 void
25599 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25601 unsigned int i;
25603 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25604 || REGNO (operands[0]) < REGNO (operands[1]))
25606 for (i = 0; i < count; i++)
25608 operands[2 * i] = dest[i];
25609 operands[2 * i + 1] = src[i];
25612 else
25614 for (i = 0; i < count; i++)
25616 operands[2 * i] = dest[count - i - 1];
25617 operands[2 * i + 1] = src[count - i - 1];
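/* Illustrative example (not from the original source): for a copy of
   {d1, d2} from {d0, d1}, emitting d1 := d0 first would clobber d1
   before it is read for the second component, so the REGNO test above
   selects the reversed order (d2 := d1, then d1 := d0); when there is
   no overlap, or the destination starts below the source, the forward
   order is safe.  */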
25622 /* Split operands into moves from op[1] + op[2] into op[0]. */
25624 void
25625 neon_split_vcombine (rtx operands[3])
25627 unsigned int dest = REGNO (operands[0]);
25628 unsigned int src1 = REGNO (operands[1]);
25629 unsigned int src2 = REGNO (operands[2]);
25630 machine_mode halfmode = GET_MODE (operands[1]);
25631 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25632 rtx destlo, desthi;
25634 if (src1 == dest && src2 == dest + halfregs)
25636 /* No-op move. Can't split to nothing; emit something. */
25637 emit_note (NOTE_INSN_DELETED);
25638 return;
25641 /* Preserve register attributes for variable tracking. */
25642 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25643 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25644 GET_MODE_SIZE (halfmode));
25646 /* Special case of reversed high/low parts. Use VSWP. */
25647 if (src2 == dest && src1 == dest + halfregs)
25649 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25650 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25651 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25652 return;
25655 if (!reg_overlap_mentioned_p (operands[2], destlo))
25657 /* Try to avoid unnecessary moves if part of the result
25658 is in the right place already. */
25659 if (src1 != dest)
25660 emit_move_insn (destlo, operands[1]);
25661 if (src2 != dest + halfregs)
25662 emit_move_insn (desthi, operands[2]);
25664 else
25666 if (src2 != dest + halfregs)
25667 emit_move_insn (desthi, operands[2]);
25668 if (src1 != dest)
25669 emit_move_insn (destlo, operands[1]);
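/* Illustrative note (hedged): the PARALLEL of two SETs built above is
   intended to be matched by the D-register swap pattern in neon.md, so
   a vcombine whose halves are already present but exchanged should cost
   a single VSWP rather than moves through a scratch register.  */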
25673 /* Expand an expression EXP that calls a built-in function,
25674 with result going to TARGET if that's convenient
25675 (and in mode MODE if that's convenient).
25676 SUBTARGET may be used as the target for computing one of EXP's operands.
25677 IGNORE is nonzero if the value is to be ignored. */
25679 static rtx
25680 arm_expand_builtin (tree exp,
25681 rtx target,
25682 rtx subtarget ATTRIBUTE_UNUSED,
25683 machine_mode mode ATTRIBUTE_UNUSED,
25684 int ignore ATTRIBUTE_UNUSED)
25686 const struct builtin_description * d;
25687 enum insn_code icode;
25688 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25689 tree arg0;
25690 tree arg1;
25691 tree arg2;
25692 rtx op0;
25693 rtx op1;
25694 rtx op2;
25695 rtx pat;
25696 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25697 size_t i;
25698 machine_mode tmode;
25699 machine_mode mode0;
25700 machine_mode mode1;
25701 machine_mode mode2;
25702 int opint;
25703 int selector;
25704 int mask;
25705 int imm;
25707 if (fcode >= ARM_BUILTIN_NEON_BASE)
25708 return arm_expand_neon_builtin (fcode, exp, target);
25710 switch (fcode)
25712 case ARM_BUILTIN_GET_FPSCR:
25713 case ARM_BUILTIN_SET_FPSCR:
25714 if (fcode == ARM_BUILTIN_GET_FPSCR)
25716 icode = CODE_FOR_get_fpscr;
25717 target = gen_reg_rtx (SImode);
25718 pat = GEN_FCN (icode) (target);
25720 else
25722 target = NULL_RTX;
25723 icode = CODE_FOR_set_fpscr;
25724 arg0 = CALL_EXPR_ARG (exp, 0);
25725 op0 = expand_normal (arg0);
25726 pat = GEN_FCN (icode) (op0);
25728 emit_insn (pat);
25729 return target;
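/* Illustrative note (names assumed from the builtin enumerators): these
   two cases presumably back the __builtin_arm_get_fpscr and
   __builtin_arm_set_fpscr built-ins; the "get" form always materialises
   its result in a fresh SImode register, while the "set" form emits its
   insn for side effects only and returns no value.  */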
25731 case ARM_BUILTIN_TEXTRMSB:
25732 case ARM_BUILTIN_TEXTRMUB:
25733 case ARM_BUILTIN_TEXTRMSH:
25734 case ARM_BUILTIN_TEXTRMUH:
25735 case ARM_BUILTIN_TEXTRMSW:
25736 case ARM_BUILTIN_TEXTRMUW:
25737 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25738 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25739 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25740 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25741 : CODE_FOR_iwmmxt_textrmw);
25743 arg0 = CALL_EXPR_ARG (exp, 0);
25744 arg1 = CALL_EXPR_ARG (exp, 1);
25745 op0 = expand_normal (arg0);
25746 op1 = expand_normal (arg1);
25747 tmode = insn_data[icode].operand[0].mode;
25748 mode0 = insn_data[icode].operand[1].mode;
25749 mode1 = insn_data[icode].operand[2].mode;
25751 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25752 op0 = copy_to_mode_reg (mode0, op0);
25753 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25755 /* @@@ better error message */
25756 error ("selector must be an immediate");
25757 return gen_reg_rtx (tmode);
25760 opint = INTVAL (op1);
25761 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25763 if (opint > 7 || opint < 0)
25764 error ("the selector should be in the range 0 to 7");
25766 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25768 if (opint > 3 || opint < 0)
25769 error ("the selector should be in the range 0 to 3");
25771 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25773 if (opint > 1 || opint < 0)
25774 error ("the selector should be in the range 0 to 1");
25777 if (target == 0
25778 || GET_MODE (target) != tmode
25779 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25780 target = gen_reg_rtx (tmode);
25781 pat = GEN_FCN (icode) (target, op0, op1);
25782 if (! pat)
25783 return 0;
25784 emit_insn (pat);
25785 return target;
25787 case ARM_BUILTIN_WALIGNI:
25788 /* If op2 is immediate, call waligni, else call walignr. */
25789 arg0 = CALL_EXPR_ARG (exp, 0);
25790 arg1 = CALL_EXPR_ARG (exp, 1);
25791 arg2 = CALL_EXPR_ARG (exp, 2);
25792 op0 = expand_normal (arg0);
25793 op1 = expand_normal (arg1);
25794 op2 = expand_normal (arg2);
25795 if (CONST_INT_P (op2))
25797 icode = CODE_FOR_iwmmxt_waligni;
25798 tmode = insn_data[icode].operand[0].mode;
25799 mode0 = insn_data[icode].operand[1].mode;
25800 mode1 = insn_data[icode].operand[2].mode;
25801 mode2 = insn_data[icode].operand[3].mode;
25802 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25803 op0 = copy_to_mode_reg (mode0, op0);
25804 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25805 op1 = copy_to_mode_reg (mode1, op1);
25806 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25807 selector = INTVAL (op2);
25808 if (selector > 7 || selector < 0)
25809 error ("the selector should be in the range 0 to 7");
25811 else
25813 icode = CODE_FOR_iwmmxt_walignr;
25814 tmode = insn_data[icode].operand[0].mode;
25815 mode0 = insn_data[icode].operand[1].mode;
25816 mode1 = insn_data[icode].operand[2].mode;
25817 mode2 = insn_data[icode].operand[3].mode;
25818 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25819 op0 = copy_to_mode_reg (mode0, op0);
25820 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25821 op1 = copy_to_mode_reg (mode1, op1);
25822 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25823 op2 = copy_to_mode_reg (mode2, op2);
25825 if (target == 0
25826 || GET_MODE (target) != tmode
25827 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25828 target = gen_reg_rtx (tmode);
25829 pat = GEN_FCN (icode) (target, op0, op1, op2);
25830 if (!pat)
25831 return 0;
25832 emit_insn (pat);
25833 return target;
25835 case ARM_BUILTIN_TINSRB:
25836 case ARM_BUILTIN_TINSRH:
25837 case ARM_BUILTIN_TINSRW:
25838 case ARM_BUILTIN_WMERGE:
25839 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25840 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25841 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25842 : CODE_FOR_iwmmxt_tinsrw);
25843 arg0 = CALL_EXPR_ARG (exp, 0);
25844 arg1 = CALL_EXPR_ARG (exp, 1);
25845 arg2 = CALL_EXPR_ARG (exp, 2);
25846 op0 = expand_normal (arg0);
25847 op1 = expand_normal (arg1);
25848 op2 = expand_normal (arg2);
25849 tmode = insn_data[icode].operand[0].mode;
25850 mode0 = insn_data[icode].operand[1].mode;
25851 mode1 = insn_data[icode].operand[2].mode;
25852 mode2 = insn_data[icode].operand[3].mode;
25854 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25855 op0 = copy_to_mode_reg (mode0, op0);
25856 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25857 op1 = copy_to_mode_reg (mode1, op1);
25858 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25860 error ("selector must be an immediate");
25861 return const0_rtx;
25863 if (icode == CODE_FOR_iwmmxt_wmerge)
25865 selector = INTVAL (op2);
25866 if (selector > 7 || selector < 0)
25867 error ("the selector should be in the range 0 to 7");
25869 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25870 || (icode == CODE_FOR_iwmmxt_tinsrh)
25871 || (icode == CODE_FOR_iwmmxt_tinsrw))
25873 mask = 0x01;
25874 selector = INTVAL (op2);
25875 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25876 error ("the selector should be in the range 0 to 7");
25877 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25878 error ("the selector should be in the range 0 to 3");
25879 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25880 error ("the selector should be in the range 0 to 1");
25881 mask <<= selector;
25882 op2 = GEN_INT (mask);
25884 if (target == 0
25885 || GET_MODE (target) != tmode
25886 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25887 target = gen_reg_rtx (tmode);
25888 pat = GEN_FCN (icode) (target, op0, op1, op2);
25889 if (! pat)
25890 return 0;
25891 emit_insn (pat);
25892 return target;
25894 case ARM_BUILTIN_SETWCGR0:
25895 case ARM_BUILTIN_SETWCGR1:
25896 case ARM_BUILTIN_SETWCGR2:
25897 case ARM_BUILTIN_SETWCGR3:
25898 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25899 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25900 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25901 : CODE_FOR_iwmmxt_setwcgr3);
25902 arg0 = CALL_EXPR_ARG (exp, 0);
25903 op0 = expand_normal (arg0);
25904 mode0 = insn_data[icode].operand[0].mode;
25905 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25906 op0 = copy_to_mode_reg (mode0, op0);
25907 pat = GEN_FCN (icode) (op0);
25908 if (!pat)
25909 return 0;
25910 emit_insn (pat);
25911 return 0;
25913 case ARM_BUILTIN_GETWCGR0:
25914 case ARM_BUILTIN_GETWCGR1:
25915 case ARM_BUILTIN_GETWCGR2:
25916 case ARM_BUILTIN_GETWCGR3:
25917 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25918 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25919 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25920 : CODE_FOR_iwmmxt_getwcgr3);
25921 tmode = insn_data[icode].operand[0].mode;
25922 if (target == 0
25923 || GET_MODE (target) != tmode
25924 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25925 target = gen_reg_rtx (tmode);
25926 pat = GEN_FCN (icode) (target);
25927 if (!pat)
25928 return 0;
25929 emit_insn (pat);
25930 return target;
25932 case ARM_BUILTIN_WSHUFH:
25933 icode = CODE_FOR_iwmmxt_wshufh;
25934 arg0 = CALL_EXPR_ARG (exp, 0);
25935 arg1 = CALL_EXPR_ARG (exp, 1);
25936 op0 = expand_normal (arg0);
25937 op1 = expand_normal (arg1);
25938 tmode = insn_data[icode].operand[0].mode;
25939 mode1 = insn_data[icode].operand[1].mode;
25940 mode2 = insn_data[icode].operand[2].mode;
25942 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25943 op0 = copy_to_mode_reg (mode1, op0);
25944 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25946 error ("mask must be an immediate");
25947 return const0_rtx;
25949 selector = INTVAL (op1);
25950 if (selector < 0 || selector > 255)
25951 error ("the mask should be in the range 0 to 255");
25952 if (target == 0
25953 || GET_MODE (target) != tmode
25954 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25955 target = gen_reg_rtx (tmode);
25956 pat = GEN_FCN (icode) (target, op0, op1);
25957 if (! pat)
25958 return 0;
25959 emit_insn (pat);
25960 return target;
25962 case ARM_BUILTIN_WMADDS:
25963 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25964 case ARM_BUILTIN_WMADDSX:
25965 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25966 case ARM_BUILTIN_WMADDSN:
25967 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25968 case ARM_BUILTIN_WMADDU:
25969 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25970 case ARM_BUILTIN_WMADDUX:
25971 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25972 case ARM_BUILTIN_WMADDUN:
25973 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25974 case ARM_BUILTIN_WSADBZ:
25975 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25976 case ARM_BUILTIN_WSADHZ:
25977 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25979 /* Several three-argument builtins. */
25980 case ARM_BUILTIN_WMACS:
25981 case ARM_BUILTIN_WMACU:
25982 case ARM_BUILTIN_TMIA:
25983 case ARM_BUILTIN_TMIAPH:
25984 case ARM_BUILTIN_TMIATT:
25985 case ARM_BUILTIN_TMIATB:
25986 case ARM_BUILTIN_TMIABT:
25987 case ARM_BUILTIN_TMIABB:
25988 case ARM_BUILTIN_WQMIABB:
25989 case ARM_BUILTIN_WQMIABT:
25990 case ARM_BUILTIN_WQMIATB:
25991 case ARM_BUILTIN_WQMIATT:
25992 case ARM_BUILTIN_WQMIABBN:
25993 case ARM_BUILTIN_WQMIABTN:
25994 case ARM_BUILTIN_WQMIATBN:
25995 case ARM_BUILTIN_WQMIATTN:
25996 case ARM_BUILTIN_WMIABB:
25997 case ARM_BUILTIN_WMIABT:
25998 case ARM_BUILTIN_WMIATB:
25999 case ARM_BUILTIN_WMIATT:
26000 case ARM_BUILTIN_WMIABBN:
26001 case ARM_BUILTIN_WMIABTN:
26002 case ARM_BUILTIN_WMIATBN:
26003 case ARM_BUILTIN_WMIATTN:
26004 case ARM_BUILTIN_WMIAWBB:
26005 case ARM_BUILTIN_WMIAWBT:
26006 case ARM_BUILTIN_WMIAWTB:
26007 case ARM_BUILTIN_WMIAWTT:
26008 case ARM_BUILTIN_WMIAWBBN:
26009 case ARM_BUILTIN_WMIAWBTN:
26010 case ARM_BUILTIN_WMIAWTBN:
26011 case ARM_BUILTIN_WMIAWTTN:
26012 case ARM_BUILTIN_WSADB:
26013 case ARM_BUILTIN_WSADH:
26014 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26015 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26016 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26017 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26018 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26019 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26020 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26021 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26022 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26023 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26024 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26025 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26026 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26027 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26028 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26029 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26030 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26031 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26032 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26033 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26034 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26035 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26036 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26037 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26038 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26039 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26040 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26041 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26042 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26043 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26044 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26045 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26046 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26047 : CODE_FOR_iwmmxt_wsadh);
26048 arg0 = CALL_EXPR_ARG (exp, 0);
26049 arg1 = CALL_EXPR_ARG (exp, 1);
26050 arg2 = CALL_EXPR_ARG (exp, 2);
26051 op0 = expand_normal (arg0);
26052 op1 = expand_normal (arg1);
26053 op2 = expand_normal (arg2);
26054 tmode = insn_data[icode].operand[0].mode;
26055 mode0 = insn_data[icode].operand[1].mode;
26056 mode1 = insn_data[icode].operand[2].mode;
26057 mode2 = insn_data[icode].operand[3].mode;
26059 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26060 op0 = copy_to_mode_reg (mode0, op0);
26061 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26062 op1 = copy_to_mode_reg (mode1, op1);
26063 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26064 op2 = copy_to_mode_reg (mode2, op2);
26065 if (target == 0
26066 || GET_MODE (target) != tmode
26067 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26068 target = gen_reg_rtx (tmode);
26069 pat = GEN_FCN (icode) (target, op0, op1, op2);
26070 if (! pat)
26071 return 0;
26072 emit_insn (pat);
26073 return target;
26075 case ARM_BUILTIN_WZERO:
26076 target = gen_reg_rtx (DImode);
26077 emit_insn (gen_iwmmxt_clrdi (target));
26078 return target;
26080 case ARM_BUILTIN_WSRLHI:
26081 case ARM_BUILTIN_WSRLWI:
26082 case ARM_BUILTIN_WSRLDI:
26083 case ARM_BUILTIN_WSLLHI:
26084 case ARM_BUILTIN_WSLLWI:
26085 case ARM_BUILTIN_WSLLDI:
26086 case ARM_BUILTIN_WSRAHI:
26087 case ARM_BUILTIN_WSRAWI:
26088 case ARM_BUILTIN_WSRADI:
26089 case ARM_BUILTIN_WRORHI:
26090 case ARM_BUILTIN_WRORWI:
26091 case ARM_BUILTIN_WRORDI:
26092 case ARM_BUILTIN_WSRLH:
26093 case ARM_BUILTIN_WSRLW:
26094 case ARM_BUILTIN_WSRLD:
26095 case ARM_BUILTIN_WSLLH:
26096 case ARM_BUILTIN_WSLLW:
26097 case ARM_BUILTIN_WSLLD:
26098 case ARM_BUILTIN_WSRAH:
26099 case ARM_BUILTIN_WSRAW:
26100 case ARM_BUILTIN_WSRAD:
26101 case ARM_BUILTIN_WRORH:
26102 case ARM_BUILTIN_WRORW:
26103 case ARM_BUILTIN_WRORD:
26104 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26105 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26106 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26107 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26108 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26109 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26110 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26111 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26112 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26113 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26114 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26115 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26116 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26117 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26118 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26119 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26120 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26121 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26122 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26123 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26124 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26125 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26126 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26127 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26128 : CODE_FOR_nothing);
26129 arg1 = CALL_EXPR_ARG (exp, 1);
26130 op1 = expand_normal (arg1);
26131 if (GET_MODE (op1) == VOIDmode)
26133 imm = INTVAL (op1);
26134 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26135 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26136 && (imm < 0 || imm > 32))
26138 if (fcode == ARM_BUILTIN_WRORHI)
26139 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26140 else if (fcode == ARM_BUILTIN_WRORWI)
26141 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26142 else if (fcode == ARM_BUILTIN_WRORH)
26143 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26144 else
26145 error ("the count should be in the range 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26147 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26148 && (imm < 0 || imm > 64))
26150 if (fcode == ARM_BUILTIN_WRORDI)
26151 error ("the count should be in the range 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26152 else
26153 error ("the count should be in the range 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26155 else if (imm < 0)
26157 if (fcode == ARM_BUILTIN_WSRLHI)
26158 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26159 else if (fcode == ARM_BUILTIN_WSRLWI)
26160 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26161 else if (fcode == ARM_BUILTIN_WSRLDI)
26162 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26163 else if (fcode == ARM_BUILTIN_WSLLHI)
26164 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26165 else if (fcode == ARM_BUILTIN_WSLLWI)
26166 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26167 else if (fcode == ARM_BUILTIN_WSLLDI)
26168 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26169 else if (fcode == ARM_BUILTIN_WSRAHI)
26170 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26171 else if (fcode == ARM_BUILTIN_WSRAWI)
26172 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26173 else if (fcode == ARM_BUILTIN_WSRADI)
26174 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26175 else if (fcode == ARM_BUILTIN_WSRLH)
26176 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26177 else if (fcode == ARM_BUILTIN_WSRLW)
26178 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26179 else if (fcode == ARM_BUILTIN_WSRLD)
26180 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26181 else if (fcode == ARM_BUILTIN_WSLLH)
26182 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26183 else if (fcode == ARM_BUILTIN_WSLLW)
26184 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26185 else if (fcode == ARM_BUILTIN_WSLLD)
26186 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26187 else if (fcode == ARM_BUILTIN_WSRAH)
26188 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26189 else if (fcode == ARM_BUILTIN_WSRAW)
26190 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26191 else
26192 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26195 return arm_expand_binop_builtin (icode, exp, target);
26197 default:
26198 break;
26201 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26202 if (d->code == (const enum arm_builtins) fcode)
26203 return arm_expand_binop_builtin (d->icode, exp, target);
26205 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26206 if (d->code == (const enum arm_builtins) fcode)
26207 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26209 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26210 if (d->code == (const enum arm_builtins) fcode)
26211 return arm_expand_ternop_builtin (d->icode, exp, target);
26213 /* @@@ Should really do something sensible here. */
26214 return NULL_RTX;
26217 /* Return the number (counting from 0) of
26218 the least significant set bit in MASK. */
26220 inline static int
26221 number_of_first_bit_set (unsigned mask)
26223 return ctz_hwi (mask);
26226 /* Like emit_multi_reg_push, but allowing for a different set of
26227 registers to be described as saved. MASK is the set of registers
26228 to be saved; REAL_REGS is the set of registers to be described as
26229 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26231 static rtx_insn *
26232 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26234 unsigned long regno;
26235 rtx par[10], tmp, reg;
26236 rtx_insn *insn;
26237 int i, j;
26239 /* Build the parallel of the registers actually being stored. */
26240 for (i = 0; mask; ++i, mask &= mask - 1)
26242 regno = ctz_hwi (mask);
26243 reg = gen_rtx_REG (SImode, regno);
26245 if (i == 0)
26246 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26247 else
26248 tmp = gen_rtx_USE (VOIDmode, reg);
26250 par[i] = tmp;
26253 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26254 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26255 tmp = gen_frame_mem (BLKmode, tmp);
26256 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26257 par[0] = tmp;
26259 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26260 insn = emit_insn (tmp);
26262 /* Always build the stack adjustment note for unwind info. */
26263 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26264 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26265 par[0] = tmp;
26267 /* Build the parallel of the registers recorded as saved for unwind. */
26268 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26270 regno = ctz_hwi (real_regs);
26271 reg = gen_rtx_REG (SImode, regno);
26273 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26274 tmp = gen_frame_mem (SImode, tmp);
26275 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26276 RTX_FRAME_RELATED_P (tmp) = 1;
26277 par[j + 1] = tmp;
26280 if (j == 0)
26281 tmp = par[0];
26282 else
26284 RTX_FRAME_RELATED_P (par[0]) = 1;
26285 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26288 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26290 return insn;
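/* Illustrative example (hedged, mirroring the epilogue comment later in
   this file): when the prologue has copied r8 and r9 into r6 and r7 and
   pushes the latter pair, MASK describes {r6, r7} (the registers
   actually stored) while REAL_REGS describes {r8, r9}; the
   REG_FRAME_RELATED_EXPR note attached above then records the 8-byte
   stack adjustment as saves of r8 and r9, keeping the unwind
   information consistent with what the saved values really are.  */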
26293 /* Emit code to push or pop registers to or from the stack. F is the
26294 assembly file. MASK is the registers to pop. */
26295 static void
26296 thumb_pop (FILE *f, unsigned long mask)
26298 int regno;
26299 int lo_mask = mask & 0xFF;
26300 int pushed_words = 0;
26302 gcc_assert (mask);
26304 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26306 /* Special case. Do not generate a POP PC statement here, do it in
26307 thumb_exit (). */
26308 thumb_exit (f, -1);
26309 return;
26312 fprintf (f, "\tpop\t{");
26314 /* Look at the low registers first. */
26315 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26317 if (lo_mask & 1)
26319 asm_fprintf (f, "%r", regno);
26321 if ((lo_mask & ~1) != 0)
26322 fprintf (f, ", ");
26324 pushed_words++;
26328 if (mask & (1 << PC_REGNUM))
26330 /* Catch popping the PC. */
26331 if (TARGET_INTERWORK || TARGET_BACKTRACE
26332 || crtl->calls_eh_return)
26334 /* The PC is never popped directly; instead
26335 it is popped into r3 and then BX is used. */
26336 fprintf (f, "}\n");
26338 thumb_exit (f, -1);
26340 return;
26342 else
26344 if (mask & 0xFF)
26345 fprintf (f, ", ");
26347 asm_fprintf (f, "%r", PC_REGNUM);
26351 fprintf (f, "}\n");
26354 /* Generate code to return from a thumb function.
26355 If 'reg_containing_return_addr' is -1, then the return address is
26356 actually on the stack, at the stack pointer. */
26357 static void
26358 thumb_exit (FILE *f, int reg_containing_return_addr)
26360 unsigned regs_available_for_popping;
26361 unsigned regs_to_pop;
26362 int pops_needed;
26363 unsigned available;
26364 unsigned required;
26365 machine_mode mode;
26366 int size;
26367 int restore_a4 = FALSE;
26369 /* Compute the registers we need to pop. */
26370 regs_to_pop = 0;
26371 pops_needed = 0;
26373 if (reg_containing_return_addr == -1)
26375 regs_to_pop |= 1 << LR_REGNUM;
26376 ++pops_needed;
26379 if (TARGET_BACKTRACE)
26381 /* Restore the (ARM) frame pointer and stack pointer. */
26382 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26383 pops_needed += 2;
26386 /* If there is nothing to pop then just emit the BX instruction and
26387 return. */
26388 if (pops_needed == 0)
26390 if (crtl->calls_eh_return)
26391 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26393 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26394 return;
26396 /* Otherwise if we are not supporting interworking and we have not created
26397 a backtrace structure and the function was not entered in ARM mode then
26398 just pop the return address straight into the PC. */
26399 else if (!TARGET_INTERWORK
26400 && !TARGET_BACKTRACE
26401 && !is_called_in_ARM_mode (current_function_decl)
26402 && !crtl->calls_eh_return)
26404 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26405 return;
26408 /* Find out how many of the (return) argument registers we can corrupt. */
26409 regs_available_for_popping = 0;
26411 /* If returning via __builtin_eh_return, the bottom three registers
26412 all contain information needed for the return. */
26413 if (crtl->calls_eh_return)
26414 size = 12;
26415 else
26417 /* We can deduce the registers used from the function's
26418 return value. This is more reliable than examining
26419 df_regs_ever_live_p () because that will be set if the register is
26420 ever used in the function, not just if the register is used
26421 to hold a return value. */
26423 if (crtl->return_rtx != 0)
26424 mode = GET_MODE (crtl->return_rtx);
26425 else
26426 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26428 size = GET_MODE_SIZE (mode);
26430 if (size == 0)
26432 /* In a void function we can use any argument register.
26433 In a function that returns a structure on the stack
26434 we can use the second and third argument registers. */
26435 if (mode == VOIDmode)
26436 regs_available_for_popping =
26437 (1 << ARG_REGISTER (1))
26438 | (1 << ARG_REGISTER (2))
26439 | (1 << ARG_REGISTER (3));
26440 else
26441 regs_available_for_popping =
26442 (1 << ARG_REGISTER (2))
26443 | (1 << ARG_REGISTER (3));
26445 else if (size <= 4)
26446 regs_available_for_popping =
26447 (1 << ARG_REGISTER (2))
26448 | (1 << ARG_REGISTER (3));
26449 else if (size <= 8)
26450 regs_available_for_popping =
26451 (1 << ARG_REGISTER (3));
26454 /* Match registers to be popped with registers into which we pop them. */
26455 for (available = regs_available_for_popping,
26456 required = regs_to_pop;
26457 required != 0 && available != 0;
26458 available &= ~(available & - available),
26459 required &= ~(required & - required))
26460 -- pops_needed;
26462 /* If we have any popping registers left over, remove them. */
26463 if (available > 0)
26464 regs_available_for_popping &= ~available;
26466 /* Otherwise if we need another popping register we can use
26467 the fourth argument register. */
26468 else if (pops_needed)
26470 /* If we have not found any free argument registers and
26471 reg a4 contains the return address, we must move it. */
26472 if (regs_available_for_popping == 0
26473 && reg_containing_return_addr == LAST_ARG_REGNUM)
26475 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26476 reg_containing_return_addr = LR_REGNUM;
26478 else if (size > 12)
26480 /* Register a4 is being used to hold part of the return value,
26481 but we have dire need of a free, low register. */
26482 restore_a4 = TRUE;
26484 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26487 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26489 /* The fourth argument register is available. */
26490 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26492 --pops_needed;
26496 /* Pop as many registers as we can. */
26497 thumb_pop (f, regs_available_for_popping);
26499 /* Process the registers we popped. */
26500 if (reg_containing_return_addr == -1)
26502 /* The return address was popped into the lowest numbered register. */
26503 regs_to_pop &= ~(1 << LR_REGNUM);
26505 reg_containing_return_addr =
26506 number_of_first_bit_set (regs_available_for_popping);
26508 /* Remove this register from the mask of available registers, so that
26509 the return address will not be corrupted by further pops. */
26510 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26513 /* If we popped other registers then handle them here. */
26514 if (regs_available_for_popping)
26516 int frame_pointer;
26518 /* Work out which register currently contains the frame pointer. */
26519 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26521 /* Move it into the correct place. */
26522 asm_fprintf (f, "\tmov\t%r, %r\n",
26523 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26525 /* (Temporarily) remove it from the mask of popped registers. */
26526 regs_available_for_popping &= ~(1 << frame_pointer);
26527 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26529 if (regs_available_for_popping)
26531 int stack_pointer;
26533 /* We popped the stack pointer as well,
26534 find the register that contains it. */
26535 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26537 /* Move it into the stack register. */
26538 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26540 /* At this point we have popped all necessary registers, so
26541 do not worry about restoring regs_available_for_popping
26542 to its correct value:
26544 assert (pops_needed == 0)
26545 assert (regs_available_for_popping == (1 << frame_pointer))
26546 assert (regs_to_pop == (1 << STACK_POINTER)) */
26548 else
26550 /* Since we have just moved the popped value into the frame
26551 pointer, the popping register is available for reuse, and
26552 we know that we still have the stack pointer left to pop. */
26553 regs_available_for_popping |= (1 << frame_pointer);
26557 /* If we still have registers left on the stack, but we no longer have
26558 any registers into which we can pop them, then we must move the return
26559 address into the link register and make available the register that
26560 contained it. */
26561 if (regs_available_for_popping == 0 && pops_needed > 0)
26563 regs_available_for_popping |= 1 << reg_containing_return_addr;
26565 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26566 reg_containing_return_addr);
26568 reg_containing_return_addr = LR_REGNUM;
26571 /* If we have registers left on the stack then pop some more.
26572 We know that at most we will want to pop FP and SP. */
26573 if (pops_needed > 0)
26575 int popped_into;
26576 int move_to;
26578 thumb_pop (f, regs_available_for_popping);
26580 /* We have popped either FP or SP.
26581 Move whichever one it is into the correct register. */
26582 popped_into = number_of_first_bit_set (regs_available_for_popping);
26583 move_to = number_of_first_bit_set (regs_to_pop);
26585 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26587 regs_to_pop &= ~(1 << move_to);
26589 --pops_needed;
26592 /* If we still have not popped everything then we must have only
26593 had one register available to us and we are now popping the SP. */
26594 if (pops_needed > 0)
26596 int popped_into;
26598 thumb_pop (f, regs_available_for_popping);
26600 popped_into = number_of_first_bit_set (regs_available_for_popping);
26602 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26604 /* assert (regs_to_pop == (1 << STACK_POINTER))
26605 assert (pops_needed == 1) */
26609 /* If necessary restore the a4 register. */
26610 if (restore_a4)
26612 if (reg_containing_return_addr != LR_REGNUM)
26614 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26615 reg_containing_return_addr = LR_REGNUM;
26618 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26621 if (crtl->calls_eh_return)
26622 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26624 /* Return to caller. */
26625 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26628 /* Scan INSN just before assembler is output for it.
26629 For Thumb-1, we track the status of the condition codes; this
26630 information is used in the cbranchsi4_insn pattern. */
26631 void
26632 thumb1_final_prescan_insn (rtx_insn *insn)
26634 if (flag_print_asm_name)
26635 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26636 INSN_ADDRESSES (INSN_UID (insn)));
26637 /* Don't overwrite the previous setter when we get to a cbranch. */
26638 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26640 enum attr_conds conds;
26642 if (cfun->machine->thumb1_cc_insn)
26644 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26645 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26646 CC_STATUS_INIT;
26648 conds = get_attr_conds (insn);
26649 if (conds == CONDS_SET)
26651 rtx set = single_set (insn);
26652 cfun->machine->thumb1_cc_insn = insn;
26653 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26654 cfun->machine->thumb1_cc_op1 = const0_rtx;
26655 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26656 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26658 rtx src1 = XEXP (SET_SRC (set), 1);
26659 if (src1 == const0_rtx)
26660 cfun->machine->thumb1_cc_mode = CCmode;
26662 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26664 /* Record the src register operand instead of dest because
26665 cprop_hardreg pass propagates src. */
26666 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26669 else if (conds != CONDS_NOCOND)
26670 cfun->machine->thumb1_cc_insn = NULL_RTX;
26673 /* Check if unexpected far jump is used. */
26674 if (cfun->machine->lr_save_eliminated
26675 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26676 internal_error ("Unexpected thumb1 far jump");
26680 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26682 unsigned HOST_WIDE_INT mask = 0xff;
26683 int i;
26685 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26686 if (val == 0) /* XXX */
26687 return 0;
26689 for (i = 0; i < 25; i++)
26690 if ((val & (mask << i)) == val)
26691 return 1;
26693 return 0;
26696 /* Returns nonzero if the current function contains,
26697 or might contain a far jump. */
26698 static int
26699 thumb_far_jump_used_p (void)
26701 rtx_insn *insn;
26702 bool far_jump = false;
26703 unsigned int func_size = 0;
26705 /* This test is only important for leaf functions. */
26706 /* assert (!leaf_function_p ()); */
26708 /* If we have already decided that far jumps may be used,
26709 do not bother checking again, and always return true even if
26710 it turns out that they are not being used. Once we have made
26711 the decision that far jumps are present (and that hence the link
26712 register will be pushed onto the stack) we cannot go back on it. */
26713 if (cfun->machine->far_jump_used)
26714 return 1;
26716 /* If this function is not being called from the prologue/epilogue
26717 generation code then it must be being called from the
26718 INITIAL_ELIMINATION_OFFSET macro. */
26719 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26721 /* In this case we know that we are being asked about the elimination
26722 of the arg pointer register. If that register is not being used,
26723 then there are no arguments on the stack, and we do not have to
26724 worry that a far jump might force the prologue to push the link
26725 register, changing the stack offsets. In this case we can just
26726 return false, since the presence of far jumps in the function will
26727 not affect stack offsets.
26729 If the arg pointer is live (or if it was live, but has now been
26730 eliminated and so set to dead) then we do have to test to see if
26731 the function might contain a far jump. This test can lead to some
26732 false negatives, since before reload is completed, the length of
26733 branch instructions is not known, so gcc defaults to returning their
26734 longest length, which in turn sets the far jump attribute to true.
26736 A false negative will not result in bad code being generated, but it
26737 will result in a needless push and pop of the link register. We
26738 hope that this does not occur too often.
26740 If we need doubleword stack alignment this could affect the other
26741 elimination offsets so we can't risk getting it wrong. */
26742 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26743 cfun->machine->arg_pointer_live = 1;
26744 else if (!cfun->machine->arg_pointer_live)
26745 return 0;
26748 /* We should not change far_jump_used during or after reload, as there is
26749 no chance to change stack frame layout. */
26750 if (reload_in_progress || reload_completed)
26751 return 0;
26753 /* Check to see if the function contains a branch
26754 insn with the far jump attribute set. */
26755 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26757 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26759 far_jump = true;
26761 func_size += get_attr_length (insn);
26764 /* Attribute far_jump will always be true for thumb1 before
26765 shorten_branch pass. So checking far_jump attribute before
26766 shorten_branch isn't very useful.
26768 Following heuristic tries to estimate more accurately if a far jump
26769 may finally be used. The heuristic is very conservative as there is
26770 no chance to roll back the decision not to use a far jump.
26772 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26773 2-byte insn is associated with a 4-byte constant pool entry. Using
26774 function size 2048/3 as the threshold is conservative enough. */
26775 if (far_jump)
26777 if ((func_size * 3) >= 2048)
26779 /* Record the fact that we have decided that
26780 the function does use far jumps. */
26781 cfun->machine->far_jump_used = 1;
26782 return 1;
26786 return 0;
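/* Worked example (illustrative): the heuristic above assumes a worst
   case of one 4-byte literal-pool entry per 2-byte instruction, i.e. a
   total footprint of roughly 3 * func_size bytes.  Once func_size
   reaches about 683 bytes (2048 / 3), that estimate can exceed the
   +/-2KB reach of a Thumb-1 branch, so far_jump_used is latched and the
   link register will be saved in the prologue.  */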
26789 /* Return nonzero if FUNC must be entered in ARM mode. */
26791 is_called_in_ARM_mode (tree func)
26793 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26795 /* Ignore the problem about functions whose address is taken. */
26796 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26797 return TRUE;
26799 #ifdef ARM_PE
26800 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26801 #else
26802 return FALSE;
26803 #endif
26806 /* Given the stack offsets and register mask in OFFSETS, decide how
26807 many additional registers to push instead of subtracting a constant
26808 from SP. For epilogues the principle is the same except we use pop.
26809 FOR_PROLOGUE indicates which we're generating. */
26810 static int
26811 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26813 HOST_WIDE_INT amount;
26814 unsigned long live_regs_mask = offsets->saved_regs_mask;
26815 /* Extract a mask of the ones we can give to the Thumb's push/pop
26816 instruction. */
26817 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26818 /* Then count how many other high registers will need to be pushed. */
26819 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26820 int n_free, reg_base, size;
26822 if (!for_prologue && frame_pointer_needed)
26823 amount = offsets->locals_base - offsets->saved_regs;
26824 else
26825 amount = offsets->outgoing_args - offsets->saved_regs;
26827 /* If the stack frame size is 512 exactly, we can save one load
26828 instruction, which should make this a win even when optimizing
26829 for speed. */
26830 if (!optimize_size && amount != 512)
26831 return 0;
26833 /* Can't do this if there are high registers to push. */
26834 if (high_regs_pushed != 0)
26835 return 0;
26837 /* Shouldn't do it in the prologue if no registers would normally
26838 be pushed at all. In the epilogue, also allow it if we'll have
26839 a pop insn for the PC. */
26840 if (l_mask == 0
26841 && (for_prologue
26842 || TARGET_BACKTRACE
26843 || (live_regs_mask & 1 << LR_REGNUM) == 0
26844 || TARGET_INTERWORK
26845 || crtl->args.pretend_args_size != 0))
26846 return 0;
26848 /* Don't do this if thumb_expand_prologue wants to emit instructions
26849 between the push and the stack frame allocation. */
26850 if (for_prologue
26851 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26852 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26853 return 0;
26855 reg_base = 0;
26856 n_free = 0;
26857 if (!for_prologue)
26859 size = arm_size_return_regs ();
26860 reg_base = ARM_NUM_INTS (size);
26861 live_regs_mask >>= reg_base;
26864 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26865 && (for_prologue || call_used_regs[reg_base + n_free]))
26867 live_regs_mask >>= 1;
26868 n_free++;
26871 if (n_free == 0)
26872 return 0;
26873 gcc_assert (amount / 4 * 4 == amount);
26875 if (amount >= 512 && (amount - n_free * 4) < 512)
26876 return (amount - 508) / 4;
26877 if (amount <= n_free * 4)
26878 return amount / 4;
26879 return 0;
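/* Worked example, for illustration only: with a frame of AMOUNT = 516
   bytes and two free call-clobbered low registers, the code above returns
   (516 - 508) / 4 = 2.  The prologue then pushes two junk registers, the
   remaining adjustment drops to 516 - 8 = 508 bytes, and a single
   "sub sp, #508" suffices.  The values pushed this way are never
   reloaded; they exist purely to move SP cheaply.  */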
26882 /* The bits which aren't usefully expanded as rtl. */
26883 const char *
26884 thumb1_unexpanded_epilogue (void)
26886 arm_stack_offsets *offsets;
26887 int regno;
26888 unsigned long live_regs_mask = 0;
26889 int high_regs_pushed = 0;
26890 int extra_pop;
26891 int had_to_push_lr;
26892 int size;
26894 if (cfun->machine->return_used_this_function != 0)
26895 return "";
26897 if (IS_NAKED (arm_current_func_type ()))
26898 return "";
26900 offsets = arm_get_frame_offsets ();
26901 live_regs_mask = offsets->saved_regs_mask;
26902 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26904 /* We can deduce the registers used from the function's return value.
26905 This is more reliable than examining df_regs_ever_live_p () because that
26906 will be set if the register is ever used in the function, not just if
26907 the register is used to hold a return value. */
26908 size = arm_size_return_regs ();
26910 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26911 if (extra_pop > 0)
26913 unsigned long extra_mask = (1 << extra_pop) - 1;
26914 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26917 /* The prolog may have pushed some high registers to use as
26918 work registers. e.g. the testsuite file:
26919 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26920 compiles to produce:
26921 push {r4, r5, r6, r7, lr}
26922 mov r7, r9
26923 mov r6, r8
26924 push {r6, r7}
26925 as part of the prolog. We have to undo that pushing here. */
26927 if (high_regs_pushed)
26929 unsigned long mask = live_regs_mask & 0xff;
26930 int next_hi_reg;
26932 /* The available low registers depend on the size of the value we are
26933 returning. */
26934 if (size <= 12)
26935 mask |= 1 << 3;
26936 if (size <= 8)
26937 mask |= 1 << 2;
26939 if (mask == 0)
26940 /* Oh dear! We have no low registers into which we can pop
26941 high registers! */
26942 internal_error
26943 ("no low registers available for popping high registers");
26945 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26946 if (live_regs_mask & (1 << next_hi_reg))
26947 break;
26949 while (high_regs_pushed)
26951 /* Find lo register(s) into which the high register(s) can
26952 be popped. */
26953 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26955 if (mask & (1 << regno))
26956 high_regs_pushed--;
26957 if (high_regs_pushed == 0)
26958 break;
26961 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26963 /* Pop the values into the low register(s). */
26964 thumb_pop (asm_out_file, mask);
26966 /* Move the value(s) into the high registers. */
26967 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26969 if (mask & (1 << regno))
26971 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26972 regno);
26974 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26975 if (live_regs_mask & (1 << next_hi_reg))
26976 break;
26980 live_regs_mask &= ~0x0f00;
26983 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26984 live_regs_mask &= 0xff;
26986 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26988 /* Pop the return address into the PC. */
26989 if (had_to_push_lr)
26990 live_regs_mask |= 1 << PC_REGNUM;
26992 /* Either no argument registers were pushed or a backtrace
26993 structure was created which includes an adjusted stack
26994 pointer, so just pop everything. */
26995 if (live_regs_mask)
26996 thumb_pop (asm_out_file, live_regs_mask);
26998 /* We have either just popped the return address into the
26999 PC or it was kept in LR for the entire function.
27000 Note that thumb_pop has already called thumb_exit if the
27001 PC was in the list. */
27002 if (!had_to_push_lr)
27003 thumb_exit (asm_out_file, LR_REGNUM);
27005 else
27007 /* Pop everything but the return address. */
27008 if (live_regs_mask)
27009 thumb_pop (asm_out_file, live_regs_mask);
27011 if (had_to_push_lr)
27013 if (size > 12)
27015 /* We have no free low regs, so save one. */
27016 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27017 LAST_ARG_REGNUM);
27020 /* Get the return address into a temporary register. */
27021 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27023 if (size > 12)
27025 /* Move the return address to lr. */
27026 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27027 LAST_ARG_REGNUM);
27028 /* Restore the low register. */
27029 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27030 IP_REGNUM);
27031 regno = LR_REGNUM;
27033 else
27034 regno = LAST_ARG_REGNUM;
27036 else
27037 regno = LR_REGNUM;
27039 /* Remove the argument registers that were pushed onto the stack. */
27040 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27041 SP_REGNUM, SP_REGNUM,
27042 crtl->args.pretend_args_size);
27044 thumb_exit (asm_out_file, regno);
27047 return "";
27050 /* Functions to save and restore machine-specific function data. */
27051 static struct machine_function *
27052 arm_init_machine_status (void)
27054 struct machine_function *machine;
27055 machine = ggc_cleared_alloc<machine_function> ();
27057 #if ARM_FT_UNKNOWN != 0
27058 machine->func_type = ARM_FT_UNKNOWN;
27059 #endif
27060 return machine;
27063 /* Return an RTX indicating where the return address to the
27064 calling function can be found. */
27066 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27068 if (count != 0)
27069 return NULL_RTX;
27071 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27074 /* Do anything needed before RTL is emitted for each function. */
27075 void
27076 arm_init_expanders (void)
27078 /* Arrange to initialize and mark the machine per-function status. */
27079 init_machine_status = arm_init_machine_status;
27081 /* This is to stop the combine pass optimizing away the alignment
27082 adjustment of va_arg. */
27083 /* ??? It is claimed that this should not be necessary. */
27084 if (cfun)
27085 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27089 /* Like arm_compute_initial_elimination_offset. Simpler because there
27090 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27091 to point at the base of the local variables after static stack
27092 space for a function has been allocated. */
27094 HOST_WIDE_INT
27095 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27097 arm_stack_offsets *offsets;
27099 offsets = arm_get_frame_offsets ();
27101 switch (from)
27103 case ARG_POINTER_REGNUM:
27104 switch (to)
27106 case STACK_POINTER_REGNUM:
27107 return offsets->outgoing_args - offsets->saved_args;
27109 case FRAME_POINTER_REGNUM:
27110 return offsets->soft_frame - offsets->saved_args;
27112 case ARM_HARD_FRAME_POINTER_REGNUM:
27113 return offsets->saved_regs - offsets->saved_args;
27115 case THUMB_HARD_FRAME_POINTER_REGNUM:
27116 return offsets->locals_base - offsets->saved_args;
27118 default:
27119 gcc_unreachable ();
27121 break;
27123 case FRAME_POINTER_REGNUM:
27124 switch (to)
27126 case STACK_POINTER_REGNUM:
27127 return offsets->outgoing_args - offsets->soft_frame;
27129 case ARM_HARD_FRAME_POINTER_REGNUM:
27130 return offsets->saved_regs - offsets->soft_frame;
27132 case THUMB_HARD_FRAME_POINTER_REGNUM:
27133 return offsets->locals_base - offsets->soft_frame;
27135 default:
27136 gcc_unreachable ();
27138 break;
27140 default:
27141 gcc_unreachable ();
27145 /* Generate the function's prologue. */
27147 void
27148 thumb1_expand_prologue (void)
27150 rtx_insn *insn;
27152 HOST_WIDE_INT amount;
27153 arm_stack_offsets *offsets;
27154 unsigned long func_type;
27155 int regno;
27156 unsigned long live_regs_mask;
27157 unsigned long l_mask;
27158 unsigned high_regs_pushed = 0;
27160 func_type = arm_current_func_type ();
27162 /* Naked functions don't have prologues. */
27163 if (IS_NAKED (func_type))
27164 return;
27166 if (IS_INTERRUPT (func_type))
27168 error ("interrupt Service Routines cannot be coded in Thumb mode");
27169 return;
27172 if (is_called_in_ARM_mode (current_function_decl))
27173 emit_insn (gen_prologue_thumb1_interwork ());
27175 offsets = arm_get_frame_offsets ();
27176 live_regs_mask = offsets->saved_regs_mask;
27178 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27179 l_mask = live_regs_mask & 0x40ff;
27180 /* Then count how many other high registers will need to be pushed. */
27181 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27183 if (crtl->args.pretend_args_size)
27185 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27187 if (cfun->machine->uses_anonymous_args)
27189 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27190 unsigned long mask;
27192 mask = 1ul << (LAST_ARG_REGNUM + 1);
27193 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27195 insn = thumb1_emit_multi_reg_push (mask, 0);
27197 else
27199 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27200 stack_pointer_rtx, x));
27202 RTX_FRAME_RELATED_P (insn) = 1;
27205 if (TARGET_BACKTRACE)
27207 HOST_WIDE_INT offset = 0;
27208 unsigned work_register;
27209 rtx work_reg, x, arm_hfp_rtx;
27211 /* We have been asked to create a stack backtrace structure.
27212 The code looks like this:
27214 0 .align 2
27215 0 func:
27216 0 sub SP, #16 Reserve space for 4 registers.
27217 2 push {R7} Push low registers.
27218 4 add R7, SP, #20 Get the stack pointer before the push.
27219 6 str R7, [SP, #8] Store the stack pointer
27220 (before reserving the space).
27221 8 mov R7, PC Get hold of the start of this code + 12.
27222 10 str R7, [SP, #16] Store it.
27223 12 mov R7, FP Get hold of the current frame pointer.
27224 14 str R7, [SP, #4] Store it.
27225 16 mov R7, LR Get hold of the current return address.
27226 18 str R7, [SP, #12] Store it.
27227 20 add R7, SP, #16 Point at the start of the
27228 backtrace structure.
27229 22 mov FP, R7 Put this value into the frame pointer. */
27231 work_register = thumb_find_work_register (live_regs_mask);
27232 work_reg = gen_rtx_REG (SImode, work_register);
27233 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27235 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27236 stack_pointer_rtx, GEN_INT (-16)));
27237 RTX_FRAME_RELATED_P (insn) = 1;
27239 if (l_mask)
27241 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27242 RTX_FRAME_RELATED_P (insn) = 1;
27244 offset = bit_count (l_mask) * UNITS_PER_WORD;
27247 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27248 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27250 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27251 x = gen_frame_mem (SImode, x);
27252 emit_move_insn (x, work_reg);
27254 /* Make sure that the instruction fetching the PC is in the right place
27255 to calculate "start of backtrace creation code + 12". */
27256 /* ??? The stores using the common WORK_REG ought to be enough to
27257 prevent the scheduler from doing anything weird. Failing that
27258 we could always move all of the following into an UNSPEC_VOLATILE. */
27259 if (l_mask)
27261 x = gen_rtx_REG (SImode, PC_REGNUM);
27262 emit_move_insn (work_reg, x);
27264 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27265 x = gen_frame_mem (SImode, x);
27266 emit_move_insn (x, work_reg);
27268 emit_move_insn (work_reg, arm_hfp_rtx);
27270 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27271 x = gen_frame_mem (SImode, x);
27272 emit_move_insn (x, work_reg);
27274 else
27276 emit_move_insn (work_reg, arm_hfp_rtx);
27278 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27279 x = gen_frame_mem (SImode, x);
27280 emit_move_insn (x, work_reg);
27282 x = gen_rtx_REG (SImode, PC_REGNUM);
27283 emit_move_insn (work_reg, x);
27285 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27286 x = gen_frame_mem (SImode, x);
27287 emit_move_insn (x, work_reg);
27290 x = gen_rtx_REG (SImode, LR_REGNUM);
27291 emit_move_insn (work_reg, x);
27293 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27294 x = gen_frame_mem (SImode, x);
27295 emit_move_insn (x, work_reg);
27297 x = GEN_INT (offset + 12);
27298 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27300 emit_move_insn (arm_hfp_rtx, work_reg);
27302 /* Optimization: If we are not pushing any low registers but we are going
27303 to push some high registers then delay our first push. This will just
27304 be a push of LR and we can combine it with the push of the first high
27305 register. */
27306 else if ((l_mask & 0xff) != 0
27307 || (high_regs_pushed == 0 && l_mask))
27309 unsigned long mask = l_mask;
27310 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27311 insn = thumb1_emit_multi_reg_push (mask, mask);
27312 RTX_FRAME_RELATED_P (insn) = 1;
27315 if (high_regs_pushed)
27317 unsigned pushable_regs;
27318 unsigned next_hi_reg;
27319 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27320 : crtl->args.info.nregs;
27321 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27323 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27324 if (live_regs_mask & (1 << next_hi_reg))
27325 break;
27327 /* Here we need to mask out registers used for passing arguments
27328 even if they could be pushed. This is to avoid using them to stash the
27329 high registers, which could clobber argument values that are still live. */
27330 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27332 if (pushable_regs == 0)
27333 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27335 while (high_regs_pushed > 0)
27337 unsigned long real_regs_mask = 0;
27339 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27341 if (pushable_regs & (1 << regno))
27343 emit_move_insn (gen_rtx_REG (SImode, regno),
27344 gen_rtx_REG (SImode, next_hi_reg));
27346 high_regs_pushed --;
27347 real_regs_mask |= (1 << next_hi_reg);
27349 if (high_regs_pushed)
27351 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27352 next_hi_reg --)
27353 if (live_regs_mask & (1 << next_hi_reg))
27354 break;
27356 else
27358 pushable_regs &= ~((1 << regno) - 1);
27359 break;
27364 /* If we had to find a work register and we have not yet
27365 saved the LR then add it to the list of regs to push. */
27366 if (l_mask == (1 << LR_REGNUM))
27368 pushable_regs |= l_mask;
27369 real_regs_mask |= l_mask;
27370 l_mask = 0;
27373 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27374 RTX_FRAME_RELATED_P (insn) = 1;
27378 /* Load the pic register before setting the frame pointer,
27379 so we can use r7 as a temporary work register. */
27380 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27381 arm_load_pic_register (live_regs_mask);
27383 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27384 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27385 stack_pointer_rtx);
27387 if (flag_stack_usage_info)
27388 current_function_static_stack_size
27389 = offsets->outgoing_args - offsets->saved_args;
27391 amount = offsets->outgoing_args - offsets->saved_regs;
27392 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27393 if (amount)
27395 if (amount < 512)
27397 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27398 GEN_INT (- amount)));
27399 RTX_FRAME_RELATED_P (insn) = 1;
27401 else
27403 rtx reg, dwarf;
27405 /* The stack decrement is too big for an immediate value in a single
27406 insn. In theory we could issue multiple subtracts, but after
27407 three of them it becomes more space efficient to place the full
27408 value in the constant pool and load into a register. (Also the
27409 ARM debugger really likes to see only one stack decrement per
27410 function). So instead we look for a scratch register into which
27411 we can load the decrement, and then we subtract this from the
27412 stack pointer. Unfortunately on the thumb the only available
27413 scratch registers are the argument registers, and we cannot use
27414 these as they may hold arguments to the function. Instead we
27415 attempt to locate a call preserved register which is used by this
27416 function. If we can find one, then we know that it will have
27417 been pushed at the start of the prologue and so we can corrupt
27418 it now. */
27419 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27420 if (live_regs_mask & (1 << regno))
27421 break;
27423 gcc_assert(regno <= LAST_LO_REGNUM);
27425 reg = gen_rtx_REG (SImode, regno);
27427 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27429 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27430 stack_pointer_rtx, reg));
27432 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27433 plus_constant (Pmode, stack_pointer_rtx,
27434 -amount));
27435 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27436 RTX_FRAME_RELATED_P (insn) = 1;
27440 if (frame_pointer_needed)
27441 thumb_set_frame_pointer (offsets);
27443 /* If we are profiling, make sure no instructions are scheduled before
27444 the call to mcount. Similarly if the user has requested no
27445 scheduling in the prolog. Similarly if we want non-call exceptions
27446 using the EABI unwinder, to prevent faulting instructions from being
27447 swapped with a stack adjustment. */
27448 if (crtl->profile || !TARGET_SCHED_PROLOG
27449 || (arm_except_unwind_info (&global_options) == UI_TARGET
27450 && cfun->can_throw_non_call_exceptions))
27451 emit_insn (gen_blockage ());
27453 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27454 if (live_regs_mask & 0xff)
27455 cfun->machine->lr_save_eliminated = 0;
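/* Illustrative sketch (register choice and frame size are assumptions,
   not taken from a real function): for a function that saves r4 and lr
   and needs a 1024-byte frame, the "decrement too big for an immediate"
   path above comes out as roughly

	push	{r4, lr}
	ldr	r4, =-1024	@ r4 was pushed above, so it may be corrupted
	add	sp, sp, r4

   plus a REG_FRAME_RELATED_EXPR note spelling out the SP adjustment,
   since the unwinder cannot deduce it from the register-based add.  */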
27458 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27459 POP instruction can be generated. LR should be replaced by PC. All
27460 the checks required are already done by USE_RETURN_INSN (). Hence,
27461 all we really need to check here is if single register is to be
27462 returned, or multiple register return. */
27463 void
27464 thumb2_expand_return (bool simple_return)
27466 int i, num_regs;
27467 unsigned long saved_regs_mask;
27468 arm_stack_offsets *offsets;
27470 offsets = arm_get_frame_offsets ();
27471 saved_regs_mask = offsets->saved_regs_mask;
27473 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27474 if (saved_regs_mask & (1 << i))
27475 num_regs++;
27477 if (!simple_return && saved_regs_mask)
27479 if (num_regs == 1)
27481 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27482 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27483 rtx addr = gen_rtx_MEM (SImode,
27484 gen_rtx_POST_INC (SImode,
27485 stack_pointer_rtx));
27486 set_mem_alias_set (addr, get_frame_alias_set ());
27487 XVECEXP (par, 0, 0) = ret_rtx;
27488 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27489 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27490 emit_jump_insn (par);
27492 else
27494 saved_regs_mask &= ~ (1 << LR_REGNUM);
27495 saved_regs_mask |= (1 << PC_REGNUM);
27496 arm_emit_multi_reg_pop (saved_regs_mask);
27499 else
27501 emit_jump_insn (simple_return_rtx);
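/* For illustration: when LR is the only register saved, the
   single-register path above folds the restore and the return into one
   post-increment load of the PC, roughly "ldr pc, [sp], #4"; with several
   saved registers the LR slot is simply retargeted at the PC and handled
   by the ordinary "pop {..., pc}" multi-register pop.  */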
27505 void
27506 thumb1_expand_epilogue (void)
27508 HOST_WIDE_INT amount;
27509 arm_stack_offsets *offsets;
27510 int regno;
27512 /* Naked functions don't have prologues. */
27513 if (IS_NAKED (arm_current_func_type ()))
27514 return;
27516 offsets = arm_get_frame_offsets ();
27517 amount = offsets->outgoing_args - offsets->saved_regs;
27519 if (frame_pointer_needed)
27521 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27522 amount = offsets->locals_base - offsets->saved_regs;
27524 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27526 gcc_assert (amount >= 0);
27527 if (amount)
27529 emit_insn (gen_blockage ());
27531 if (amount < 512)
27532 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27533 GEN_INT (amount)));
27534 else
27536 /* r3 is always free in the epilogue. */
27537 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27539 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27540 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27544 /* Emit a USE (stack_pointer_rtx), so that
27545 the stack adjustment will not be deleted. */
27546 emit_insn (gen_force_register_use (stack_pointer_rtx));
27548 if (crtl->profile || !TARGET_SCHED_PROLOG)
27549 emit_insn (gen_blockage ());
27551 /* Emit a clobber for each insn that will be restored in the epilogue,
27552 so that flow2 will get register lifetimes correct. */
27553 for (regno = 0; regno < 13; regno++)
27554 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27555 emit_clobber (gen_rtx_REG (SImode, regno));
27557 if (! df_regs_ever_live_p (LR_REGNUM))
27558 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27561 /* Epilogue code for APCS frame. */
27562 static void
27563 arm_expand_epilogue_apcs_frame (bool really_return)
27565 unsigned long func_type;
27566 unsigned long saved_regs_mask;
27567 int num_regs = 0;
27568 int i;
27569 int floats_from_frame = 0;
27570 arm_stack_offsets *offsets;
27572 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27573 func_type = arm_current_func_type ();
27575 /* Get frame offsets for ARM. */
27576 offsets = arm_get_frame_offsets ();
27577 saved_regs_mask = offsets->saved_regs_mask;
27579 /* Find the offset of the floating-point save area in the frame. */
27580 floats_from_frame
27581 = (offsets->saved_args
27582 + arm_compute_static_chain_stack_bytes ()
27583 - offsets->frame);
27585 /* Compute how many core registers saved and how far away the floats are. */
27586 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27587 if (saved_regs_mask & (1 << i))
27589 num_regs++;
27590 floats_from_frame += 4;
27593 if (TARGET_HARD_FLOAT && TARGET_VFP)
27595 int start_reg;
27596 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27598 /* The offset is from IP_REGNUM. */
27599 int saved_size = arm_get_vfp_saved_size ();
27600 if (saved_size > 0)
27602 rtx_insn *insn;
27603 floats_from_frame += saved_size;
27604 insn = emit_insn (gen_addsi3 (ip_rtx,
27605 hard_frame_pointer_rtx,
27606 GEN_INT (-floats_from_frame)));
27607 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27608 ip_rtx, hard_frame_pointer_rtx);
27611 /* Generate VFP register multi-pop. */
27612 start_reg = FIRST_VFP_REGNUM;
27614 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27615 /* Look for a case where a reg does not need restoring. */
27616 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27617 && (!df_regs_ever_live_p (i + 1)
27618 || call_used_regs[i + 1]))
27620 if (start_reg != i)
27621 arm_emit_vfp_multi_reg_pop (start_reg,
27622 (i - start_reg) / 2,
27623 gen_rtx_REG (SImode,
27624 IP_REGNUM));
27625 start_reg = i + 2;
27628 /* Restore the remaining regs that we have discovered (or possibly
27629 even all of them, if the conditional in the for loop never
27630 fired). */
27631 if (start_reg != i)
27632 arm_emit_vfp_multi_reg_pop (start_reg,
27633 (i - start_reg) / 2,
27634 gen_rtx_REG (SImode, IP_REGNUM));
27637 if (TARGET_IWMMXT)
27639 /* The frame pointer is guaranteed to be non-double-word aligned, as
27640 it is set to double-word-aligned old_stack_pointer - 4. */
27641 rtx_insn *insn;
27642 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27644 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27645 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27647 rtx addr = gen_frame_mem (V2SImode,
27648 plus_constant (Pmode, hard_frame_pointer_rtx,
27649 - lrm_count * 4));
27650 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27651 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27652 gen_rtx_REG (V2SImode, i),
27653 NULL_RTX);
27654 lrm_count += 2;
27658 /* saved_regs_mask should contain IP, which holds the old stack pointer
27659 from the time the activation record was created. Since SP and IP are adjacent registers,
27660 we can restore the value directly into SP. */
27661 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27662 saved_regs_mask &= ~(1 << IP_REGNUM);
27663 saved_regs_mask |= (1 << SP_REGNUM);
27665 /* There are two registers left in saved_regs_mask - LR and PC. We
27666 only need to restore LR (the return address), but to
27667 save time we can load it directly into PC, unless we need a
27668 special function exit sequence, or we are not really returning. */
27669 if (really_return
27670 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27671 && !crtl->calls_eh_return)
27672 /* Delete LR from the register mask, so that LR on
27673 the stack is loaded into the PC in the register mask. */
27674 saved_regs_mask &= ~(1 << LR_REGNUM);
27675 else
27676 saved_regs_mask &= ~(1 << PC_REGNUM);
27678 num_regs = bit_count (saved_regs_mask);
27679 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27681 rtx_insn *insn;
27682 emit_insn (gen_blockage ());
27683 /* Unwind the stack to just below the saved registers. */
27684 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27685 hard_frame_pointer_rtx,
27686 GEN_INT (- 4 * num_regs)));
27688 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27689 stack_pointer_rtx, hard_frame_pointer_rtx);
27692 arm_emit_multi_reg_pop (saved_regs_mask);
27694 if (IS_INTERRUPT (func_type))
27696 /* Interrupt handlers will have pushed the
27697 IP onto the stack, so restore it now. */
27698 rtx_insn *insn;
27699 rtx addr = gen_rtx_MEM (SImode,
27700 gen_rtx_POST_INC (SImode,
27701 stack_pointer_rtx));
27702 set_mem_alias_set (addr, get_frame_alias_set ());
27703 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27704 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27705 gen_rtx_REG (SImode, IP_REGNUM),
27706 NULL_RTX);
27709 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27710 return;
27712 if (crtl->calls_eh_return)
27713 emit_insn (gen_addsi3 (stack_pointer_rtx,
27714 stack_pointer_rtx,
27715 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27717 if (IS_STACKALIGN (func_type))
27718 /* Restore the original stack pointer. Before prologue, the stack was
27719 realigned and the original stack pointer saved in r0. For details,
27720 see comment in arm_expand_prologue. */
27721 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27723 emit_jump_insn (simple_return_rtx);
27726 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27727 function is not a sibcall. */
27728 void
27729 arm_expand_epilogue (bool really_return)
27731 unsigned long func_type;
27732 unsigned long saved_regs_mask;
27733 int num_regs = 0;
27734 int i;
27735 int amount;
27736 arm_stack_offsets *offsets;
27738 func_type = arm_current_func_type ();
27740 /* Naked functions don't have an epilogue. Hence, generate the return pattern and
27741 let output_return_instruction take care of instruction emission if any. */
27742 if (IS_NAKED (func_type)
27743 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27745 if (really_return)
27746 emit_jump_insn (simple_return_rtx);
27747 return;
27750 /* If we are throwing an exception, then we really must be doing a
27751 return, so we can't tail-call. */
27752 gcc_assert (!crtl->calls_eh_return || really_return);
27754 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27756 arm_expand_epilogue_apcs_frame (really_return);
27757 return;
27760 /* Get frame offsets for ARM. */
27761 offsets = arm_get_frame_offsets ();
27762 saved_regs_mask = offsets->saved_regs_mask;
27763 num_regs = bit_count (saved_regs_mask);
27765 if (frame_pointer_needed)
27767 rtx_insn *insn;
27768 /* Restore stack pointer if necessary. */
27769 if (TARGET_ARM)
27771 /* In ARM mode, frame pointer points to first saved register.
27772 Restore stack pointer to last saved register. */
27773 amount = offsets->frame - offsets->saved_regs;
27775 /* Force out any pending memory operations that reference stacked data
27776 before stack de-allocation occurs. */
27777 emit_insn (gen_blockage ());
27778 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27779 hard_frame_pointer_rtx,
27780 GEN_INT (amount)));
27781 arm_add_cfa_adjust_cfa_note (insn, amount,
27782 stack_pointer_rtx,
27783 hard_frame_pointer_rtx);
27785 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27786 deleted. */
27787 emit_insn (gen_force_register_use (stack_pointer_rtx));
27789 else
27791 /* In Thumb-2 mode, the frame pointer points to the last saved
27792 register. */
27793 amount = offsets->locals_base - offsets->saved_regs;
27794 if (amount)
27796 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27797 hard_frame_pointer_rtx,
27798 GEN_INT (amount)));
27799 arm_add_cfa_adjust_cfa_note (insn, amount,
27800 hard_frame_pointer_rtx,
27801 hard_frame_pointer_rtx);
27804 /* Force out any pending memory operations that reference stacked data
27805 before stack de-allocation occurs. */
27806 emit_insn (gen_blockage ());
27807 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27808 hard_frame_pointer_rtx));
27809 arm_add_cfa_adjust_cfa_note (insn, 0,
27810 stack_pointer_rtx,
27811 hard_frame_pointer_rtx);
27812 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27813 deleted. */
27814 emit_insn (gen_force_register_use (stack_pointer_rtx));
27817 else
27819 /* Pop off outgoing args and local frame to adjust stack pointer to
27820 last saved register. */
27821 amount = offsets->outgoing_args - offsets->saved_regs;
27822 if (amount)
27824 rtx_insn *tmp;
27825 /* Force out any pending memory operations that reference stacked data
27826 before stack de-allocation occurs. */
27827 emit_insn (gen_blockage ());
27828 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27829 stack_pointer_rtx,
27830 GEN_INT (amount)));
27831 arm_add_cfa_adjust_cfa_note (tmp, amount,
27832 stack_pointer_rtx, stack_pointer_rtx);
27833 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27834 not deleted. */
27835 emit_insn (gen_force_register_use (stack_pointer_rtx));
27839 if (TARGET_HARD_FLOAT && TARGET_VFP)
27841 /* Generate VFP register multi-pop. */
27842 int end_reg = LAST_VFP_REGNUM + 1;
27844 /* Scan the registers in reverse order. We need to match
27845 any groupings made in the prologue and generate matching
27846 vldm operations. The groups must match because, unlike pop,
27847 vldm can only restore consecutive registers. */
27848 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27849 /* Look for a case where a reg does not need restoring. */
27850 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27851 && (!df_regs_ever_live_p (i + 1)
27852 || call_used_regs[i + 1]))
27854 /* Restore the regs discovered so far (from reg+2 to
27855 end_reg). */
27856 if (end_reg > i + 2)
27857 arm_emit_vfp_multi_reg_pop (i + 2,
27858 (end_reg - (i + 2)) / 2,
27859 stack_pointer_rtx);
27860 end_reg = i;
27863 /* Restore the remaining regs that we have discovered (or possibly
27864 even all of them, if the conditional in the for loop never
27865 fired). */
27866 if (end_reg > i + 2)
27867 arm_emit_vfp_multi_reg_pop (i + 2,
27868 (end_reg - (i + 2)) / 2,
27869 stack_pointer_rtx);
27872 if (TARGET_IWMMXT)
27873 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27874 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27876 rtx_insn *insn;
27877 rtx addr = gen_rtx_MEM (V2SImode,
27878 gen_rtx_POST_INC (SImode,
27879 stack_pointer_rtx));
27880 set_mem_alias_set (addr, get_frame_alias_set ());
27881 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27882 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27883 gen_rtx_REG (V2SImode, i),
27884 NULL_RTX);
27885 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27886 stack_pointer_rtx, stack_pointer_rtx);
27889 if (saved_regs_mask)
27891 rtx insn;
27892 bool return_in_pc = false;
27894 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27895 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27896 && !IS_STACKALIGN (func_type)
27897 && really_return
27898 && crtl->args.pretend_args_size == 0
27899 && saved_regs_mask & (1 << LR_REGNUM)
27900 && !crtl->calls_eh_return)
27902 saved_regs_mask &= ~(1 << LR_REGNUM);
27903 saved_regs_mask |= (1 << PC_REGNUM);
27904 return_in_pc = true;
27907 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27909 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27910 if (saved_regs_mask & (1 << i))
27912 rtx addr = gen_rtx_MEM (SImode,
27913 gen_rtx_POST_INC (SImode,
27914 stack_pointer_rtx));
27915 set_mem_alias_set (addr, get_frame_alias_set ());
27917 if (i == PC_REGNUM)
27919 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27920 XVECEXP (insn, 0, 0) = ret_rtx;
27921 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27922 gen_rtx_REG (SImode, i),
27923 addr);
27924 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27925 insn = emit_jump_insn (insn);
27927 else
27929 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27930 addr));
27931 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27932 gen_rtx_REG (SImode, i),
27933 NULL_RTX);
27934 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27935 stack_pointer_rtx,
27936 stack_pointer_rtx);
27940 else
27942 if (TARGET_LDRD
27943 && current_tune->prefer_ldrd_strd
27944 && !optimize_function_for_size_p (cfun))
27946 if (TARGET_THUMB2)
27947 thumb2_emit_ldrd_pop (saved_regs_mask);
27948 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27949 arm_emit_ldrd_pop (saved_regs_mask);
27950 else
27951 arm_emit_multi_reg_pop (saved_regs_mask);
27953 else
27954 arm_emit_multi_reg_pop (saved_regs_mask);
27957 if (return_in_pc == true)
27958 return;
27961 if (crtl->args.pretend_args_size)
27963 int i, j;
27964 rtx dwarf = NULL_RTX;
27965 rtx_insn *tmp =
27966 emit_insn (gen_addsi3 (stack_pointer_rtx,
27967 stack_pointer_rtx,
27968 GEN_INT (crtl->args.pretend_args_size)));
27970 RTX_FRAME_RELATED_P (tmp) = 1;
27972 if (cfun->machine->uses_anonymous_args)
27974 /* Restore pretend args. See arm_expand_prologue for how the
27975 pretend args are saved on the stack. */
27976 int num_regs = crtl->args.pretend_args_size / 4;
27977 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27978 for (j = 0, i = 0; j < num_regs; i++)
27979 if (saved_regs_mask & (1 << i))
27981 rtx reg = gen_rtx_REG (SImode, i);
27982 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27983 j++;
27985 REG_NOTES (tmp) = dwarf;
27987 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27988 stack_pointer_rtx, stack_pointer_rtx);
27991 if (!really_return)
27992 return;
27994 if (crtl->calls_eh_return)
27995 emit_insn (gen_addsi3 (stack_pointer_rtx,
27996 stack_pointer_rtx,
27997 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27999 if (IS_STACKALIGN (func_type))
28000 /* Restore the original stack pointer. Before prologue, the stack was
28001 realigned and the original stack pointer saved in r0. For details,
28002 see comment in arm_expand_prologue. */
28003 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
28005 emit_jump_insn (simple_return_rtx);
28008 /* Implementation of insn prologue_thumb1_interwork. This is the first
28009 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28011 const char *
28012 thumb1_output_interwork (void)
28014 const char * name;
28015 FILE *f = asm_out_file;
28017 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28018 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28019 == SYMBOL_REF);
28020 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28022 /* Generate code sequence to switch us into Thumb mode. */
28023 /* The .code 32 directive has already been emitted by
28024 ASM_DECLARE_FUNCTION_NAME. */
28025 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28026 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28028 /* Generate a label, so that the debugger will notice the
28029 change in instruction sets. This label is also used by
28030 the assembler to bypass the ARM code when this function
28031 is called from a Thumb encoded function elsewhere in the
28032 same file. Hence the definition of STUB_NAME here must
28033 agree with the definition in gas/config/tc-arm.c. */
28035 #define STUB_NAME ".real_start_of"
28037 fprintf (f, "\t.code\t16\n");
28038 #ifdef ARM_PE
28039 if (arm_dllexport_name_p (name))
28040 name = arm_strip_name_encoding (name);
28041 #endif
28042 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28043 fprintf (f, "\t.thumb_func\n");
28044 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28046 return "";
28049 /* Handle the case of a double word load into a low register from
28050 a computed memory address. The computed address may involve a
28051 register which is overwritten by the load. */
28052 const char *
28053 thumb_load_double_from_address (rtx *operands)
28055 rtx addr;
28056 rtx base;
28057 rtx offset;
28058 rtx arg1;
28059 rtx arg2;
28061 gcc_assert (REG_P (operands[0]));
28062 gcc_assert (MEM_P (operands[1]));
28064 /* Get the memory address. */
28065 addr = XEXP (operands[1], 0);
28067 /* Work out how the memory address is computed. */
28068 switch (GET_CODE (addr))
28070 case REG:
28071 operands[2] = adjust_address (operands[1], SImode, 4);
28073 if (REGNO (operands[0]) == REGNO (addr))
28075 output_asm_insn ("ldr\t%H0, %2", operands);
28076 output_asm_insn ("ldr\t%0, %1", operands);
28078 else
28080 output_asm_insn ("ldr\t%0, %1", operands);
28081 output_asm_insn ("ldr\t%H0, %2", operands);
28083 break;
28085 case CONST:
28086 /* Compute <address> + 4 for the high order load. */
28087 operands[2] = adjust_address (operands[1], SImode, 4);
28089 output_asm_insn ("ldr\t%0, %1", operands);
28090 output_asm_insn ("ldr\t%H0, %2", operands);
28091 break;
28093 case PLUS:
28094 arg1 = XEXP (addr, 0);
28095 arg2 = XEXP (addr, 1);
28097 if (CONSTANT_P (arg1))
28098 base = arg2, offset = arg1;
28099 else
28100 base = arg1, offset = arg2;
28102 gcc_assert (REG_P (base));
28104 /* Catch the case of <address> = <reg> + <reg> */
28105 if (REG_P (offset))
28107 int reg_offset = REGNO (offset);
28108 int reg_base = REGNO (base);
28109 int reg_dest = REGNO (operands[0]);
28111 /* Add the base and offset registers together into the
28112 higher destination register. */
28113 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28114 reg_dest + 1, reg_base, reg_offset);
28116 /* Load the lower destination register from the address in
28117 the higher destination register. */
28118 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28119 reg_dest, reg_dest + 1);
28121 /* Load the higher destination register from its own address
28122 plus 4. */
28123 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28124 reg_dest + 1, reg_dest + 1);
28126 else
28128 /* Compute <address> + 4 for the high order load. */
28129 operands[2] = adjust_address (operands[1], SImode, 4);
28131 /* If the computed address is held in the low order register
28132 then load the high order register first, otherwise always
28133 load the low order register first. */
28134 if (REGNO (operands[0]) == REGNO (base))
28136 output_asm_insn ("ldr\t%H0, %2", operands);
28137 output_asm_insn ("ldr\t%0, %1", operands);
28139 else
28141 output_asm_insn ("ldr\t%0, %1", operands);
28142 output_asm_insn ("ldr\t%H0, %2", operands);
28145 break;
28147 case LABEL_REF:
28148 /* With no registers to worry about we can just load the value
28149 directly. */
28150 operands[2] = adjust_address (operands[1], SImode, 4);
28152 output_asm_insn ("ldr\t%H0, %2", operands);
28153 output_asm_insn ("ldr\t%0, %1", operands);
28154 break;
28156 default:
28157 gcc_unreachable ();
28160 return "";
28163 const char *
28164 thumb_output_move_mem_multiple (int n, rtx *operands)
28166 rtx tmp;
28168 switch (n)
28170 case 2:
28171 if (REGNO (operands[4]) > REGNO (operands[5]))
28173 tmp = operands[4];
28174 operands[4] = operands[5];
28175 operands[5] = tmp;
28177 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28178 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28179 break;
28181 case 3:
28182 if (REGNO (operands[4]) > REGNO (operands[5]))
28184 tmp = operands[4];
28185 operands[4] = operands[5];
28186 operands[5] = tmp;
28188 if (REGNO (operands[5]) > REGNO (operands[6]))
28190 tmp = operands[5];
28191 operands[5] = operands[6];
28192 operands[6] = tmp;
28194 if (REGNO (operands[4]) > REGNO (operands[5]))
28196 tmp = operands[4];
28197 operands[4] = operands[5];
28198 operands[5] = tmp;
28201 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28202 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28203 break;
28205 default:
28206 gcc_unreachable ();
28209 return "";
28212 /* Output a call-via instruction for thumb state. */
28213 const char *
28214 thumb_call_via_reg (rtx reg)
28216 int regno = REGNO (reg);
28217 rtx *labelp;
28219 gcc_assert (regno < LR_REGNUM);
28221 /* If we are in the normal text section we can use a single instance
28222 per compilation unit. If we are doing function sections, then we need
28223 an entry per section, since we can't rely on reachability. */
28224 if (in_section == text_section)
28226 thumb_call_reg_needed = 1;
28228 if (thumb_call_via_label[regno] == NULL)
28229 thumb_call_via_label[regno] = gen_label_rtx ();
28230 labelp = thumb_call_via_label + regno;
28232 else
28234 if (cfun->machine->call_via[regno] == NULL)
28235 cfun->machine->call_via[regno] = gen_label_rtx ();
28236 labelp = cfun->machine->call_via + regno;
28239 output_asm_insn ("bl\t%a0", labelp);
28240 return "";
28243 /* Routines for generating rtl. */
28244 void
28245 thumb_expand_movmemqi (rtx *operands)
28247 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28248 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28249 HOST_WIDE_INT len = INTVAL (operands[2]);
28250 HOST_WIDE_INT offset = 0;
28252 while (len >= 12)
28254 emit_insn (gen_movmem12b (out, in, out, in));
28255 len -= 12;
28258 if (len >= 8)
28260 emit_insn (gen_movmem8b (out, in, out, in));
28261 len -= 8;
28264 if (len >= 4)
28266 rtx reg = gen_reg_rtx (SImode);
28267 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28268 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28269 len -= 4;
28270 offset += 4;
28273 if (len >= 2)
28275 rtx reg = gen_reg_rtx (HImode);
28276 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28277 plus_constant (Pmode, in,
28278 offset))));
28279 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28280 offset)),
28281 reg));
28282 len -= 2;
28283 offset += 2;
28286 if (len)
28288 rtx reg = gen_reg_rtx (QImode);
28289 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28290 plus_constant (Pmode, in,
28291 offset))));
28292 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28293 offset)),
28294 reg));
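/* Worked example, illustrative only: a 23-byte copy is emitted by the
   code above as 12 + 8 + 2 + 1, i.e. one movmem12b, one movmem8b, one
   half-word move and one trailing byte move.  A hypothetical helper that
   computes the same schedule:  */
#if 0
static void
example_movmem_schedule (HOST_WIDE_INT len,
			 int *n12, int *n8, int *n4, int *n2, int *n1)
{
  *n12 = len / 12, len %= 12;		/* 12-byte ldmia/stmia chunks.  */
  *n8 = len >= 8, len -= *n8 * 8;	/* At most one 8-byte chunk.  */
  *n4 = len >= 4, len -= *n4 * 4;	/* At most one word.  */
  *n2 = len >= 2, len -= *n2 * 2;	/* At most one half-word.  */
  *n1 = len;				/* 0 or 1 trailing byte.  */
}
#endif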
28298 void
28299 thumb_reload_out_hi (rtx *operands)
28301 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28304 /* Handle reading a half-word from memory during reload. */
28305 void
28306 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28308 gcc_unreachable ();
28311 /* Return the length of a function name prefix
28312 that starts with the character 'c'. */
28313 static int
28314 arm_get_strip_length (int c)
28316 switch (c)
28318 ARM_NAME_ENCODING_LENGTHS
28319 default: return 0;
28323 /* Return a pointer to a function's name with any
28324 and all prefix encodings stripped from it. */
28325 const char *
28326 arm_strip_name_encoding (const char *name)
28328 int skip;
28330 while ((skip = arm_get_strip_length (* name)))
28331 name += skip;
28333 return name;
28336 /* If there is a '*' anywhere in the name's prefix, then
28337 emit the stripped name verbatim, otherwise prepend an
28338 underscore if leading underscores are being used. */
28339 void
28340 arm_asm_output_labelref (FILE *stream, const char *name)
28342 int skip;
28343 int verbatim = 0;
28345 while ((skip = arm_get_strip_length (* name)))
28347 verbatim |= (*name == '*');
28348 name += skip;
28351 if (verbatim)
28352 fputs (name, stream);
28353 else
28354 asm_fprintf (stream, "%U%s", name);
28357 /* This function is used to emit an EABI tag and its associated value.
28358 We emit the numerical value of the tag in case the assembler does not
28359 support textual tags (e.g. gas prior to 2.20). If requested we include
28360 the tag name in a comment so that anyone reading the assembler output
28361 will know which tag is being set.
28363 This function is not static because arm-c.c needs it too. */
28365 void
28366 arm_emit_eabi_attribute (const char *name, int num, int val)
28368 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28369 if (flag_verbose_asm || flag_debug_asm)
28370 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28371 asm_fprintf (asm_out_file, "\n");
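/* Illustrative output, assuming -O2 and -fverbose-asm (the tag number and
   value match the Tag_ABI_optimization_goals use further down):

	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals

   Without -fverbose-asm the trailing comment is omitted and only the
   numeric form is emitted.  */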
28374 static void
28375 arm_file_start (void)
28377 int val;
28379 if (TARGET_UNIFIED_ASM)
28380 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28382 if (TARGET_BPABI)
28384 const char *fpu_name;
28385 if (arm_selected_arch)
28387 /* armv7ve doesn't support any extensions. */
28388 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28390 /* Keep backward compatibility for assemblers
28391 which don't support armv7ve. */
28392 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28393 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28394 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28395 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28396 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28398 else
28400 const char* pos = strchr (arm_selected_arch->name, '+');
28401 if (pos)
28403 char buf[15];
28404 gcc_assert (strlen (arm_selected_arch->name)
28405 <= sizeof (buf) / sizeof (*pos));
28406 strncpy (buf, arm_selected_arch->name,
28407 (pos - arm_selected_arch->name) * sizeof (*pos));
28408 buf[pos - arm_selected_arch->name] = '\0';
28409 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28410 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28412 else
28413 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28416 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28417 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28418 else
28420 const char* truncated_name
28421 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28422 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28425 if (TARGET_SOFT_FLOAT)
28427 fpu_name = "softvfp";
28429 else
28431 fpu_name = arm_fpu_desc->name;
28432 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28434 if (TARGET_HARD_FLOAT)
28435 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28436 if (TARGET_HARD_FLOAT_ABI)
28437 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28440 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28442 /* Some of these attributes only apply when the corresponding features
28443 are used. However we don't have any easy way of figuring this out.
28444 Conservatively record the setting that would have been used. */
28446 if (flag_rounding_math)
28447 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28449 if (!flag_unsafe_math_optimizations)
28451 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28452 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28454 if (flag_signaling_nans)
28455 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28457 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28458 flag_finite_math_only ? 1 : 3);
28460 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28461 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28462 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28463 flag_short_enums ? 1 : 2);
28465 /* Tag_ABI_optimization_goals. */
28466 if (optimize_size)
28467 val = 4;
28468 else if (optimize >= 2)
28469 val = 2;
28470 else if (optimize)
28471 val = 1;
28472 else
28473 val = 6;
28474 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28476 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28477 unaligned_access);
28479 if (arm_fp16_format)
28480 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28481 (int) arm_fp16_format);
28483 if (arm_lang_output_object_attributes_hook)
28484 arm_lang_output_object_attributes_hook();
28487 default_file_start ();
28490 static void
28491 arm_file_end (void)
28493 int regno;
28495 if (NEED_INDICATE_EXEC_STACK)
28496 /* Add .note.GNU-stack. */
28497 file_end_indicate_exec_stack ();
28499 if (! thumb_call_reg_needed)
28500 return;
28502 switch_to_section (text_section);
28503 asm_fprintf (asm_out_file, "\t.code 16\n");
28504 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28506 for (regno = 0; regno < LR_REGNUM; regno++)
28508 rtx label = thumb_call_via_label[regno];
28510 if (label != 0)
28512 targetm.asm_out.internal_label (asm_out_file, "L",
28513 CODE_LABEL_NUMBER (label));
28514 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28519 #ifndef ARM_PE
28520 /* Symbols in the text segment can be accessed without indirecting via the
28521 constant pool; it may take an extra binary operation, but this is still
28522 faster than indirecting via memory. Don't do this when not optimizing,
28523 since we won't be calculating all of the offsets necessary to do this
28524 simplification. */
28526 static void
28527 arm_encode_section_info (tree decl, rtx rtl, int first)
28529 if (optimize > 0 && TREE_CONSTANT (decl))
28530 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28532 default_encode_section_info (decl, rtl, first);
28534 #endif /* !ARM_PE */
28536 static void
28537 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28539 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28540 && !strcmp (prefix, "L"))
28542 arm_ccfsm_state = 0;
28543 arm_target_insn = NULL;
28545 default_internal_label (stream, prefix, labelno);
28548 /* Output code to add DELTA to the first argument, and then jump
28549 to FUNCTION. Used for C++ multiple inheritance. */
28550 static void
28551 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28552 HOST_WIDE_INT delta,
28553 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28554 tree function)
28556 static int thunk_label = 0;
28557 char label[256];
28558 char labelpc[256];
28559 int mi_delta = delta;
28560 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28561 int shift = 0;
28562 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28563 ? 1 : 0);
28564 if (mi_delta < 0)
28565 mi_delta = - mi_delta;
28567 final_start_function (emit_barrier (), file, 1);
28569 if (TARGET_THUMB1)
28571 int labelno = thunk_label++;
28572 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28573 /* Thunks are entered in ARM mode when available. */
28574 if (TARGET_THUMB1_ONLY)
28576 /* push r3 so we can use it as a temporary. */
28577 /* TODO: Omit this save if r3 is not used. */
28578 fputs ("\tpush {r3}\n", file);
28579 fputs ("\tldr\tr3, ", file);
28581 else
28583 fputs ("\tldr\tr12, ", file);
28585 assemble_name (file, label);
28586 fputc ('\n', file);
28587 if (flag_pic)
28589 /* If we are generating PIC, the ldr instruction below loads
28590 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28591 the address of the add + 8, so we have:
28593 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28594 = target + 1.
28596 Note that we have "+ 1" because some versions of GNU ld
28597 don't set the low bit of the result for R_ARM_REL32
28598 relocations against thumb function symbols.
28599 On ARMv6M this is +4, not +8. */
28600 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28601 assemble_name (file, labelpc);
28602 fputs (":\n", file);
28603 if (TARGET_THUMB1_ONLY)
28605 /* This is 2 insns after the start of the thunk, so we know it
28606 is 4-byte aligned. */
28607 fputs ("\tadd\tr3, pc, r3\n", file);
28608 fputs ("\tmov r12, r3\n", file);
28610 else
28611 fputs ("\tadd\tr12, pc, r12\n", file);
28613 else if (TARGET_THUMB1_ONLY)
28614 fputs ("\tmov r12, r3\n", file);
28616 if (TARGET_THUMB1_ONLY)
28618 if (mi_delta > 255)
28620 fputs ("\tldr\tr3, ", file);
28621 assemble_name (file, label);
28622 fputs ("+4\n", file);
28623 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28624 mi_op, this_regno, this_regno);
28626 else if (mi_delta != 0)
28628 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28629 mi_op, this_regno, this_regno,
28630 mi_delta);
28633 else
28635 /* TODO: Use movw/movt for large constants when available. */
28636 while (mi_delta != 0)
28638 if ((mi_delta & (3 << shift)) == 0)
28639 shift += 2;
28640 else
28642 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28643 mi_op, this_regno, this_regno,
28644 mi_delta & (0xff << shift));
28645 mi_delta &= ~(0xff << shift);
28646 shift += 8;
28650 if (TARGET_THUMB1)
28652 if (TARGET_THUMB1_ONLY)
28653 fputs ("\tpop\t{r3}\n", file);
28655 fprintf (file, "\tbx\tr12\n");
28656 ASM_OUTPUT_ALIGN (file, 2);
28657 assemble_name (file, label);
28658 fputs (":\n", file);
28659 if (flag_pic)
28661 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28662 rtx tem = XEXP (DECL_RTL (function), 0);
28663 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28664 pipeline offset is four rather than eight. Adjust the offset
28665 accordingly. */
28666 tem = plus_constant (GET_MODE (tem), tem,
28667 TARGET_THUMB1_ONLY ? -3 : -7);
28668 tem = gen_rtx_MINUS (GET_MODE (tem),
28669 tem,
28670 gen_rtx_SYMBOL_REF (Pmode,
28671 ggc_strdup (labelpc)));
28672 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28674 else
28675 /* Output ".word .LTHUNKn". */
28676 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28678 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28679 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28681 else
28683 fputs ("\tb\t", file);
28684 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28685 if (NEED_PLT_RELOC)
28686 fputs ("(PLT)", file);
28687 fputc ('\n', file);
28690 final_end_function ();
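/* Worked example, illustrative only: in the non-Thumb-1 branch above a
   thunk delta of 0x12345 applied to the "this" pointer (r0 here) is split
   into one add per non-zero 8-bit field:

	add	r0, r0, #0x45
	add	r0, r0, #0x2300
	add	r0, r0, #0x10000

   Each piece is an 8-bit value at an even bit position, so every add uses
   a valid ARM data-processing immediate.  */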
28694 arm_emit_vector_const (FILE *file, rtx x)
28696 int i;
28697 const char * pattern;
28699 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28701 switch (GET_MODE (x))
28703 case V2SImode: pattern = "%08x"; break;
28704 case V4HImode: pattern = "%04x"; break;
28705 case V8QImode: pattern = "%02x"; break;
28706 default: gcc_unreachable ();
28709 fprintf (file, "0x");
28710 for (i = CONST_VECTOR_NUNITS (x); i--;)
28712 rtx element;
28714 element = CONST_VECTOR_ELT (x, i);
28715 fprintf (file, pattern, INTVAL (element));
28718 return 1;
28721 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28722 HFmode constant pool entries are actually loaded with ldr. */
28723 void
28724 arm_emit_fp16_const (rtx c)
28726 REAL_VALUE_TYPE r;
28727 long bits;
28729 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28730 bits = real_to_target (NULL, &r, HFmode);
28731 if (WORDS_BIG_ENDIAN)
28732 assemble_zeros (2);
28733 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28734 if (!WORDS_BIG_ENDIAN)
28735 assemble_zeros (2);
28738 const char *
28739 arm_output_load_gr (rtx *operands)
28741 rtx reg;
28742 rtx offset;
28743 rtx wcgr;
28744 rtx sum;
28746 if (!MEM_P (operands [1])
28747 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28748 || !REG_P (reg = XEXP (sum, 0))
28749 || !CONST_INT_P (offset = XEXP (sum, 1))
28750 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28751 return "wldrw%?\t%0, %1";
28753 /* Fix up an out-of-range load of a GR register. */
28754 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28755 wcgr = operands[0];
28756 operands[0] = reg;
28757 output_asm_insn ("ldr%?\t%0, %1", operands);
28759 operands[0] = wcgr;
28760 operands[1] = reg;
28761 output_asm_insn ("tmcr%?\t%0, %1", operands);
28762 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28764 return "";
28767 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28769 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28770 named arg and all anonymous args onto the stack.
28771 XXX I know the prologue shouldn't be pushing registers, but it is faster
28772 that way. */
28774 static void
28775 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28776 machine_mode mode,
28777 tree type,
28778 int *pretend_size,
28779 int second_time ATTRIBUTE_UNUSED)
28781 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28782 int nregs;
28784 cfun->machine->uses_anonymous_args = 1;
28785 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28787 nregs = pcum->aapcs_ncrn;
28788 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28789 nregs++;
28791 else
28792 nregs = pcum->nregs;
28794 if (nregs < NUM_ARG_REGS)
28795 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
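/* Illustrative arithmetic: with the four core argument registers r0-r3
   (NUM_ARG_REGS == 4) and UNITS_PER_WORD == 4, a variadic call that has
   used only r0 for named arguments gives nregs == 1, so *pretend_size
   becomes (4 - 1) * 4 == 12 and the prologue flushes r1-r3 to the stack
   for va_arg to find.  */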
28798 /* We can't rely on the caller doing the proper promotion when
28799 using APCS or ATPCS. */
28801 static bool
28802 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28804 return !TARGET_AAPCS_BASED;
28807 static machine_mode
28808 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28809 machine_mode mode,
28810 int *punsignedp ATTRIBUTE_UNUSED,
28811 const_tree fntype ATTRIBUTE_UNUSED,
28812 int for_return ATTRIBUTE_UNUSED)
28814 if (GET_MODE_CLASS (mode) == MODE_INT
28815 && GET_MODE_SIZE (mode) < 4)
28816 return SImode;
28818 return mode;
28821 /* AAPCS based ABIs use short enums by default. */
28823 static bool
28824 arm_default_short_enums (void)
28826 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28830 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28832 static bool
28833 arm_align_anon_bitfield (void)
28835 return TARGET_AAPCS_BASED;
28839 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28841 static tree
28842 arm_cxx_guard_type (void)
28844 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28848 /* The EABI says test the least significant bit of a guard variable. */
28850 static bool
28851 arm_cxx_guard_mask_bit (void)
28853 return TARGET_AAPCS_BASED;
28857 /* The EABI specifies that all array cookies are 8 bytes long. */
28859 static tree
28860 arm_get_cookie_size (tree type)
28862 tree size;
28864 if (!TARGET_AAPCS_BASED)
28865 return default_cxx_get_cookie_size (type);
28867 size = build_int_cst (sizetype, 8);
28868 return size;
28872 /* The EABI says that array cookies should also contain the element size. */
28874 static bool
28875 arm_cookie_has_size (void)
28877 return TARGET_AAPCS_BASED;
28881 /* The EABI says constructors and destructors should return a pointer to
28882 the object constructed/destroyed. */
28884 static bool
28885 arm_cxx_cdtor_returns_this (void)
28887 return TARGET_AAPCS_BASED;
28890 /* The EABI says that an inline function may never be the key
28891 method. */
28893 static bool
28894 arm_cxx_key_method_may_be_inline (void)
28896 return !TARGET_AAPCS_BASED;
28899 static void
28900 arm_cxx_determine_class_data_visibility (tree decl)
28902 if (!TARGET_AAPCS_BASED
28903 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28904 return;
28906 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28907 is exported. However, on systems without dynamic vague linkage,
28908 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28909 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28910 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28911 else
28912 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28913 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28916 static bool
28917 arm_cxx_class_data_always_comdat (void)
28919 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28920 vague linkage if the class has no key function. */
28921 return !TARGET_AAPCS_BASED;
28925 /* The EABI says __aeabi_atexit should be used to register static
28926 destructors. */
28928 static bool
28929 arm_cxx_use_aeabi_atexit (void)
28931 return TARGET_AAPCS_BASED;
28935 void
28936 arm_set_return_address (rtx source, rtx scratch)
28938 arm_stack_offsets *offsets;
28939 HOST_WIDE_INT delta;
28940 rtx addr;
28941 unsigned long saved_regs;
28943 offsets = arm_get_frame_offsets ();
28944 saved_regs = offsets->saved_regs_mask;
28946 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28947 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28948 else
28950 if (frame_pointer_needed)
28951 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28952 else
28954 /* LR will be the first saved register. */
28955 delta = offsets->outgoing_args - (offsets->frame + 4);
28958 if (delta >= 4096)
28960 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28961 GEN_INT (delta & ~4095)));
28962 addr = scratch;
28963 delta &= 4095;
28965 else
28966 addr = stack_pointer_rtx;
28968 addr = plus_constant (Pmode, addr, delta);
28970 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28975 void
28976 thumb_set_return_address (rtx source, rtx scratch)
28978 arm_stack_offsets *offsets;
28979 HOST_WIDE_INT delta;
28980 HOST_WIDE_INT limit;
28981 int reg;
28982 rtx addr;
28983 unsigned long mask;
28985 emit_use (source);
28987 offsets = arm_get_frame_offsets ();
28988 mask = offsets->saved_regs_mask;
28989 if (mask & (1 << LR_REGNUM))
28991 limit = 1024;
28992 /* Find the saved regs. */
28993 if (frame_pointer_needed)
28995 delta = offsets->soft_frame - offsets->saved_args;
28996 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28997 if (TARGET_THUMB1)
28998 limit = 128;
29000 else
29002 delta = offsets->outgoing_args - offsets->saved_args;
29003 reg = SP_REGNUM;
29005 /* Allow for the stack frame. */
29006 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29007 delta -= 16;
29008 /* The link register is always the first saved register. */
29009 delta -= 4;
29011 /* Construct the address. */
29012 addr = gen_rtx_REG (SImode, reg);
29013 if (delta > limit)
29015 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29016 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29017 addr = scratch;
29019 else
29020 addr = plus_constant (Pmode, addr, delta);
29022 emit_move_insn (gen_frame_mem (Pmode, addr), source);
29024 else
29025 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29028 /* Implements target hook vector_mode_supported_p. */
29029 bool
29030 arm_vector_mode_supported_p (machine_mode mode)
29032 /* Neon also supports V2SImode, etc. listed in the clause below. */
29033 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29034 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29035 return true;
29037 if ((TARGET_NEON || TARGET_IWMMXT)
29038 && ((mode == V2SImode)
29039 || (mode == V4HImode)
29040 || (mode == V8QImode)))
29041 return true;
29043 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29044 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29045 || mode == V2HAmode))
29046 return true;
29048 return false;
29051 /* Implements target hook array_mode_supported_p. */
29053 static bool
29054 arm_array_mode_supported_p (machine_mode mode,
29055 unsigned HOST_WIDE_INT nelems)
29057 if (TARGET_NEON
29058 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29059 && (nelems >= 2 && nelems <= 4))
29060 return true;
29062 return false;
29065 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29066 registers when autovectorizing for Neon, at least until multiple vector
29067 widths are supported properly by the middle-end. */
29069 static machine_mode
29070 arm_preferred_simd_mode (machine_mode mode)
29072 if (TARGET_NEON)
29073 switch (mode)
29075 case SFmode:
29076 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29077 case SImode:
29078 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29079 case HImode:
29080 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29081 case QImode:
29082 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29083 case DImode:
29084 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29085 return V2DImode;
29086 break;
29088 default:;
29091 if (TARGET_REALLY_IWMMXT)
29092 switch (mode)
29094 case SImode:
29095 return V2SImode;
29096 case HImode:
29097 return V4HImode;
29098 case QImode:
29099 return V8QImode;
29101 default:;
29104 return word_mode;
29107 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29109 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29110 using r0-r4 for function arguments and r7 for the stack frame, leaving too
29111 few registers for doubleword arithmetic. For Thumb-2 all the
29112 potentially problematic instructions accept high registers so this is not
29113 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29114 that require many low registers. */
29115 static bool
29116 arm_class_likely_spilled_p (reg_class_t rclass)
29118 if ((TARGET_THUMB1 && rclass == LO_REGS)
29119 || rclass == CC_REG)
29120 return true;
29122 return false;
29125 /* Implements target hook small_register_classes_for_mode_p. */
29126 bool
29127 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29129 return TARGET_THUMB1;
29132 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29133 ARM insns and therefore guarantee that the shift count is modulo 256.
29134 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29135 guarantee no particular behavior for out-of-range counts. */
29137 static unsigned HOST_WIDE_INT
29138 arm_shift_truncation_mask (machine_mode mode)
29140 return mode == SImode ? 255 : 0;
29144 /* Map internal gcc register numbers to DWARF2 register numbers. */
29146 unsigned int
29147 arm_dbx_register_number (unsigned int regno)
29149 if (regno < 16)
29150 return regno;
29152 if (IS_VFP_REGNUM (regno))
29154 /* See comment in arm_dwarf_register_span. */
29155 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29156 return 64 + regno - FIRST_VFP_REGNUM;
29157 else
29158 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29161 if (IS_IWMMXT_GR_REGNUM (regno))
29162 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29164 if (IS_IWMMXT_REGNUM (regno))
29165 return 112 + regno - FIRST_IWMMXT_REGNUM;
29167 gcc_unreachable ();
29170 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29171 GCC models them as 64 32-bit registers, so we need to describe this to
29172 the DWARF generation code. Other registers can use the default. */
29173 static rtx
29174 arm_dwarf_register_span (rtx rtl)
29176 machine_mode mode;
29177 unsigned regno;
29178 rtx parts[16];
29179 int nregs;
29180 int i;
29182 regno = REGNO (rtl);
29183 if (!IS_VFP_REGNUM (regno))
29184 return NULL_RTX;
29186 /* XXX FIXME: The EABI defines two VFP register ranges:
29187 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29188 256-287: D0-D31
29189 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29190 corresponding D register. Until GDB supports this, we shall use the
29191 legacy encodings. We also use these encodings for D0-D15 for
29192 compatibility with older debuggers. */
29193 mode = GET_MODE (rtl);
29194 if (GET_MODE_SIZE (mode) < 8)
29195 return NULL_RTX;
29197 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29199 nregs = GET_MODE_SIZE (mode) / 4;
29200 for (i = 0; i < nregs; i += 2)
29201 if (TARGET_BIG_END)
29203 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29204 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29206 else
29208 parts[i] = gen_rtx_REG (SImode, regno + i);
29209 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29212 else
29214 nregs = GET_MODE_SIZE (mode) / 8;
29215 for (i = 0; i < nregs; i++)
29216 parts[i] = gen_rtx_REG (DImode, regno + i);
29219 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
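/* An illustrative example: a DFmode value in a D register that is also
   addressable as two S registers (VFP_REGNO_OK_FOR_SINGLE) is described
   as a PARALLEL of its two SImode halves, swapped when TARGET_BIG_END so
   the debugger sees the correct order; for the remaining D registers the
   span is simply the DImode registers themselves.  */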
29222 #if ARM_UNWIND_INFO
29223 /* Emit unwind directives for a store-multiple instruction or stack pointer
29224 push during alignment.
29225 These should only ever be generated by the function prologue code, so
29226 expect them to have a particular form.
29227 The store-multiple instruction sometimes pushes pc as the last register,
29228 although it should not be tracked in the unwind information; for -Os it
29229 sometimes pushes dummy registers before the first register that needs
29230 to be tracked in the unwind information. Such dummy registers are there just
29231 to avoid separate stack adjustment, and will not be restored in the
29232 epilogue. */
29234 static void
29235 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29237 int i;
29238 HOST_WIDE_INT offset;
29239 HOST_WIDE_INT nregs;
29240 int reg_size;
29241 unsigned reg;
29242 unsigned lastreg;
29243 unsigned padfirst = 0, padlast = 0;
29244 rtx e;
29246 e = XVECEXP (p, 0, 0);
29247 gcc_assert (GET_CODE (e) == SET);
29249 /* First insn will adjust the stack pointer. */
29250 gcc_assert (GET_CODE (e) == SET
29251 && REG_P (SET_DEST (e))
29252 && REGNO (SET_DEST (e)) == SP_REGNUM
29253 && GET_CODE (SET_SRC (e)) == PLUS);
29255 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29256 nregs = XVECLEN (p, 0) - 1;
29257 gcc_assert (nregs);
29259 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29260 if (reg < 16)
29262 /* For -Os dummy registers can be pushed at the beginning to
29263 avoid separate stack pointer adjustment. */
29264 e = XVECEXP (p, 0, 1);
29265 e = XEXP (SET_DEST (e), 0);
29266 if (GET_CODE (e) == PLUS)
29267 padfirst = INTVAL (XEXP (e, 1));
29268 gcc_assert (padfirst == 0 || optimize_size);
29269 /* The function prologue may also push pc, but not annotate it as it is
29270 never restored. We turn this into a stack pointer adjustment. */
29271 e = XVECEXP (p, 0, nregs);
29272 e = XEXP (SET_DEST (e), 0);
29273 if (GET_CODE (e) == PLUS)
29274 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29275 else
29276 padlast = offset - 4;
29277 gcc_assert (padlast == 0 || padlast == 4);
29278 if (padlast == 4)
29279 fprintf (asm_out_file, "\t.pad #4\n");
29280 reg_size = 4;
29281 fprintf (asm_out_file, "\t.save {");
29283 else if (IS_VFP_REGNUM (reg))
29285 reg_size = 8;
29286 fprintf (asm_out_file, "\t.vsave {");
29288 else
29289 /* Unknown register type. */
29290 gcc_unreachable ();
29292 /* If the stack increment doesn't match the size of the saved registers,
29293 something has gone horribly wrong. */
29294 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29296 offset = padfirst;
29297 lastreg = 0;
29298 /* The remaining insns will describe the stores. */
29299 for (i = 1; i <= nregs; i++)
29301 /* Expect (set (mem <addr>) (reg)).
29302 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29303 e = XVECEXP (p, 0, i);
29304 gcc_assert (GET_CODE (e) == SET
29305 && MEM_P (SET_DEST (e))
29306 && REG_P (SET_SRC (e)));
29308 reg = REGNO (SET_SRC (e));
29309 gcc_assert (reg >= lastreg);
29311 if (i != 1)
29312 fprintf (asm_out_file, ", ");
29313 /* We can't use %r for vfp because we need to use the
29314 double precision register names. */
29315 if (IS_VFP_REGNUM (reg))
29316 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29317 else
29318 asm_fprintf (asm_out_file, "%r", reg);
29320 #ifdef ENABLE_CHECKING
29321 /* Check that the addresses are consecutive. */
29322 e = XEXP (SET_DEST (e), 0);
29323 if (GET_CODE (e) == PLUS)
29324 gcc_assert (REG_P (XEXP (e, 0))
29325 && REGNO (XEXP (e, 0)) == SP_REGNUM
29326 && CONST_INT_P (XEXP (e, 1))
29327 && offset == INTVAL (XEXP (e, 1)));
29328 else
29329 gcc_assert (i == 1
29330 && REG_P (e)
29331 && REGNO (e) == SP_REGNUM);
29332 offset += reg_size;
29333 #endif
29335 fprintf (asm_out_file, "}\n");
29336 if (padfirst)
29337 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
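/* An illustrative example: a prologue store-multiple such as
   "push {r4, r5, lr}" (sp decremented by 12) is annotated here as

       .save {r4, r5, lr}

   while a VFP save of d8-d9 yields ".vsave {d8, d9}"; a pushed pc or
   -Os padding registers are folded into the ".pad" directives emitted
   above instead of being listed.  */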
29340 /* Emit unwind directives for a SET. */
29342 static void
29343 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29345 rtx e0;
29346 rtx e1;
29347 unsigned reg;
29349 e0 = XEXP (p, 0);
29350 e1 = XEXP (p, 1);
29351 switch (GET_CODE (e0))
29353 case MEM:
29354 /* Pushing a single register. */
29355 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29356 || !REG_P (XEXP (XEXP (e0, 0), 0))
29357 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29358 abort ();
29360 asm_fprintf (asm_out_file, "\t.save ");
29361 if (IS_VFP_REGNUM (REGNO (e1)))
29362 asm_fprintf(asm_out_file, "{d%d}\n",
29363 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29364 else
29365 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29366 break;
29368 case REG:
29369 if (REGNO (e0) == SP_REGNUM)
29371 /* A stack increment. */
29372 if (GET_CODE (e1) != PLUS
29373 || !REG_P (XEXP (e1, 0))
29374 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29375 || !CONST_INT_P (XEXP (e1, 1)))
29376 abort ();
29378 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29379 -INTVAL (XEXP (e1, 1)));
29381 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29383 HOST_WIDE_INT offset;
29385 if (GET_CODE (e1) == PLUS)
29387 if (!REG_P (XEXP (e1, 0))
29388 || !CONST_INT_P (XEXP (e1, 1)))
29389 abort ();
29390 reg = REGNO (XEXP (e1, 0));
29391 offset = INTVAL (XEXP (e1, 1));
29392 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29393 HARD_FRAME_POINTER_REGNUM, reg,
29394 offset);
29396 else if (REG_P (e1))
29398 reg = REGNO (e1);
29399 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29400 HARD_FRAME_POINTER_REGNUM, reg);
29402 else
29403 abort ();
29405 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29407 /* Move from sp to reg. */
29408 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29410 else if (GET_CODE (e1) == PLUS
29411 && REG_P (XEXP (e1, 0))
29412 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29413 && CONST_INT_P (XEXP (e1, 1)))
29415 /* Set reg to offset from sp. */
29416 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29417 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29419 else
29420 abort ();
29421 break;
29423 default:
29424 abort ();
29429 /* Emit unwind directives for the given insn. */
29431 static void
29432 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29434 rtx note, pat;
29435 bool handled_one = false;
29437 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29438 return;
29440 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29441 && (TREE_NOTHROW (current_function_decl)
29442 || crtl->all_throwers_are_sibcalls))
29443 return;
29445 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29446 return;
29448 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29450 switch (REG_NOTE_KIND (note))
29452 case REG_FRAME_RELATED_EXPR:
29453 pat = XEXP (note, 0);
29454 goto found;
29456 case REG_CFA_REGISTER:
29457 pat = XEXP (note, 0);
29458 if (pat == NULL)
29460 pat = PATTERN (insn);
29461 if (GET_CODE (pat) == PARALLEL)
29462 pat = XVECEXP (pat, 0, 0);
29465 /* Only emitted for IS_STACKALIGN re-alignment. */
29467 rtx dest, src;
29468 unsigned reg;
29470 src = SET_SRC (pat);
29471 dest = SET_DEST (pat);
29473 gcc_assert (src == stack_pointer_rtx);
29474 reg = REGNO (dest);
29475 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29476 reg + 0x90, reg);
29478 handled_one = true;
29479 break;
29481 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29482 only to get correct DWARF information for shrink-wrapping. We should not
29483 emit unwind information for it because these notes are used either for
29484 pretend arguments or to adjust sp and restore registers from the
29485 stack. */
29486 case REG_CFA_DEF_CFA:
29487 case REG_CFA_ADJUST_CFA:
29488 case REG_CFA_RESTORE:
29489 return;
29491 case REG_CFA_EXPRESSION:
29492 case REG_CFA_OFFSET:
29493 /* ??? Only handling here what we actually emit. */
29494 gcc_unreachable ();
29496 default:
29497 break;
29500 if (handled_one)
29501 return;
29502 pat = PATTERN (insn);
29503 found:
29505 switch (GET_CODE (pat))
29507 case SET:
29508 arm_unwind_emit_set (asm_out_file, pat);
29509 break;
29511 case SEQUENCE:
29512 /* Store multiple. */
29513 arm_unwind_emit_sequence (asm_out_file, pat);
29514 break;
29516 default:
29517 abort();
29522 /* Output a reference from a function exception table to the type_info
29523 object X. The EABI specifies that the symbol should be relocated by
29524 an R_ARM_TARGET2 relocation. */
29526 static bool
29527 arm_output_ttype (rtx x)
29529 fputs ("\t.word\t", asm_out_file);
29530 output_addr_const (asm_out_file, x);
29531 /* Use special relocations for symbol references. */
29532 if (!CONST_INT_P (x))
29533 fputs ("(TARGET2)", asm_out_file);
29534 fputc ('\n', asm_out_file);
29536 return TRUE;
29539 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29541 static void
29542 arm_asm_emit_except_personality (rtx personality)
29544 fputs ("\t.personality\t", asm_out_file);
29545 output_addr_const (asm_out_file, personality);
29546 fputc ('\n', asm_out_file);
29549 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29551 static void
29552 arm_asm_init_sections (void)
29554 exception_section = get_unnamed_section (0, output_section_asm_op,
29555 "\t.handlerdata");
29557 #endif /* ARM_UNWIND_INFO */
29559 /* Output unwind directives for the start/end of a function. */
29561 void
29562 arm_output_fn_unwind (FILE * f, bool prologue)
29564 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29565 return;
29567 if (prologue)
29568 fputs ("\t.fnstart\n", f);
29569 else
29571 /* If this function will never be unwound, then mark it as such.
29572 The same condition is used in arm_unwind_emit to suppress
29573 the frame annotations. */
29574 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29575 && (TREE_NOTHROW (current_function_decl)
29576 || crtl->all_throwers_are_sibcalls))
29577 fputs("\t.cantunwind\n", f);
29579 fputs ("\t.fnend\n", f);
29583 static bool
29584 arm_emit_tls_decoration (FILE *fp, rtx x)
29586 enum tls_reloc reloc;
29587 rtx val;
29589 val = XVECEXP (x, 0, 0);
29590 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29592 output_addr_const (fp, val);
29594 switch (reloc)
29596 case TLS_GD32:
29597 fputs ("(tlsgd)", fp);
29598 break;
29599 case TLS_LDM32:
29600 fputs ("(tlsldm)", fp);
29601 break;
29602 case TLS_LDO32:
29603 fputs ("(tlsldo)", fp);
29604 break;
29605 case TLS_IE32:
29606 fputs ("(gottpoff)", fp);
29607 break;
29608 case TLS_LE32:
29609 fputs ("(tpoff)", fp);
29610 break;
29611 case TLS_DESCSEQ:
29612 fputs ("(tlsdesc)", fp);
29613 break;
29614 default:
29615 gcc_unreachable ();
29618 switch (reloc)
29620 case TLS_GD32:
29621 case TLS_LDM32:
29622 case TLS_IE32:
29623 case TLS_DESCSEQ:
29624 fputs (" + (. - ", fp);
29625 output_addr_const (fp, XVECEXP (x, 0, 2));
29626 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29627 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29628 output_addr_const (fp, XVECEXP (x, 0, 3));
29629 fputc (')', fp);
29630 break;
29631 default:
29632 break;
29635 return TRUE;
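/* An illustrative example (label name and offset are hypothetical): a
   general-dynamic TLS_GD32 reference to `sym' is printed roughly as

       sym(tlsgd) + (. - .LPIC0 - 8)

   where the label and the constant come from the third and fourth
   operands of the UNSPEC.  */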
29638 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29640 static void
29641 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29643 gcc_assert (size == 4);
29644 fputs ("\t.word\t", file);
29645 output_addr_const (file, x);
29646 fputs ("(tlsldo)", file);
29649 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29651 static bool
29652 arm_output_addr_const_extra (FILE *fp, rtx x)
29654 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29655 return arm_emit_tls_decoration (fp, x);
29656 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29658 char label[256];
29659 int labelno = INTVAL (XVECEXP (x, 0, 0));
29661 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29662 assemble_name_raw (fp, label);
29664 return TRUE;
29666 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29668 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29669 if (GOT_PCREL)
29670 fputs ("+.", fp);
29671 fputs ("-(", fp);
29672 output_addr_const (fp, XVECEXP (x, 0, 0));
29673 fputc (')', fp);
29674 return TRUE;
29676 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29678 output_addr_const (fp, XVECEXP (x, 0, 0));
29679 if (GOT_PCREL)
29680 fputs ("+.", fp);
29681 fputs ("-(", fp);
29682 output_addr_const (fp, XVECEXP (x, 0, 1));
29683 fputc (')', fp);
29684 return TRUE;
29686 else if (GET_CODE (x) == CONST_VECTOR)
29687 return arm_emit_vector_const (fp, x);
29689 return FALSE;
29692 /* Output assembly for a shift instruction.
29693 SET_FLAGS determines how the instruction modifies the condition codes.
29694 0 - Do not set condition codes.
29695 1 - Set condition codes.
29696 2 - Use smallest instruction. */
29697 const char *
29698 arm_output_shift(rtx * operands, int set_flags)
29700 char pattern[100];
29701 static const char flag_chars[3] = {'?', '.', '!'};
29702 const char *shift;
29703 HOST_WIDE_INT val;
29704 char c;
29706 c = flag_chars[set_flags];
29707 if (TARGET_UNIFIED_ASM)
29709 shift = shift_op(operands[3], &val);
29710 if (shift)
29712 if (val != -1)
29713 operands[2] = GEN_INT(val);
29714 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29716 else
29717 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29719 else
29720 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29721 output_asm_insn (pattern, operands);
29722 return "";
29725 /* Output assembly for a WMMX immediate shift instruction. */
29726 const char *
29727 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29729 int shift = INTVAL (operands[2]);
29730 char templ[50];
29731 machine_mode opmode = GET_MODE (operands[0]);
29733 gcc_assert (shift >= 0);
29735 /* Handle a shift value that is out of range for the element width: more than
29736 63 for the D qualifier, 31 for the W qualifier or 15 for the H qualifier. */
29737 if (((opmode == V4HImode) && (shift > 15))
29738 || ((opmode == V2SImode) && (shift > 31))
29739 || ((opmode == DImode) && (shift > 63)))
29741 if (wror_or_wsra)
29743 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29744 output_asm_insn (templ, operands);
29745 if (opmode == DImode)
29747 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29748 output_asm_insn (templ, operands);
29751 else
29753 /* The destination register will contain all zeros. */
29754 sprintf (templ, "wzero\t%%0");
29755 output_asm_insn (templ, operands);
29757 return "";
29760 if ((opmode == DImode) && (shift > 32))
29762 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29763 output_asm_insn (templ, operands);
29764 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29765 output_asm_insn (templ, operands);
29767 else
29769 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29770 output_asm_insn (templ, operands);
29772 return "";
29775 /* Output assembly for a WMMX tinsr instruction. */
29776 const char *
29777 arm_output_iwmmxt_tinsr (rtx *operands)
29779 int mask = INTVAL (operands[3]);
29780 int i;
29781 char templ[50];
29782 int units = mode_nunits[GET_MODE (operands[0])];
29783 gcc_assert ((mask & (mask - 1)) == 0);
29784 for (i = 0; i < units; ++i)
29786 if ((mask & 0x01) == 1)
29788 break;
29790 mask >>= 1;
29792 gcc_assert (i < units);
29794 switch (GET_MODE (operands[0]))
29796 case V8QImode:
29797 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29798 break;
29799 case V4HImode:
29800 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29801 break;
29802 case V2SImode:
29803 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29804 break;
29805 default:
29806 gcc_unreachable ();
29807 break;
29809 output_asm_insn (templ, operands);
29811 return "";
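/* An illustrative example: operand 3 is a one-hot lane mask, so for a
   V4HImode destination with a mask of (1 << 2) the loop above finds
   i == 2 and the template emitted is "tinsrh%?\t%0, %2, #2".  */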
29814 /* Output a Thumb-1 casesi dispatch sequence. */
29815 const char *
29816 thumb1_output_casesi (rtx *operands)
29818 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29820 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29822 switch (GET_MODE(diff_vec))
29824 case QImode:
29825 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29826 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29827 case HImode:
29828 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29829 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29830 case SImode:
29831 return "bl\t%___gnu_thumb1_case_si";
29832 default:
29833 gcc_unreachable ();
29837 /* Output a Thumb-2 casesi instruction. */
29838 const char *
29839 thumb2_output_casesi (rtx *operands)
29841 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29843 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29845 output_asm_insn ("cmp\t%0, %1", operands);
29846 output_asm_insn ("bhi\t%l3", operands);
29847 switch (GET_MODE(diff_vec))
29849 case QImode:
29850 return "tbb\t[%|pc, %0]";
29851 case HImode:
29852 return "tbh\t[%|pc, %0, lsl #1]";
29853 case SImode:
29854 if (flag_pic)
29856 output_asm_insn ("adr\t%4, %l2", operands);
29857 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29858 output_asm_insn ("add\t%4, %4, %5", operands);
29859 return "bx\t%4";
29861 else
29863 output_asm_insn ("adr\t%4, %l2", operands);
29864 return "ldr\t%|pc, [%4, %0, lsl #2]";
29866 default:
29867 gcc_unreachable ();
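/* An illustrative example: for a QImode (byte offset) dispatch table the
   code produced is

       cmp   %0, %1
       bhi   %l3
       tbb   [pc, %0]

   with "tbh\t[pc, %0, lsl #1]" used instead for HImode tables.  */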
29871 /* Most ARM cores are single issue, but some newer ones can issue more than
29872 one instruction per cycle. The scheduler descriptions rely on this being correct. */
29873 static int
29874 arm_issue_rate (void)
29876 switch (arm_tune)
29878 case cortexa15:
29879 case cortexa57:
29880 return 3;
29882 case cortexr4:
29883 case cortexr4f:
29884 case cortexr5:
29885 case genericv7a:
29886 case cortexa5:
29887 case cortexa7:
29888 case cortexa8:
29889 case cortexa9:
29890 case cortexa12:
29891 case cortexa53:
29892 case fa726te:
29893 case marvell_pj4:
29894 return 2;
29896 default:
29897 return 1;
29901 /* A table and a function to perform ARM-specific name mangling for
29902 NEON vector types in order to conform to the AAPCS (see "Procedure
29903 Call Standard for the ARM Architecture", Appendix A). To qualify
29904 for emission with the mangled names defined in that document, a
29905 vector type must not only be of the correct mode but also be
29906 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29907 typedef struct
29909 machine_mode mode;
29910 const char *element_type_name;
29911 const char *aapcs_name;
29912 } arm_mangle_map_entry;
29914 static arm_mangle_map_entry arm_mangle_map[] = {
29915 /* 64-bit containerized types. */
29916 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29917 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29918 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29919 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29920 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29921 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29922 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29923 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29924 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29925 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29927 /* 128-bit containerized types. */
29928 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29929 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29930 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29931 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29932 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29933 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29934 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29935 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29936 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29937 { VOIDmode, NULL, NULL }
29940 const char *
29941 arm_mangle_type (const_tree type)
29943 arm_mangle_map_entry *pos = arm_mangle_map;
29945 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29946 has to be mangled as if it is in the "std" namespace. */
29947 if (TARGET_AAPCS_BASED
29948 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29949 return "St9__va_list";
29951 /* Half-precision float. */
29952 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29953 return "Dh";
29955 if (TREE_CODE (type) != VECTOR_TYPE)
29956 return NULL;
29958 /* Check the mode of the vector type, and the name of the vector
29959 element type, against the table. */
29960 while (pos->mode != VOIDmode)
29962 tree elt_type = TREE_TYPE (type);
29964 if (pos->mode == TYPE_MODE (type)
29965 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29966 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29967 pos->element_type_name))
29968 return pos->aapcs_name;
29970 pos++;
29973 /* Use the default mangling for unrecognized (possibly user-defined)
29974 vector types. */
29975 return NULL;
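/* An illustrative (hypothetical) example: int16x4_t is a V4HImode vector
   of __builtin_neon_hi elements, so by the table above it mangles as
   "16__simd64_int16_t"; a declaration such as "void f (int16x4_t)" would
   therefore be emitted as _Z1f16__simd64_int16_t.  */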
29978 /* Order of allocation of core registers for Thumb: this allocation is
29979 written over the corresponding initial entries of the array
29980 initialized with REG_ALLOC_ORDER. We allocate all low registers
29981 first. Saving and restoring a low register is usually cheaper than
29982 using a call-clobbered high register. */
29984 static const int thumb_core_reg_alloc_order[] =
29986 3, 2, 1, 0, 4, 5, 6, 7,
29987 14, 12, 8, 9, 10, 11
29990 /* Adjust register allocation order when compiling for Thumb. */
29992 void
29993 arm_order_regs_for_local_alloc (void)
29995 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29996 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29997 if (TARGET_THUMB)
29998 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29999 sizeof (thumb_core_reg_alloc_order));
30002 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30004 bool
30005 arm_frame_pointer_required (void)
30007 return (cfun->has_nonlocal_label
30008 || SUBTARGET_FRAME_POINTER_REQUIRED
30009 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30012 /* Only Thumb-1 lacks conditional execution, so return true if
30013 the target is not Thumb-1. */
30014 static bool
30015 arm_have_conditional_execution (void)
30017 return !TARGET_THUMB1;
30020 tree
30021 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30023 machine_mode in_mode, out_mode;
30024 int in_n, out_n;
30025 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30027 if (TREE_CODE (type_out) != VECTOR_TYPE
30028 || TREE_CODE (type_in) != VECTOR_TYPE)
30029 return NULL_TREE;
30031 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30032 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30033 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30034 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30036 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30037 decl of the vectorized builtin for the appropriate vector mode.
30038 NULL_TREE is returned if no such builtin is available. */
30039 #undef ARM_CHECK_BUILTIN_MODE
30040 #define ARM_CHECK_BUILTIN_MODE(C) \
30041 (TARGET_NEON && TARGET_FPU_ARMV8 \
30042 && flag_unsafe_math_optimizations \
30043 && ARM_CHECK_BUILTIN_MODE_1 (C))
30045 #undef ARM_CHECK_BUILTIN_MODE_1
30046 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30047 (out_mode == SFmode && out_n == C \
30048 && in_mode == SFmode && in_n == C)
30050 #undef ARM_FIND_VRINT_VARIANT
30051 #define ARM_FIND_VRINT_VARIANT(N) \
30052 (ARM_CHECK_BUILTIN_MODE (2) \
30053 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30054 : (ARM_CHECK_BUILTIN_MODE (4) \
30055 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30056 : NULL_TREE))
30058 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30060 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30061 switch (fn)
30063 case BUILT_IN_FLOORF:
30064 return ARM_FIND_VRINT_VARIANT (vrintm);
30065 case BUILT_IN_CEILF:
30066 return ARM_FIND_VRINT_VARIANT (vrintp);
30067 case BUILT_IN_TRUNCF:
30068 return ARM_FIND_VRINT_VARIANT (vrintz);
30069 case BUILT_IN_ROUNDF:
30070 return ARM_FIND_VRINT_VARIANT (vrinta);
30071 #undef ARM_CHECK_BUILTIN_MODE_1
30072 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30073 (out_mode == SImode && out_n == C \
30074 && in_mode == SFmode && in_n == C)
30076 #define ARM_FIND_VCVT_VARIANT(N) \
30077 (ARM_CHECK_BUILTIN_MODE (2) \
30078 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30079 : (ARM_CHECK_BUILTIN_MODE (4) \
30080 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30081 : NULL_TREE))
30083 #define ARM_FIND_VCVTU_VARIANT(N) \
30084 (ARM_CHECK_BUILTIN_MODE (2) \
30085 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30086 : (ARM_CHECK_BUILTIN_MODE (4) \
30087 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30088 : NULL_TREE))
30089 case BUILT_IN_LROUNDF:
30090 return out_unsigned_p
30091 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30092 : ARM_FIND_VCVT_VARIANT (vcvta);
30093 case BUILT_IN_LCEILF:
30094 return out_unsigned_p
30095 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30096 : ARM_FIND_VCVT_VARIANT (vcvtp);
30097 case BUILT_IN_LFLOORF:
30098 return out_unsigned_p
30099 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30100 : ARM_FIND_VCVT_VARIANT (vcvtm);
30101 #undef ARM_CHECK_BUILTIN_MODE
30102 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30103 (out_mode == N##mode && out_n == C \
30104 && in_mode == N##mode && in_n == C)
30105 case BUILT_IN_BSWAP16:
30106 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30107 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30108 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30109 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30110 else
30111 return NULL_TREE;
30112 case BUILT_IN_BSWAP32:
30113 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30114 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30115 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30116 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30117 else
30118 return NULL_TREE;
30119 case BUILT_IN_BSWAP64:
30120 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30121 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30122 else
30123 return NULL_TREE;
30124 case BUILT_IN_COPYSIGNF:
30125 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30126 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30127 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30128 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30129 else
30130 return NULL_TREE;
30132 default:
30133 return NULL_TREE;
30136 return NULL_TREE;
30138 #undef ARM_FIND_VCVT_VARIANT
30139 #undef ARM_FIND_VCVTU_VARIANT
30140 #undef ARM_CHECK_BUILTIN_MODE
30141 #undef ARM_FIND_VRINT_VARIANT
30144 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30145 static HOST_WIDE_INT
30146 arm_vector_alignment (const_tree type)
30148 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30150 if (TARGET_AAPCS_BASED)
30151 align = MIN (align, 64);
30153 return align;
30156 static unsigned int
30157 arm_autovectorize_vector_sizes (void)
30159 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30162 static bool
30163 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30165 /* Vectors which aren't in packed structures will not be less aligned than
30166 the natural alignment of their element type, so this is safe. */
30167 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30168 return !is_packed;
30170 return default_builtin_vector_alignment_reachable (type, is_packed);
30173 static bool
30174 arm_builtin_support_vector_misalignment (machine_mode mode,
30175 const_tree type, int misalignment,
30176 bool is_packed)
30178 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30180 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30182 if (is_packed)
30183 return align == 1;
30185 /* If the misalignment is unknown, we should be able to handle the access
30186 so long as it is not to a member of a packed data structure. */
30187 if (misalignment == -1)
30188 return true;
30190 /* Return true if the misalignment is a multiple of the natural alignment
30191 of the vector's element type. This is probably always going to be
30192 true in practice, since we've already established that this isn't a
30193 packed access. */
30194 return ((misalignment % align) == 0);
30197 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30198 is_packed);
30201 static void
30202 arm_conditional_register_usage (void)
30204 int regno;
30206 if (TARGET_THUMB1 && optimize_size)
30208 /* When optimizing for size on Thumb-1, it's better not
30209 to use the HI regs, because of the overhead of
30210 stacking them. */
30211 for (regno = FIRST_HI_REGNUM;
30212 regno <= LAST_HI_REGNUM; ++regno)
30213 fixed_regs[regno] = call_used_regs[regno] = 1;
30216 /* The link register can be clobbered by any branch insn,
30217 but we have no way to track that at present, so mark
30218 it as unavailable. */
30219 if (TARGET_THUMB1)
30220 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30222 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30224 /* VFPv3 registers are disabled when earlier VFP
30225 versions are selected due to the definition of
30226 LAST_VFP_REGNUM. */
30227 for (regno = FIRST_VFP_REGNUM;
30228 regno <= LAST_VFP_REGNUM; ++ regno)
30230 fixed_regs[regno] = 0;
30231 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30232 || regno >= FIRST_VFP_REGNUM + 32;
30236 if (TARGET_REALLY_IWMMXT)
30238 regno = FIRST_IWMMXT_GR_REGNUM;
30239 /* The 2002/10/09 revision of the XScale ABI has wCG0
30240 and wCG1 as call-preserved registers. The 2002/11/21
30241 revision changed this so that all wCG registers are
30242 scratch registers. */
30243 for (regno = FIRST_IWMMXT_GR_REGNUM;
30244 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30245 fixed_regs[regno] = 0;
30246 /* The XScale ABI has wR0 - wR9 as scratch registers,
30247 the rest as call-preserved registers. */
30248 for (regno = FIRST_IWMMXT_REGNUM;
30249 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30251 fixed_regs[regno] = 0;
30252 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30256 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30258 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30259 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30261 else if (TARGET_APCS_STACK)
30263 fixed_regs[10] = 1;
30264 call_used_regs[10] = 1;
30266 /* -mcaller-super-interworking reserves r11 for calls to
30267 _interwork_r11_call_via_rN(). Making the register global
30268 is an easy way of ensuring that it remains valid for all
30269 calls. */
30270 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30271 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30273 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30274 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30275 if (TARGET_CALLER_INTERWORKING)
30276 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30278 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30281 static reg_class_t
30282 arm_preferred_rename_class (reg_class_t rclass)
30284 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30285 using GENERAL_REGS. During the register renaming pass, we prefer LO_REGS,
30286 and code size can be reduced. */
30287 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30288 return LO_REGS;
30289 else
30290 return NO_REGS;
30293 /* Compute the attribute "length" of insn "*push_multi".
30294 So this function MUST be kept in sync with that insn pattern. */
30296 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30298 int i, regno, hi_reg;
30299 int num_saves = XVECLEN (parallel_op, 0);
30301 /* ARM mode. */
30302 if (TARGET_ARM)
30303 return 4;
30304 /* Thumb1 mode. */
30305 if (TARGET_THUMB1)
30306 return 2;
30308 /* Thumb2 mode. */
30309 regno = REGNO (first_op);
30310 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30311 for (i = 1; i < num_saves && !hi_reg; i++)
30313 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30314 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30317 if (!hi_reg)
30318 return 2;
30319 return 4;
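/* An illustrative example: in Thumb-2 a list such as "push {r4, r5, lr}"
   contains no high register other than lr, so the 16-bit encoding is
   usable and the length is 2; adding, say, r8 to the list forces the
   32-bit encoding and a length of 4.  */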
30322 /* Compute the number of instructions emitted by output_move_double. */
30324 arm_count_output_move_double_insns (rtx *operands)
30326 int count;
30327 rtx ops[2];
30328 /* output_move_double may modify the operands array, so call it
30329 here on a copy of the array. */
30330 ops[0] = operands[0];
30331 ops[1] = operands[1];
30332 output_move_double (ops, false, &count);
30333 return count;
30337 vfp3_const_double_for_fract_bits (rtx operand)
30339 REAL_VALUE_TYPE r0;
30341 if (!CONST_DOUBLE_P (operand))
30342 return 0;
30344 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30345 if (exact_real_inverse (DFmode, &r0))
30347 if (exact_real_truncate (DFmode, &r0))
30349 HOST_WIDE_INT value = real_to_integer (&r0);
30350 value = value & 0xffffffff;
30351 if ((value != 0) && ( (value & (value - 1)) == 0))
30352 return int_log2 (value);
30355 return 0;
30359 vfp3_const_double_for_bits (rtx operand)
30361 REAL_VALUE_TYPE r0;
30363 if (!CONST_DOUBLE_P (operand))
30364 return 0;
30366 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30367 if (exact_real_truncate (DFmode, &r0))
30369 HOST_WIDE_INT value = real_to_integer (&r0);
30370 value = value & 0xffffffff;
30371 if ((value != 0) && ( (value & (value - 1)) == 0))
30372 return int_log2 (value);
30375 return 0;
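/* Illustrative arithmetic: for the constant 0.125 the exact reciprocal
   is 8.0, a power of two, so vfp3_const_double_for_fract_bits returns
   int_log2 (8) == 3; this lets a multiply by such a constant be folded
   into a fixed-point VCVT using 3 fraction bits.  */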
30378 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30380 static void
30381 arm_pre_atomic_barrier (enum memmodel model)
30383 if (need_atomic_barrier_p (model, true))
30384 emit_insn (gen_memory_barrier ());
30387 static void
30388 arm_post_atomic_barrier (enum memmodel model)
30390 if (need_atomic_barrier_p (model, false))
30391 emit_insn (gen_memory_barrier ());
30394 /* Emit the load-exclusive and store-exclusive instructions.
30395 Use acquire and release versions if necessary. */
30397 static void
30398 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30400 rtx (*gen) (rtx, rtx);
30402 if (acq)
30404 switch (mode)
30406 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30407 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30408 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30409 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30410 default:
30411 gcc_unreachable ();
30414 else
30416 switch (mode)
30418 case QImode: gen = gen_arm_load_exclusiveqi; break;
30419 case HImode: gen = gen_arm_load_exclusivehi; break;
30420 case SImode: gen = gen_arm_load_exclusivesi; break;
30421 case DImode: gen = gen_arm_load_exclusivedi; break;
30422 default:
30423 gcc_unreachable ();
30427 emit_insn (gen (rval, mem));
30430 static void
30431 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30432 rtx mem, bool rel)
30434 rtx (*gen) (rtx, rtx, rtx);
30436 if (rel)
30438 switch (mode)
30440 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30441 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30442 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30443 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30444 default:
30445 gcc_unreachable ();
30448 else
30450 switch (mode)
30452 case QImode: gen = gen_arm_store_exclusiveqi; break;
30453 case HImode: gen = gen_arm_store_exclusivehi; break;
30454 case SImode: gen = gen_arm_store_exclusivesi; break;
30455 case DImode: gen = gen_arm_store_exclusivedi; break;
30456 default:
30457 gcc_unreachable ();
30461 emit_insn (gen (bval, rval, mem));
30464 /* Emit the jump INSN and mark it as very unlikely to be taken. */
30466 static void
30467 emit_unlikely_jump (rtx insn)
30469 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30471 insn = emit_jump_insn (insn);
30472 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
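/* Illustrative arithmetic: with REG_BR_PROB_BASE == 10000 the note value
   computed above is 10000 / 100 - 1 == 99, i.e. the branch is annotated
   as taken roughly 1% of the time.  */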
30475 /* Expand a compare and swap pattern. */
30477 void
30478 arm_expand_compare_and_swap (rtx operands[])
30480 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30481 machine_mode mode;
30482 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30484 bval = operands[0];
30485 rval = operands[1];
30486 mem = operands[2];
30487 oldval = operands[3];
30488 newval = operands[4];
30489 is_weak = operands[5];
30490 mod_s = operands[6];
30491 mod_f = operands[7];
30492 mode = GET_MODE (mem);
30494 /* Normally the succ memory model must be stronger than fail, but in the
30495 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30496 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30498 if (TARGET_HAVE_LDACQ
30499 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30500 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30501 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30503 switch (mode)
30505 case QImode:
30506 case HImode:
30507 /* For narrow modes, we're going to perform the comparison in SImode,
30508 so do the zero-extension now. */
30509 rval = gen_reg_rtx (SImode);
30510 oldval = convert_modes (SImode, mode, oldval, true);
30511 /* FALLTHRU */
30513 case SImode:
30514 /* Force the value into a register if needed. We waited until after
30515 the zero-extension above to do this properly. */
30516 if (!arm_add_operand (oldval, SImode))
30517 oldval = force_reg (SImode, oldval);
30518 break;
30520 case DImode:
30521 if (!cmpdi_operand (oldval, mode))
30522 oldval = force_reg (mode, oldval);
30523 break;
30525 default:
30526 gcc_unreachable ();
30529 switch (mode)
30531 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30532 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30533 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30534 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30535 default:
30536 gcc_unreachable ();
30539 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30541 if (mode == QImode || mode == HImode)
30542 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30544 /* In all cases, we arrange for success to be signaled by Z set.
30545 This arrangement allows for the boolean result to be used directly
30546 in a subsequent branch, post optimization. */
30547 x = gen_rtx_REG (CCmode, CC_REGNUM);
30548 x = gen_rtx_EQ (SImode, x, const0_rtx);
30549 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30552 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30553 another memory store between the load-exclusive and store-exclusive can
30554 reset the monitor from Exclusive to Open state. This means we must wait
30555 until after reload to split the pattern, lest we get a register spill in
30556 the middle of the atomic sequence. */
30558 void
30559 arm_split_compare_and_swap (rtx operands[])
30561 rtx rval, mem, oldval, newval, scratch;
30562 machine_mode mode;
30563 enum memmodel mod_s, mod_f;
30564 bool is_weak;
30565 rtx_code_label *label1, *label2;
30566 rtx x, cond;
30568 rval = operands[0];
30569 mem = operands[1];
30570 oldval = operands[2];
30571 newval = operands[3];
30572 is_weak = (operands[4] != const0_rtx);
30573 mod_s = (enum memmodel) INTVAL (operands[5]);
30574 mod_f = (enum memmodel) INTVAL (operands[6]);
30575 scratch = operands[7];
30576 mode = GET_MODE (mem);
30578 bool use_acquire = TARGET_HAVE_LDACQ
30579 && !(mod_s == MEMMODEL_RELAXED
30580 || mod_s == MEMMODEL_CONSUME
30581 || mod_s == MEMMODEL_RELEASE);
30583 bool use_release = TARGET_HAVE_LDACQ
30584 && !(mod_s == MEMMODEL_RELAXED
30585 || mod_s == MEMMODEL_CONSUME
30586 || mod_s == MEMMODEL_ACQUIRE);
30588 /* Checks whether a barrier is needed and emits one accordingly. */
30589 if (!(use_acquire || use_release))
30590 arm_pre_atomic_barrier (mod_s);
30592 label1 = NULL;
30593 if (!is_weak)
30595 label1 = gen_label_rtx ();
30596 emit_label (label1);
30598 label2 = gen_label_rtx ();
30600 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30602 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30603 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30604 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30605 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30606 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30608 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30610 /* Weak or strong, we want EQ to be true for success, so that we
30611 match the flags that we got from the compare above. */
30612 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30613 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30614 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30616 if (!is_weak)
30618 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30619 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30620 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30621 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30624 if (mod_f != MEMMODEL_RELAXED)
30625 emit_label (label2);
30627 /* Checks whether a barrier is needed and emits one accordingly. */
30628 if (!(use_acquire || use_release))
30629 arm_post_atomic_barrier (mod_s);
30631 if (mod_f == MEMMODEL_RELAXED)
30632 emit_label (label2);
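/* An illustrative sketch (register names are placeholders): a strong
   SImode compare-and-swap with sequentially consistent ordering, on a
   target without acquire/release loads, is split into roughly

       dmb                      @ pre barrier
   1:  ldrex   rR, [rM]
       cmp     rR, rOld
       bne     2f
       strex   rS, rNew, [rM]
       cmp     rS, #0
       bne     1b
   2:  dmb                      @ post barrier
   */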
30635 void
30636 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30637 rtx value, rtx model_rtx, rtx cond)
30639 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30640 machine_mode mode = GET_MODE (mem);
30641 machine_mode wmode = (mode == DImode ? DImode : SImode);
30642 rtx_code_label *label;
30643 rtx x;
30645 bool use_acquire = TARGET_HAVE_LDACQ
30646 && !(model == MEMMODEL_RELAXED
30647 || model == MEMMODEL_CONSUME
30648 || model == MEMMODEL_RELEASE);
30650 bool use_release = TARGET_HAVE_LDACQ
30651 && !(model == MEMMODEL_RELAXED
30652 || model == MEMMODEL_CONSUME
30653 || model == MEMMODEL_ACQUIRE);
30655 /* Checks whether a barrier is needed and emits one accordingly. */
30656 if (!(use_acquire || use_release))
30657 arm_pre_atomic_barrier (model);
30659 label = gen_label_rtx ();
30660 emit_label (label);
30662 if (new_out)
30663 new_out = gen_lowpart (wmode, new_out);
30664 if (old_out)
30665 old_out = gen_lowpart (wmode, old_out);
30666 else
30667 old_out = new_out;
30668 value = simplify_gen_subreg (wmode, value, mode, 0);
30670 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30672 switch (code)
30674 case SET:
30675 new_out = value;
30676 break;
30678 case NOT:
30679 x = gen_rtx_AND (wmode, old_out, value);
30680 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30681 x = gen_rtx_NOT (wmode, new_out);
30682 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30683 break;
30685 case MINUS:
30686 if (CONST_INT_P (value))
30688 value = GEN_INT (-INTVAL (value));
30689 code = PLUS;
30691 /* FALLTHRU */
30693 case PLUS:
30694 if (mode == DImode)
30696 /* DImode plus/minus need to clobber flags. */
30697 /* The adddi3 and subdi3 patterns are incorrectly written so that
30698 they require matching operands, even when we could easily support
30699 three operands. Thankfully, this can be fixed up post-splitting,
30700 as the individual add+adc patterns do accept three operands and
30701 post-reload cprop can make these moves go away. */
30702 emit_move_insn (new_out, old_out);
30703 if (code == PLUS)
30704 x = gen_adddi3 (new_out, new_out, value);
30705 else
30706 x = gen_subdi3 (new_out, new_out, value);
30707 emit_insn (x);
30708 break;
30710 /* FALLTHRU */
30712 default:
30713 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30714 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30715 break;
30718 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30719 use_release);
30721 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30722 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30724 /* Checks whether a barrier is needed and emits one accordingly. */
30725 if (!(use_acquire || use_release))
30726 arm_post_atomic_barrier (model);
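/* An illustrative sketch (register names are placeholders): an SImode
   atomic fetch-and-add expands along the lines of

   1:  ldrex   rOld, [rM]
       add     rNew, rOld, rVal
       strex   rC, rNew, [rM]
       cmp     rC, #0
       bne     1b

   with barriers or acquire/release accesses added around the loop
   according to MODEL, as selected above.  */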
30729 #define MAX_VECT_LEN 16
30731 struct expand_vec_perm_d
30733 rtx target, op0, op1;
30734 unsigned char perm[MAX_VECT_LEN];
30735 machine_mode vmode;
30736 unsigned char nelt;
30737 bool one_vector_p;
30738 bool testing_p;
30741 /* Generate a variable permutation. */
30743 static void
30744 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30746 machine_mode vmode = GET_MODE (target);
30747 bool one_vector_p = rtx_equal_p (op0, op1);
30749 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30750 gcc_checking_assert (GET_MODE (op0) == vmode);
30751 gcc_checking_assert (GET_MODE (op1) == vmode);
30752 gcc_checking_assert (GET_MODE (sel) == vmode);
30753 gcc_checking_assert (TARGET_NEON);
30755 if (one_vector_p)
30757 if (vmode == V8QImode)
30758 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30759 else
30760 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30762 else
30764 rtx pair;
30766 if (vmode == V8QImode)
30768 pair = gen_reg_rtx (V16QImode);
30769 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30770 pair = gen_lowpart (TImode, pair);
30771 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30773 else
30775 pair = gen_reg_rtx (OImode);
30776 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30777 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30782 void
30783 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30785 machine_mode vmode = GET_MODE (target);
30786 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30787 bool one_vector_p = rtx_equal_p (op0, op1);
30788 rtx rmask[MAX_VECT_LEN], mask;
30790 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30791 numbering of elements for big-endian, we must reverse the order. */
30792 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30794 /* The VTBL instruction does not use a modulo index, so we must take care
30795 of that ourselves. */
30796 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30797 for (i = 0; i < nelt; ++i)
30798 rmask[i] = mask;
30799 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30800 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30802 arm_expand_vec_perm_1 (target, op0, op1, sel);
30805 /* Generate or test for an insn that supports a constant permutation. */
30807 /* Recognize patterns for the VUZP insns. */
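/* Example selectors (added for illustration): for two V8QImode operands,
   the first result of VUZP corresponds to
   perm = { 0, 2, 4, 6, 8, 10, 12, 14 } (even-indexed elements) and the
   second to { 1, 3, 5, 7, 9, 11, 13, 15 } (odd-indexed elements).  */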
30809 static bool
30810 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30812 unsigned int i, odd, mask, nelt = d->nelt;
30813 rtx out0, out1, in0, in1, x;
30814 rtx (*gen)(rtx, rtx, rtx, rtx);
30816 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30817 return false;
30819 /* Note that these are little-endian tests. Adjust for big-endian later. */
30820 if (d->perm[0] == 0)
30821 odd = 0;
30822 else if (d->perm[0] == 1)
30823 odd = 1;
30824 else
30825 return false;
30826 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30828 for (i = 0; i < nelt; i++)
30830 unsigned elt = (i * 2 + odd) & mask;
30831 if (d->perm[i] != elt)
30832 return false;
30835 /* Success! */
30836 if (d->testing_p)
30837 return true;
30839 switch (d->vmode)
30841 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30842 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30843 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30844 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30845 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30846 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30847 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30848 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30849 default:
30850 gcc_unreachable ();
30853 in0 = d->op0;
30854 in1 = d->op1;
30855 if (BYTES_BIG_ENDIAN)
30857 x = in0, in0 = in1, in1 = x;
30858 odd = !odd;
30861 out0 = d->target;
30862 out1 = gen_reg_rtx (d->vmode);
30863 if (odd)
30864 x = out0, out0 = out1, out1 = x;
30866 emit_insn (gen (out0, in0, in1, out1));
30867 return true;
30870 /* Recognize patterns for the VZIP insns. */
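/* Example selectors (added for illustration): for two V8QImode operands,
   the first result of VZIP interleaves the low halves,
   perm = { 0, 8, 1, 9, 2, 10, 3, 11 }, and the second interleaves the
   high halves, perm = { 4, 12, 5, 13, 6, 14, 7, 15 }.  */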
30872 static bool
30873 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30875 unsigned int i, high, mask, nelt = d->nelt;
30876 rtx out0, out1, in0, in1, x;
30877 rtx (*gen)(rtx, rtx, rtx, rtx);
30879 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30880 return false;
30882 /* Note that these are little-endian tests. Adjust for big-endian later. */
30883 high = nelt / 2;
30884 if (d->perm[0] == high)
30886 else if (d->perm[0] == 0)
30887 high = 0;
30888 else
30889 return false;
30890 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30892 for (i = 0; i < nelt / 2; i++)
30894 unsigned elt = (i + high) & mask;
30895 if (d->perm[i * 2] != elt)
30896 return false;
30897 elt = (elt + nelt) & mask;
30898 if (d->perm[i * 2 + 1] != elt)
30899 return false;
30902 /* Success! */
30903 if (d->testing_p)
30904 return true;
30906 switch (d->vmode)
30908 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30909 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30910 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30911 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30912 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30913 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30914 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30915 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30916 default:
30917 gcc_unreachable ();
30920 in0 = d->op0;
30921 in1 = d->op1;
30922 if (BYTES_BIG_ENDIAN)
30924 x = in0, in0 = in1, in1 = x;
30925 high = !high;
30928 out0 = d->target;
30929 out1 = gen_reg_rtx (d->vmode);
30930 if (high)
30931 x = out0, out0 = out1, out1 = x;
30933 emit_insn (gen (out0, in0, in1, out1));
30934 return true;
30937 /* Recognize patterns for the VREV insns. */
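/* Example selectors (added for illustration): with QImode elements,
   VREV64 corresponds to perm = { 7, 6, 5, 4, 3, 2, 1, 0, ... } (diff 7),
   VREV32 to { 3, 2, 1, 0, 7, 6, 5, 4, ... } (diff 3) and VREV16 to
   { 1, 0, 3, 2, 5, 4, 7, 6, ... } (diff 1), i.e. the elements are
   reversed within each 64-, 32- or 16-bit group.  */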
30939 static bool
30940 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30942 unsigned int i, j, diff, nelt = d->nelt;
30943 rtx (*gen)(rtx, rtx, rtx);
30945 if (!d->one_vector_p)
30946 return false;
30948 diff = d->perm[0];
30949 switch (diff)
30951 case 7:
30952 switch (d->vmode)
30954 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30955 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30956 default:
30957 return false;
30959 break;
30960 case 3:
30961 switch (d->vmode)
30963 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30964 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30965 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30966 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30967 default:
30968 return false;
30970 break;
30971 case 1:
30972 switch (d->vmode)
30974 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30975 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30976 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30977 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30978 case V4SImode: gen = gen_neon_vrev64v4si; break;
30979 case V2SImode: gen = gen_neon_vrev64v2si; break;
30980 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30981 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30982 default:
30983 return false;
30985 break;
30986 default:
30987 return false;
30990 for (i = 0; i < nelt ; i += diff + 1)
30991 for (j = 0; j <= diff; j += 1)
30993 /* This is guaranteed to hold, since diff is 7, 3 or 1
30994 and there are always enough elements remaining to
30995 cover the group. A vector mask yielding any other
30996 value of diff implies that something has gone wrong
30997 by the time we get here. */
30998 gcc_assert (i + j < nelt);
30999 if (d->perm[i + j] != i + diff - j)
31000 return false;
31003 /* Success! */
31004 if (d->testing_p)
31005 return true;
31007 /* ??? The third operand is an artifact of the builtin infrastructure
31008 and is ignored by the actual instruction. */
31009 emit_insn (gen (d->target, d->op0, const0_rtx));
31010 return true;
31013 /* Recognize patterns for the VTRN insns. */
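/* Example selectors (added for illustration): for two V8QImode operands,
   the first result of VTRN is perm = { 0, 8, 2, 10, 4, 12, 6, 14 } and
   the second is { 1, 9, 3, 11, 5, 13, 7, 15 }, i.e. a transpose of the
   2x2 blocks formed by the two inputs.  */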
31015 static bool
31016 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31018 unsigned int i, odd, mask, nelt = d->nelt;
31019 rtx out0, out1, in0, in1, x;
31020 rtx (*gen)(rtx, rtx, rtx, rtx);
31022 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31023 return false;
31025 /* Note that these are little-endian tests. Adjust for big-endian later. */
31026 if (d->perm[0] == 0)
31027 odd = 0;
31028 else if (d->perm[0] == 1)
31029 odd = 1;
31030 else
31031 return false;
31032 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31034 for (i = 0; i < nelt; i += 2)
31036 if (d->perm[i] != i + odd)
31037 return false;
31038 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31039 return false;
31042 /* Success! */
31043 if (d->testing_p)
31044 return true;
31046 switch (d->vmode)
31048 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31049 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31050 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31051 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31052 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31053 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31054 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31055 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31056 default:
31057 gcc_unreachable ();
31060 in0 = d->op0;
31061 in1 = d->op1;
31062 if (BYTES_BIG_ENDIAN)
31064 x = in0, in0 = in1, in1 = x;
31065 odd = !odd;
31068 out0 = d->target;
31069 out1 = gen_reg_rtx (d->vmode);
31070 if (odd)
31071 x = out0, out0 = out1, out1 = x;
31073 emit_insn (gen (out0, in0, in1, out1));
31074 return true;
31077 /* Recognize patterns for the VEXT insns. */
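/* Example selector (added for illustration): for V8QImode operands,
   VEXT with an offset of 3 extracts bytes 3..10 of the concatenated
   pair, i.e. perm = { 3, 4, 5, 6, 7, 8, 9, 10 }.  */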
31079 static bool
31080 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31082 unsigned int i, nelt = d->nelt;
31083 rtx (*gen) (rtx, rtx, rtx, rtx);
31084 rtx offset;
31086 unsigned int location;
31088 unsigned int next = d->perm[0] + 1;
31090 /* TODO: Handle GCC's numbering of elements for big-endian. */
31091 if (BYTES_BIG_ENDIAN)
31092 return false;
31094 /* Check if the extracted indexes are increasing by one. */
31095 for (i = 1; i < nelt; next++, i++)
31097 /* If we hit the most significant element of the 2nd vector in
31098 the previous iteration, no need to test further. */
31099 if (next == 2 * nelt)
31100 return false;
31102 /* If we are operating on only one vector: it could be a
31103 rotation. If there are only two elements of size < 64, let
31104 arm_evpc_neon_vrev catch it. */
31105 if (d->one_vector_p && (next == nelt))
31107 if ((nelt == 2) && (d->vmode != V2DImode))
31108 return false;
31109 else
31110 next = 0;
31113 if (d->perm[i] != next)
31114 return false;
31117 location = d->perm[0];
31119 switch (d->vmode)
31121 case V16QImode: gen = gen_neon_vextv16qi; break;
31122 case V8QImode: gen = gen_neon_vextv8qi; break;
31123 case V4HImode: gen = gen_neon_vextv4hi; break;
31124 case V8HImode: gen = gen_neon_vextv8hi; break;
31125 case V2SImode: gen = gen_neon_vextv2si; break;
31126 case V4SImode: gen = gen_neon_vextv4si; break;
31127 case V2SFmode: gen = gen_neon_vextv2sf; break;
31128 case V4SFmode: gen = gen_neon_vextv4sf; break;
31129 case V2DImode: gen = gen_neon_vextv2di; break;
31130 default:
31131 return false;
31134 /* Success! */
31135 if (d->testing_p)
31136 return true;
31138 offset = GEN_INT (location);
31139 emit_insn (gen (d->target, d->op0, d->op1, offset));
31140 return true;
31143 /* The NEON VTBL instruction is a fully variable permutation that's even
31144 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31145 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31146 can do slightly better by expanding this as a constant where we don't
31147 have to apply a mask. */
31149 static bool
31150 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31152 rtx rperm[MAX_VECT_LEN], sel;
31153 machine_mode vmode = d->vmode;
31154 unsigned int i, nelt = d->nelt;
31156 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31157 numbering of elements for big-endian, we must reverse the order. */
31158 if (BYTES_BIG_ENDIAN)
31159 return false;
31161 if (d->testing_p)
31162 return true;
31164 /* Generic code will try constant permutation twice. Once with the
31165 original mode and again with the elements lowered to QImode.
31166 So wait and don't do the selector expansion ourselves. */
31167 if (vmode != V8QImode && vmode != V16QImode)
31168 return false;
31170 for (i = 0; i < nelt; ++i)
31171 rperm[i] = GEN_INT (d->perm[i]);
31172 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31173 sel = force_reg (vmode, sel);
31175 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31176 return true;
31179 static bool
31180 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31182 /* Check if the input mask matches vext before reordering the
31183 operands. */
31184 if (TARGET_NEON)
31185 if (arm_evpc_neon_vext (d))
31186 return true;
31188 /* The pattern matching functions above are written to look for a small
31189 number to begin the sequence (0, 1, N/2). If we begin with an index
31190 from the second operand, we can swap the operands. */
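/* Example (added for illustration): perm = { 8, 0, 9, 1, 10, 2, 11, 3 }
   on V8QImode starts with an index from the second operand; after
   swapping op0 and op1 it becomes { 0, 8, 1, 9, 2, 10, 3, 11 }, which
   the VZIP recognizer below can match.  */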
31191 if (d->perm[0] >= d->nelt)
31193 unsigned i, nelt = d->nelt;
31194 rtx x;
31196 for (i = 0; i < nelt; ++i)
31197 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31199 x = d->op0;
31200 d->op0 = d->op1;
31201 d->op1 = x;
31204 if (TARGET_NEON)
31206 if (arm_evpc_neon_vuzp (d))
31207 return true;
31208 if (arm_evpc_neon_vzip (d))
31209 return true;
31210 if (arm_evpc_neon_vrev (d))
31211 return true;
31212 if (arm_evpc_neon_vtrn (d))
31213 return true;
31214 return arm_evpc_neon_vtbl (d);
31216 return false;
31219 /* Expand a vec_perm_const pattern. */
31221 bool
31222 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31224 struct expand_vec_perm_d d;
31225 int i, nelt, which;
31227 d.target = target;
31228 d.op0 = op0;
31229 d.op1 = op1;
31231 d.vmode = GET_MODE (target);
31232 gcc_assert (VECTOR_MODE_P (d.vmode));
31233 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31234 d.testing_p = false;
31236 for (i = which = 0; i < nelt; ++i)
31238 rtx e = XVECEXP (sel, 0, i);
31239 int ei = INTVAL (e) & (2 * nelt - 1);
31240 which |= (ei < nelt ? 1 : 2);
31241 d.perm[i] = ei;
31244 switch (which)
31246 default:
31247 gcc_unreachable();
31249 case 3:
31250 d.one_vector_p = false;
31251 if (!rtx_equal_p (op0, op1))
31252 break;
31254 /* The elements of PERM do not suggest that only the first operand
31255 is used, but both operands are identical. Allow easier matching
31256 of the permutation by folding the permutation into the single
31257 input vector. */
31258 /* FALLTHRU */
31259 case 2:
31260 for (i = 0; i < nelt; ++i)
31261 d.perm[i] &= nelt - 1;
31262 d.op0 = op1;
31263 d.one_vector_p = true;
31264 break;
31266 case 1:
31267 d.op1 = op0;
31268 d.one_vector_p = true;
31269 break;
31272 return arm_expand_vec_perm_const_1 (&d);
31275 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31277 static bool
31278 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31279 const unsigned char *sel)
31281 struct expand_vec_perm_d d;
31282 unsigned int i, nelt, which;
31283 bool ret;
31285 d.vmode = vmode;
31286 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31287 d.testing_p = true;
31288 memcpy (d.perm, sel, nelt);
31290 /* Categorize the set of elements in the selector. */
31291 for (i = which = 0; i < nelt; ++i)
31293 unsigned char e = d.perm[i];
31294 gcc_assert (e < 2 * nelt);
31295 which |= (e < nelt ? 1 : 2);
31298 /* For all elements from second vector, fold the elements to first. */
31299 if (which == 2)
31300 for (i = 0; i < nelt; ++i)
31301 d.perm[i] -= nelt;
31303 /* Check whether the mask can be applied to the vector type. */
31304 d.one_vector_p = (which != 3);
31306 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31307 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31308 if (!d.one_vector_p)
31309 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31311 start_sequence ();
31312 ret = arm_expand_vec_perm_const_1 (&d);
31313 end_sequence ();
31315 return ret;
31318 bool
31319 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31321 /* If we are soft float and we do not have ldrd
31322 then all auto increment forms are ok. */
31323 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31324 return true;
31326 switch (code)
31328 /* Post increment and Pre Decrement are supported for all
31329 instruction forms except for vector forms. */
31330 case ARM_POST_INC:
31331 case ARM_PRE_DEC:
31332 if (VECTOR_MODE_P (mode))
31334 if (code != ARM_PRE_DEC)
31335 return true;
31336 else
31337 return false;
31340 return true;
31342 case ARM_POST_DEC:
31343 case ARM_PRE_INC:
31344 /* Without LDRD and mode size greater than
31345 word size, there is no point in auto-incrementing
31346 because ldm and stm will not have these forms. */
31347 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31348 return false;
31350 /* Vector and floating point modes do not support
31351 these auto increment forms. */
31352 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31353 return false;
31355 return true;
31357 default:
31358 return false;
31362 return false;
31365 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31366 on ARM, since we know that shifts by negative amounts are no-ops.
31367 Additionally, the default expansion code is not available or suitable
31368 for post-reload insn splits (this can occur when the register allocator
31369 chooses not to do a shift in NEON).
31371 This function is used in both initial expand and post-reload splits, and
31372 handles all kinds of 64-bit shifts.
31374 Input requirements:
31375 - It is safe for the input and output to be the same register, but
31376 early-clobber rules apply for the shift amount and scratch registers.
31377 - Shift by register requires both scratch registers. In all other cases
31378 the scratch registers may be NULL.
31379 - Ashiftrt by a register also clobbers the CC register. */
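/* Illustrative sketch (added, not from the original source): for a
   constant left shift by 5 the code below emits the equivalent of
	lsl	out_high, in_high, #5
	orr	out_high, out_high, in_low, lsr #27
	lsl	out_low, in_low, #5
   with the "up"/"down" naming explained in the function body.  */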
31380 void
31381 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31382 rtx amount, rtx scratch1, rtx scratch2)
31384 rtx out_high = gen_highpart (SImode, out);
31385 rtx out_low = gen_lowpart (SImode, out);
31386 rtx in_high = gen_highpart (SImode, in);
31387 rtx in_low = gen_lowpart (SImode, in);
31389 /* Terminology:
31390 in = the register pair containing the input value.
31391 out = the destination register pair.
31392 up = the high- or low-part of each pair.
31393 down = the opposite part to "up".
31394 In a shift, we can consider bits to shift from "up"-stream to
31395 "down"-stream, so in a left-shift "up" is the low-part and "down"
31396 is the high-part of each register pair. */
31398 rtx out_up = code == ASHIFT ? out_low : out_high;
31399 rtx out_down = code == ASHIFT ? out_high : out_low;
31400 rtx in_up = code == ASHIFT ? in_low : in_high;
31401 rtx in_down = code == ASHIFT ? in_high : in_low;
31403 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31404 gcc_assert (out
31405 && (REG_P (out) || GET_CODE (out) == SUBREG)
31406 && GET_MODE (out) == DImode);
31407 gcc_assert (in
31408 && (REG_P (in) || GET_CODE (in) == SUBREG)
31409 && GET_MODE (in) == DImode);
31410 gcc_assert (amount
31411 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31412 && GET_MODE (amount) == SImode)
31413 || CONST_INT_P (amount)));
31414 gcc_assert (scratch1 == NULL
31415 || (GET_CODE (scratch1) == SCRATCH)
31416 || (GET_MODE (scratch1) == SImode
31417 && REG_P (scratch1)));
31418 gcc_assert (scratch2 == NULL
31419 || (GET_CODE (scratch2) == SCRATCH)
31420 || (GET_MODE (scratch2) == SImode
31421 && REG_P (scratch2)));
31422 gcc_assert (!REG_P (out) || !REG_P (amount)
31423 || !HARD_REGISTER_P (out)
31424 || (REGNO (out) != REGNO (amount)
31425 && REGNO (out) + 1 != REGNO (amount)));
31427 /* Macros to make following code more readable. */
31428 #define SUB_32(DEST,SRC) \
31429 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31430 #define RSB_32(DEST,SRC) \
31431 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31432 #define SUB_S_32(DEST,SRC) \
31433 gen_addsi3_compare0 ((DEST), (SRC), \
31434 GEN_INT (-32))
31435 #define SET(DEST,SRC) \
31436 gen_rtx_SET (SImode, (DEST), (SRC))
31437 #define SHIFT(CODE,SRC,AMOUNT) \
31438 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31439 #define LSHIFT(CODE,SRC,AMOUNT) \
31440 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31441 SImode, (SRC), (AMOUNT))
31442 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31443 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31444 SImode, (SRC), (AMOUNT))
31445 #define ORR(A,B) \
31446 gen_rtx_IOR (SImode, (A), (B))
31447 #define BRANCH(COND,LABEL) \
31448 gen_arm_cond_branch ((LABEL), \
31449 gen_rtx_ ## COND (CCmode, cc_reg, \
31450 const0_rtx), \
31451 cc_reg)
31453 /* Shifts by register and shifts by constant are handled separately. */
31454 if (CONST_INT_P (amount))
31456 /* We have a shift-by-constant. */
31458 /* First, handle out-of-range shift amounts.
31459 In both cases we try to match the result that an ARM instruction in a
31460 shift-by-register would give. This helps reduce execution
31461 differences between optimization levels, but it won't stop other
31462 parts of the compiler doing different things. This is "undefined
31463 behaviour", in any case. */
31464 if (INTVAL (amount) <= 0)
31465 emit_insn (gen_movdi (out, in));
31466 else if (INTVAL (amount) >= 64)
31468 if (code == ASHIFTRT)
31470 rtx const31_rtx = GEN_INT (31);
31471 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31472 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31474 else
31475 emit_insn (gen_movdi (out, const0_rtx));
31478 /* Now handle valid shifts. */
31479 else if (INTVAL (amount) < 32)
31481 /* Shifts by a constant less than 32. */
31482 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31484 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31485 emit_insn (SET (out_down,
31486 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31487 out_down)));
31488 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31490 else
31492 /* Shifts by a constant greater than 31. */
31493 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31495 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31496 if (code == ASHIFTRT)
31497 emit_insn (gen_ashrsi3 (out_up, in_up,
31498 GEN_INT (31)));
31499 else
31500 emit_insn (SET (out_up, const0_rtx));
31503 else
31505 /* We have a shift-by-register. */
31506 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31508 /* This alternative requires the scratch registers. */
31509 gcc_assert (scratch1 && REG_P (scratch1));
31510 gcc_assert (scratch2 && REG_P (scratch2));
31512 /* We will need the values "amount-32" and "32-amount" later.
31513 Swapping them around now allows the later code to be more general. */
31514 switch (code)
31516 case ASHIFT:
31517 emit_insn (SUB_32 (scratch1, amount));
31518 emit_insn (RSB_32 (scratch2, amount));
31519 break;
31520 case ASHIFTRT:
31521 emit_insn (RSB_32 (scratch1, amount));
31522 /* Also set CC = amount > 32. */
31523 emit_insn (SUB_S_32 (scratch2, amount));
31524 break;
31525 case LSHIFTRT:
31526 emit_insn (RSB_32 (scratch1, amount));
31527 emit_insn (SUB_32 (scratch2, amount));
31528 break;
31529 default:
31530 gcc_unreachable ();
31533 /* Emit code like this:
31535 arithmetic-left:
31536 out_down = in_down << amount;
31537 out_down = (in_up << (amount - 32)) | out_down;
31538 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31539 out_up = in_up << amount;
31541 arithmetic-right:
31542 out_down = in_down >> amount;
31543 out_down = (in_up << (32 - amount)) | out_down;
31544 if (amount < 32)
31545 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31546 out_up = in_up << amount;
31548 logical-right:
31549 out_down = in_down >> amount;
31550 out_down = (in_up << (32 - amount)) | out_down;
31551 if (amount < 32)
31552 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31553 out_up = in_up << amount;
31555 The ARM and Thumb2 variants are the same but implemented slightly
31556 differently. If this were only called during expand we could just
31557 use the Thumb2 case and let combine do the right thing, but this
31558 can also be called from post-reload splitters. */
31560 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31562 if (!TARGET_THUMB2)
31564 /* Emit code for ARM mode. */
31565 emit_insn (SET (out_down,
31566 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31567 if (code == ASHIFTRT)
31569 rtx_code_label *done_label = gen_label_rtx ();
31570 emit_jump_insn (BRANCH (LT, done_label));
31571 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31572 out_down)));
31573 emit_label (done_label);
31575 else
31576 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31577 out_down)));
31579 else
31581 /* Emit code for Thumb2 mode.
31582 Thumb2 can't do shift and or in one insn. */
31583 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31584 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31586 if (code == ASHIFTRT)
31588 rtx_code_label *done_label = gen_label_rtx ();
31589 emit_jump_insn (BRANCH (LT, done_label));
31590 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31591 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31592 emit_label (done_label);
31594 else
31596 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31597 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31601 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31604 #undef SUB_32
31605 #undef RSB_32
31606 #undef SUB_S_32
31607 #undef SET
31608 #undef SHIFT
31609 #undef LSHIFT
31610 #undef REV_LSHIFT
31611 #undef ORR
31612 #undef BRANCH
31616 /* Return TRUE if the comparison in *COMPARISON can be handled, forcing
31617 the operands into a form that is valid for it. */
31618 bool
31619 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31621 enum rtx_code code = GET_CODE (*comparison);
31622 int code_int;
31623 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31624 ? GET_MODE (*op2) : GET_MODE (*op1);
31626 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31628 if (code == UNEQ || code == LTGT)
31629 return false;
31631 code_int = (int)code;
31632 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31633 PUT_CODE (*comparison, (enum rtx_code)code_int);
31635 switch (mode)
31637 case SImode:
31638 if (!arm_add_operand (*op1, mode))
31639 *op1 = force_reg (mode, *op1);
31640 if (!arm_add_operand (*op2, mode))
31641 *op2 = force_reg (mode, *op2);
31642 return true;
31644 case DImode:
31645 if (!cmpdi_operand (*op1, mode))
31646 *op1 = force_reg (mode, *op1);
31647 if (!cmpdi_operand (*op2, mode))
31648 *op2 = force_reg (mode, *op2);
31649 return true;
31651 case SFmode:
31652 case DFmode:
31653 if (!arm_float_compare_operand (*op1, mode))
31654 *op1 = force_reg (mode, *op1);
31655 if (!arm_float_compare_operand (*op2, mode))
31656 *op2 = force_reg (mode, *op2);
31657 return true;
31658 default:
31659 break;
31662 return false;
31666 /* Return the maximum number of instructions to use for setting a block of memory. */
31667 static int
31668 arm_block_set_max_insns (void)
31670 if (optimize_function_for_size_p (cfun))
31671 return 4;
31672 else
31673 return current_tune->max_insns_inline_memset;
31676 /* Return TRUE if it's profitable to set block of memory for
31677 non-vectorized case. VAL is the value to set the memory
31678 with. LENGTH is the number of bytes to set. ALIGN is the
31679 alignment of the destination memory in bytes. UNALIGNED_P
31680 is TRUE if we can only set the memory with instructions
31681 meeting alignment requirements. USE_STRD_P is TRUE if we
31682 can use strd to set the memory. */
31683 static bool
31684 arm_block_set_non_vect_profit_p (rtx val,
31685 unsigned HOST_WIDE_INT length,
31686 unsigned HOST_WIDE_INT align,
31687 bool unaligned_p, bool use_strd_p)
31689 int num = 0;
31690 /* For a leftover of 0-7 bytes, this table gives the minimum number of
31691 strb/strh/str instructions needed to set them. */
31692 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
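/* Example (added for illustration): 7 leftover bytes take str + strh + strb
   = 3 instructions, hence leftover[7] == 3; 3 leftover bytes take
   strh + strb = 2 instructions, hence leftover[3] == 2.  */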
31694 if (unaligned_p)
31696 num = arm_const_inline_cost (SET, val);
31697 num += length / align + length % align;
31699 else if (use_strd_p)
31701 num = arm_const_double_inline_cost (val);
31702 num += (length >> 3) + leftover[length & 7];
31704 else
31706 num = arm_const_inline_cost (SET, val);
31707 num += (length >> 2) + leftover[length & 3];
31710 /* We may be able to combine last pair STRH/STRB into a single STR
31711 by shifting one byte back. */
31712 if (unaligned_access && length > 3 && (length & 3) == 3)
31713 num--;
31715 return (num <= arm_block_set_max_insns ());
31718 /* Return TRUE if it's profitable to set block of memory for
31719 vectorized case. LENGTH is the number of bytes to set.
31720 ALIGN is the alignment of destination memory in bytes.
31721 MODE is the vector mode used to set the memory. */
31722 static bool
31723 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31724 unsigned HOST_WIDE_INT align,
31725 machine_mode mode)
31727 int num;
31728 bool unaligned_p = ((align & 3) != 0);
31729 unsigned int nelt = GET_MODE_NUNITS (mode);
31731 /* Instruction loading constant value. */
31732 num = 1;
31733 /* Instructions storing the memory. */
31734 num += (length + nelt - 1) / nelt;
31735 /* Instructions adjusting the address expression. We only need to
31736 adjust the address expression if it's 4-byte aligned and the leftover
31737 bytes can only be stored by a misaligned store instruction. */
31738 if (!unaligned_p && (length & 3) != 0)
31739 num++;
31741 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31742 if (!unaligned_p && mode == V16QImode)
31743 num--;
31745 return (num <= arm_block_set_max_insns ());
31748 /* Set a block of memory using vectorization instructions for the
31749 unaligned case. We fill the first LENGTH bytes of the memory
31750 area starting from DSTBASE with byte constant VALUE. ALIGN is
31751 the alignment requirement of memory. Return TRUE if succeeded. */
31752 static bool
31753 arm_block_set_unaligned_vect (rtx dstbase,
31754 unsigned HOST_WIDE_INT length,
31755 unsigned HOST_WIDE_INT value,
31756 unsigned HOST_WIDE_INT align)
31758 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31759 rtx dst, mem;
31760 rtx val_elt, val_vec, reg;
31761 rtx rval[MAX_VECT_LEN];
31762 rtx (*gen_func) (rtx, rtx);
31763 machine_mode mode;
31764 unsigned HOST_WIDE_INT v = value;
31766 gcc_assert ((align & 0x3) != 0);
31767 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31768 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31769 if (length >= nelt_v16)
31771 mode = V16QImode;
31772 gen_func = gen_movmisalignv16qi;
31774 else
31776 mode = V8QImode;
31777 gen_func = gen_movmisalignv8qi;
31779 nelt_mode = GET_MODE_NUNITS (mode);
31780 gcc_assert (length >= nelt_mode);
31781 /* Skip if it isn't profitable. */
31782 if (!arm_block_set_vect_profit_p (length, align, mode))
31783 return false;
31785 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31786 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31788 v = sext_hwi (v, BITS_PER_WORD);
31789 val_elt = GEN_INT (v);
31790 for (j = 0; j < nelt_mode; j++)
31791 rval[j] = val_elt;
31793 reg = gen_reg_rtx (mode);
31794 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31795 /* Emit instruction loading the constant value. */
31796 emit_move_insn (reg, val_vec);
31798 /* Handle nelt_mode bytes in a vector. */
31799 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31801 emit_insn ((*gen_func) (mem, reg));
31802 if (i + 2 * nelt_mode <= length)
31803 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31806 /* If at least nelt_v8 bytes are left over, we must be operating in
31807 V16QImode. */
31808 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31810 /* Handle (8, 16) bytes leftover. */
31811 if (i + nelt_v8 < length)
31813 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31814 /* We are shifting bytes back, set the alignment accordingly. */
31815 if ((length & 1) != 0 && align >= 2)
31816 set_mem_align (mem, BITS_PER_UNIT);
31818 emit_insn (gen_movmisalignv16qi (mem, reg));
31820 /* Handle (0, 8] bytes leftover. */
31821 else if (i < length && i + nelt_v8 >= length)
31823 if (mode == V16QImode)
31825 reg = gen_lowpart (V8QImode, reg);
31826 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31828 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31829 + (nelt_mode - nelt_v8))));
31830 /* We are shifting bytes back, set the alignment accordingly. */
31831 if ((length & 1) != 0 && align >= 2)
31832 set_mem_align (mem, BITS_PER_UNIT);
31834 emit_insn (gen_movmisalignv8qi (mem, reg));
31837 return true;
31840 /* Set a block of memory using vectorization instructions for the
31841 aligned case. We fill the first LENGTH bytes of the memory area
31842 starting from DSTBASE with byte constant VALUE. ALIGN is the
31843 alignment requirement of memory. Return TRUE if succeeded. */
31844 static bool
31845 arm_block_set_aligned_vect (rtx dstbase,
31846 unsigned HOST_WIDE_INT length,
31847 unsigned HOST_WIDE_INT value,
31848 unsigned HOST_WIDE_INT align)
31850 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31851 rtx dst, addr, mem;
31852 rtx val_elt, val_vec, reg;
31853 rtx rval[MAX_VECT_LEN];
31854 machine_mode mode;
31855 unsigned HOST_WIDE_INT v = value;
31857 gcc_assert ((align & 0x3) == 0);
31858 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31859 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31860 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31861 mode = V16QImode;
31862 else
31863 mode = V8QImode;
31865 nelt_mode = GET_MODE_NUNITS (mode);
31866 gcc_assert (length >= nelt_mode);
31867 /* Skip if it isn't profitable. */
31868 if (!arm_block_set_vect_profit_p (length, align, mode))
31869 return false;
31871 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31873 v = sext_hwi (v, BITS_PER_WORD);
31874 val_elt = GEN_INT (v);
31875 for (j = 0; j < nelt_mode; j++)
31876 rval[j] = val_elt;
31878 reg = gen_reg_rtx (mode);
31879 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31880 /* Emit instruction loading the constant value. */
31881 emit_move_insn (reg, val_vec);
31883 i = 0;
31884 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31885 if (mode == V16QImode)
31887 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31888 emit_insn (gen_movmisalignv16qi (mem, reg));
31889 i += nelt_mode;
31890 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31891 if (i + nelt_v8 < length && i + nelt_v16 > length)
31893 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31894 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31895 /* We are shifting bytes back, set the alignment accordingly. */
31896 if ((length & 0x3) == 0)
31897 set_mem_align (mem, BITS_PER_UNIT * 4);
31898 else if ((length & 0x1) == 0)
31899 set_mem_align (mem, BITS_PER_UNIT * 2);
31900 else
31901 set_mem_align (mem, BITS_PER_UNIT);
31903 emit_insn (gen_movmisalignv16qi (mem, reg));
31904 return true;
31906 /* Fall through for bytes leftover. */
31907 mode = V8QImode;
31908 nelt_mode = GET_MODE_NUNITS (mode);
31909 reg = gen_lowpart (V8QImode, reg);
31912 /* Handle 8 bytes in a vector. */
31913 for (; (i + nelt_mode <= length); i += nelt_mode)
31915 addr = plus_constant (Pmode, dst, i);
31916 mem = adjust_automodify_address (dstbase, mode, addr, i);
31917 emit_move_insn (mem, reg);
31920 /* Handle single word leftover by shifting 4 bytes back. We can
31921 use aligned access for this case. */
31922 if (i + UNITS_PER_WORD == length)
31924 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31925 mem = adjust_automodify_address (dstbase, mode,
31926 addr, i - UNITS_PER_WORD);
31927 /* We are shifting 4 bytes back, set the alignment accordingly. */
31928 if (align > UNITS_PER_WORD)
31929 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31931 emit_move_insn (mem, reg);
31933 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31934 We have to use unaligned access for this case. */
31935 else if (i < length)
31937 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31938 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31939 /* We are shifting bytes back, set the alignment accordingly. */
31940 if ((length & 1) == 0)
31941 set_mem_align (mem, BITS_PER_UNIT * 2);
31942 else
31943 set_mem_align (mem, BITS_PER_UNIT);
31945 emit_insn (gen_movmisalignv8qi (mem, reg));
31948 return true;
31951 /* Set a block of memory using plain strh/strb instructions, only
31952 using instructions allowed by ALIGN on the processor. We fill the
31953 first LENGTH bytes of the memory area starting from DSTBASE
31954 with byte constant VALUE. ALIGN is the alignment requirement
31955 of memory. */
31956 static bool
31957 arm_block_set_unaligned_non_vect (rtx dstbase,
31958 unsigned HOST_WIDE_INT length,
31959 unsigned HOST_WIDE_INT value,
31960 unsigned HOST_WIDE_INT align)
31962 unsigned int i;
31963 rtx dst, addr, mem;
31964 rtx val_exp, val_reg, reg;
31965 machine_mode mode;
31966 HOST_WIDE_INT v = value;
31968 gcc_assert (align == 1 || align == 2);
31970 if (align == 2)
31971 v |= (value << BITS_PER_UNIT);
31973 v = sext_hwi (v, BITS_PER_WORD);
31974 val_exp = GEN_INT (v);
31975 /* Skip if it isn't profitable. */
31976 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31977 align, true, false))
31978 return false;
31980 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31981 mode = (align == 2 ? HImode : QImode);
31982 val_reg = force_reg (SImode, val_exp);
31983 reg = gen_lowpart (mode, val_reg);
31985 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31987 addr = plus_constant (Pmode, dst, i);
31988 mem = adjust_automodify_address (dstbase, mode, addr, i);
31989 emit_move_insn (mem, reg);
31992 /* Handle single byte leftover. */
31993 if (i + 1 == length)
31995 reg = gen_lowpart (QImode, val_reg);
31996 addr = plus_constant (Pmode, dst, i);
31997 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31998 emit_move_insn (mem, reg);
31999 i++;
32002 gcc_assert (i == length);
32003 return true;
32006 /* Set a block of memory using plain strd/str/strh/strb instructions,
32007 to permit unaligned copies on processors which support unaligned
32008 semantics for those instructions. We fill the first LENGTH bytes
32009 of the memory area starting from DSTBASE with byte constant VALUE.
32010 ALIGN is the alignment requirement of memory. */
32011 static bool
32012 arm_block_set_aligned_non_vect (rtx dstbase,
32013 unsigned HOST_WIDE_INT length,
32014 unsigned HOST_WIDE_INT value,
32015 unsigned HOST_WIDE_INT align)
32017 unsigned int i;
32018 rtx dst, addr, mem;
32019 rtx val_exp, val_reg, reg;
32020 unsigned HOST_WIDE_INT v;
32021 bool use_strd_p;
32023 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32024 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32026 v = (value | (value << 8) | (value << 16) | (value << 24));
32027 if (length < UNITS_PER_WORD)
32028 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32030 if (use_strd_p)
32031 v |= (v << BITS_PER_WORD);
32032 else
32033 v = sext_hwi (v, BITS_PER_WORD);
32035 val_exp = GEN_INT (v);
32036 /* Skip if it isn't profitable. */
32037 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32038 align, false, use_strd_p))
32040 if (!use_strd_p)
32041 return false;
32043 /* Try without strd. */
32044 v = (v >> BITS_PER_WORD);
32045 v = sext_hwi (v, BITS_PER_WORD);
32046 val_exp = GEN_INT (v);
32047 use_strd_p = false;
32048 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32049 align, false, use_strd_p))
32050 return false;
32053 i = 0;
32054 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32055 /* Handle double words using strd if possible. */
32056 if (use_strd_p)
32058 val_reg = force_reg (DImode, val_exp);
32059 reg = val_reg;
32060 for (; (i + 8 <= length); i += 8)
32062 addr = plus_constant (Pmode, dst, i);
32063 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32064 emit_move_insn (mem, reg);
32067 else
32068 val_reg = force_reg (SImode, val_exp);
32070 /* Handle words. */
32071 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32072 for (; (i + 4 <= length); i += 4)
32074 addr = plus_constant (Pmode, dst, i);
32075 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32076 if ((align & 3) == 0)
32077 emit_move_insn (mem, reg);
32078 else
32079 emit_insn (gen_unaligned_storesi (mem, reg));
32082 /* Merge last pair of STRH and STRB into a STR if possible. */
32083 if (unaligned_access && i > 0 && (i + 3) == length)
32085 addr = plus_constant (Pmode, dst, i - 1);
32086 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32087 /* We are shifting one byte back, set the alignment accordingly. */
32088 if ((align & 1) == 0)
32089 set_mem_align (mem, BITS_PER_UNIT);
32091 /* Most likely this is an unaligned access, and we can't tell at
32092 compilation time. */
32093 emit_insn (gen_unaligned_storesi (mem, reg));
32094 return true;
32097 /* Handle half word leftover. */
32098 if (i + 2 <= length)
32100 reg = gen_lowpart (HImode, val_reg);
32101 addr = plus_constant (Pmode, dst, i);
32102 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32103 if ((align & 1) == 0)
32104 emit_move_insn (mem, reg);
32105 else
32106 emit_insn (gen_unaligned_storehi (mem, reg));
32108 i += 2;
32111 /* Handle single byte leftover. */
32112 if (i + 1 == length)
32114 reg = gen_lowpart (QImode, val_reg);
32115 addr = plus_constant (Pmode, dst, i);
32116 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32117 emit_move_insn (mem, reg);
32120 return true;
32123 /* Set a block of memory using vectorization instructions for both
32124 aligned and unaligned cases. We fill the first LENGTH bytes of
32125 the memory area starting from DSTBASE with byte constant VALUE.
32126 ALIGN is the alignment requirement of memory. */
32127 static bool
32128 arm_block_set_vect (rtx dstbase,
32129 unsigned HOST_WIDE_INT length,
32130 unsigned HOST_WIDE_INT value,
32131 unsigned HOST_WIDE_INT align)
32133 /* Check whether we need to use unaligned store instruction. */
32134 if (((align & 3) != 0 || (length & 3) != 0)
32135 /* Check whether unaligned store instruction is available. */
32136 && (!unaligned_access || BYTES_BIG_ENDIAN))
32137 return false;
32139 if ((align & 3) == 0)
32140 return arm_block_set_aligned_vect (dstbase, length, value, align);
32141 else
32142 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32145 /* Expand a string store (memset) operation. First we try to do it
32146 using vectorization instructions, then fall back to ARM unaligned
32147 access and double-word stores if profitable. OPERANDS[0] is the
32148 destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
32149 value to initialize the memory with, OPERANDS[3] is the known
32150 alignment of the destination. */
32151 bool
32152 arm_gen_setmem (rtx *operands)
32154 rtx dstbase = operands[0];
32155 unsigned HOST_WIDE_INT length;
32156 unsigned HOST_WIDE_INT value;
32157 unsigned HOST_WIDE_INT align;
32159 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32160 return false;
32162 length = UINTVAL (operands[1]);
32163 if (length > 64)
32164 return false;
32166 value = (UINTVAL (operands[2]) & 0xFF);
32167 align = UINTVAL (operands[3]);
32168 if (TARGET_NEON && length >= 8
32169 && current_tune->string_ops_prefer_neon
32170 && arm_block_set_vect (dstbase, length, value, align))
32171 return true;
32173 if (!unaligned_access && (align & 3) != 0)
32174 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32176 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32179 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
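/* Added note (an assumption for illustration): with the usual 1:8 shadow
   mapping used by AddressSanitizer, the shadow byte for address ADDR lives
   at (ADDR >> 3) + offset, and 1 << 29 is 0x20000000, the customary
   offset for 32-bit targets.  */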
32181 static unsigned HOST_WIDE_INT
32182 arm_asan_shadow_offset (void)
32184 return (unsigned HOST_WIDE_INT) 1 << 29;
32188 /* This is a temporary fix for PR60655. Ideally we need
32189 to handle most of these cases in the generic part but
32190 currently we reject minus (..) (sym_ref). We try to
32191 ameliorate the case with minus (sym_ref1) (sym_ref2)
32192 where they are in the same section. */
32194 static bool
32195 arm_const_not_ok_for_debug_p (rtx p)
32197 tree decl_op0 = NULL;
32198 tree decl_op1 = NULL;
32200 if (GET_CODE (p) == MINUS)
32202 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32204 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32205 if (decl_op1
32206 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32207 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32209 if ((TREE_CODE (decl_op1) == VAR_DECL
32210 || TREE_CODE (decl_op1) == CONST_DECL)
32211 && (TREE_CODE (decl_op0) == VAR_DECL
32212 || TREE_CODE (decl_op0) == CONST_DECL))
32213 return (get_variable_section (decl_op1, false)
32214 != get_variable_section (decl_op0, false));
32216 if (TREE_CODE (decl_op1) == LABEL_DECL
32217 && TREE_CODE (decl_op0) == LABEL_DECL)
32218 return (DECL_CONTEXT (decl_op1)
32219 != DECL_CONTEXT (decl_op0));
32222 return true;
32226 return false;
32229 static void
32230 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32232 const unsigned ARM_FE_INVALID = 1;
32233 const unsigned ARM_FE_DIVBYZERO = 2;
32234 const unsigned ARM_FE_OVERFLOW = 4;
32235 const unsigned ARM_FE_UNDERFLOW = 8;
32236 const unsigned ARM_FE_INEXACT = 16;
32237 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32238 | ARM_FE_DIVBYZERO
32239 | ARM_FE_OVERFLOW
32240 | ARM_FE_UNDERFLOW
32241 | ARM_FE_INEXACT);
32242 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
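/* Added note (not in the original source): in the FPSCR the cumulative
   exception flag bits (IOC, DZC, OFC, UFC, IXC) occupy bits 0-4 and the
   corresponding trap-enable bits occupy bits 8-12, hence the shift by 8;
   the mask built below clears both groups.  */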
32243 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32244 tree new_fenv_var, reload_fenv, restore_fnenv;
32245 tree update_call, atomic_feraiseexcept, hold_fnclex;
32247 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32248 return;
32250 /* Generate the equivalent of:
32251 unsigned int fenv_var;
32252 fenv_var = __builtin_arm_get_fpscr ();
32254 unsigned int masked_fenv;
32255 masked_fenv = fenv_var & mask;
32257 __builtin_arm_set_fpscr (masked_fenv); */
32259 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32260 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32261 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32262 mask = build_int_cst (unsigned_type_node,
32263 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32264 | ARM_FE_ALL_EXCEPT));
32265 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32266 fenv_var, build_call_expr (get_fpscr, 0));
32267 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32268 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32269 *hold = build2 (COMPOUND_EXPR, void_type_node,
32270 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32271 hold_fnclex);
32273 /* Store the value of masked_fenv to clear the exceptions:
32274 __builtin_arm_set_fpscr (masked_fenv); */
32276 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32278 /* Generate the equivalent of:
32279 unsigned int new_fenv_var;
32280 new_fenv_var = __builtin_arm_get_fpscr ();
32282 __builtin_arm_set_fpscr (fenv_var);
32284 __atomic_feraiseexcept (new_fenv_var); */
32286 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32287 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32288 build_call_expr (get_fpscr, 0));
32289 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32290 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32291 update_call = build_call_expr (atomic_feraiseexcept, 1,
32292 fold_convert (integer_type_node, new_fenv_var));
32293 *update = build2 (COMPOUND_EXPR, void_type_node,
32294 build2 (COMPOUND_EXPR, void_type_node,
32295 reload_fenv, restore_fnenv), update_call);
32298 /* Return TRUE if X is a reference to a value in a constant pool. */
32299 extern bool
32300 arm_is_constant_pool_ref (rtx x)
32302 return (MEM_P (x)
32303 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32304 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32307 #include "gt-arm.h"