gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
94 {
95 int i[4];
96 };
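/* Editorial note (not part of the original file): four_ints is the return
   vehicle for optimal_immediate_sequence below -- it holds the up to four
   immediate values from which arm_gen_constant can synthesise a 32-bit
   constant using a short sequence of data-processing instructions.  */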
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
121 static void arm_print_operand (FILE *, rtx, int);
122 static void arm_print_operand_address (FILE *, rtx);
123 static bool arm_print_operand_punct_valid_p (unsigned char code);
124 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
125 static arm_cc get_arm_condition_code (rtx);
126 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
127 static const char *output_multi_immediate (rtx *, const char *, const char *,
128 int, HOST_WIDE_INT);
129 static const char *shift_op (rtx, HOST_WIDE_INT *);
130 static struct machine_function *arm_init_machine_status (void);
131 static void thumb_exit (FILE *, int);
132 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
133 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
134 static Mnode *add_minipool_forward_ref (Mfix *);
135 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
136 static Mnode *add_minipool_backward_ref (Mfix *);
137 static void assign_minipool_offsets (Mfix *);
138 static void arm_print_value (FILE *, rtx);
139 static void dump_minipool (rtx_insn *);
140 static int arm_barrier_cost (rtx);
141 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
142 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
143 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
144 machine_mode, rtx);
145 static void arm_reorg (void);
146 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
147 static unsigned long arm_compute_save_reg0_reg12_mask (void);
148 static unsigned long arm_compute_save_reg_mask (void);
149 static unsigned long arm_isr_value (tree);
150 static unsigned long arm_compute_func_type (void);
151 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void arm_init_builtins (void);
197 static void arm_init_iwmmxt_builtins (void);
198 static rtx safe_vector_operand (rtx, machine_mode);
199 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
200 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
201 static rtx arm_expand_builtin (tree, rtx, rtx, machine_mode, int);
202 static tree arm_builtin_decl (unsigned, bool);
203 static void emit_constant_insn (rtx cond, rtx pattern);
204 static rtx_insn *emit_set_insn (rtx, rtx);
205 static rtx emit_multi_reg_push (unsigned long, unsigned long);
206 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
207 tree, bool);
208 static rtx arm_function_arg (cumulative_args_t, machine_mode,
209 const_tree, bool);
210 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
211 const_tree, bool);
212 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
213 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
214 const_tree);
215 static rtx aapcs_libcall_value (machine_mode);
216 static int aapcs_select_return_coproc (const_tree, const_tree);
218 #ifdef OBJECT_FORMAT_ELF
219 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
220 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
221 #endif
222 #ifndef ARM_PE
223 static void arm_encode_section_info (tree, rtx, int);
224 #endif
226 static void arm_file_end (void);
227 static void arm_file_start (void);
229 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
230 tree, int *, int);
231 static bool arm_pass_by_reference (cumulative_args_t,
232 machine_mode, const_tree, bool);
233 static bool arm_promote_prototypes (const_tree);
234 static bool arm_default_short_enums (void);
235 static bool arm_align_anon_bitfield (void);
236 static bool arm_return_in_msb (const_tree);
237 static bool arm_must_pass_in_stack (machine_mode, const_tree);
238 static bool arm_return_in_memory (const_tree, const_tree);
239 #if ARM_UNWIND_INFO
240 static void arm_unwind_emit (FILE *, rtx_insn *);
241 static bool arm_output_ttype (rtx);
242 static void arm_asm_emit_except_personality (rtx);
243 static void arm_asm_init_sections (void);
244 #endif
245 static rtx arm_dwarf_register_span (rtx);
247 static tree arm_cxx_guard_type (void);
248 static bool arm_cxx_guard_mask_bit (void);
249 static tree arm_get_cookie_size (tree);
250 static bool arm_cookie_has_size (void);
251 static bool arm_cxx_cdtor_returns_this (void);
252 static bool arm_cxx_key_method_may_be_inline (void);
253 static void arm_cxx_determine_class_data_visibility (tree);
254 static bool arm_cxx_class_data_always_comdat (void);
255 static bool arm_cxx_use_aeabi_atexit (void);
256 static void arm_init_libfuncs (void);
257 static tree arm_build_builtin_va_list (void);
258 static void arm_expand_builtin_va_start (tree, rtx);
259 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
260 static void arm_option_override (void);
261 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
262 static bool arm_cannot_copy_insn_p (rtx_insn *);
263 static int arm_issue_rate (void);
264 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
265 static bool arm_output_addr_const_extra (FILE *, rtx);
266 static bool arm_allocate_stack_slots_for_args (void);
267 static bool arm_warn_func_return (tree);
268 static const char *arm_invalid_parameter_type (const_tree t);
269 static const char *arm_invalid_return_type (const_tree t);
270 static tree arm_promoted_type (const_tree t);
271 static tree arm_convert_to_type (tree type, tree expr);
272 static bool arm_scalar_mode_supported_p (machine_mode);
273 static bool arm_frame_pointer_required (void);
274 static bool arm_can_eliminate (const int, const int);
275 static void arm_asm_trampoline_template (FILE *);
276 static void arm_trampoline_init (rtx, tree, rtx);
277 static rtx arm_trampoline_adjust_address (rtx);
278 static rtx arm_pic_static_addr (rtx orig, rtx reg);
279 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
280 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (machine_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_sizes (void);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
300 const unsigned char *sel);
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
315 /* Table of machine attributes. */
316 static const struct attribute_spec arm_attribute_table[] =
317 {
318 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
319 affects_type_identity } */
320 /* Function calls made to this symbol must be done indirectly, because
321 it may lie outside of the 26 bit addressing range of a normal function
322 call. */
323 { "long_call", 0, 0, false, true, true, NULL, false },
324 /* Whereas these functions are always known to reside within the 26 bit
325 addressing range. */
326 { "short_call", 0, 0, false, true, true, NULL, false },
327 /* Specify the procedure call conventions for a function. */
328 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
329 false },
330 /* Interrupt Service Routines have special prologue and epilogue requirements. */
331 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
332 false },
333 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
334 false },
335 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
336 false },
337 #ifdef ARM_PE
338 /* ARM/PE has three new attributes:
339 interfacearm - ?
340 dllexport - for exporting a function/variable that will live in a dll
341 dllimport - for importing a function/variable from a dll
343 Microsoft allows multiple declspecs in one __declspec, separating
344 them with spaces. We do NOT support this. Instead, use __declspec
345 multiple times.
346 */
347 { "dllimport", 0, 0, true, false, false, NULL, false },
348 { "dllexport", 0, 0, true, false, false, NULL, false },
349 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
352 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
353 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
354 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
355 false },
356 #endif
357 { NULL, 0, 0, false, false, false, NULL, false }
358 };
360 /* Initialize the GCC target structure. */
361 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
362 #undef TARGET_MERGE_DECL_ATTRIBUTES
363 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
364 #endif
366 #undef TARGET_LEGITIMIZE_ADDRESS
367 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
369 #undef TARGET_LRA_P
370 #define TARGET_LRA_P arm_lra_p
372 #undef TARGET_ATTRIBUTE_TABLE
373 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
375 #undef TARGET_ASM_FILE_START
376 #define TARGET_ASM_FILE_START arm_file_start
377 #undef TARGET_ASM_FILE_END
378 #define TARGET_ASM_FILE_END arm_file_end
380 #undef TARGET_ASM_ALIGNED_SI_OP
381 #define TARGET_ASM_ALIGNED_SI_OP NULL
382 #undef TARGET_ASM_INTEGER
383 #define TARGET_ASM_INTEGER arm_assemble_integer
385 #undef TARGET_PRINT_OPERAND
386 #define TARGET_PRINT_OPERAND arm_print_operand
387 #undef TARGET_PRINT_OPERAND_ADDRESS
388 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
389 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
390 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
392 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
393 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
395 #undef TARGET_ASM_FUNCTION_PROLOGUE
396 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
398 #undef TARGET_ASM_FUNCTION_EPILOGUE
399 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
401 #undef TARGET_OPTION_OVERRIDE
402 #define TARGET_OPTION_OVERRIDE arm_option_override
404 #undef TARGET_COMP_TYPE_ATTRIBUTES
405 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
407 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
408 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
410 #undef TARGET_SCHED_ADJUST_COST
411 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
413 #undef TARGET_SCHED_REORDER
414 #define TARGET_SCHED_REORDER arm_sched_reorder
416 #undef TARGET_REGISTER_MOVE_COST
417 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
419 #undef TARGET_MEMORY_MOVE_COST
420 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
422 #undef TARGET_ENCODE_SECTION_INFO
423 #ifdef ARM_PE
424 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
425 #else
426 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
427 #endif
429 #undef TARGET_STRIP_NAME_ENCODING
430 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
432 #undef TARGET_ASM_INTERNAL_LABEL
433 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
436 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
438 #undef TARGET_FUNCTION_VALUE
439 #define TARGET_FUNCTION_VALUE arm_function_value
441 #undef TARGET_LIBCALL_VALUE
442 #define TARGET_LIBCALL_VALUE arm_libcall_value
444 #undef TARGET_FUNCTION_VALUE_REGNO_P
445 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
447 #undef TARGET_ASM_OUTPUT_MI_THUNK
448 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
452 #undef TARGET_RTX_COSTS
453 #define TARGET_RTX_COSTS arm_rtx_costs
454 #undef TARGET_ADDRESS_COST
455 #define TARGET_ADDRESS_COST arm_address_cost
457 #undef TARGET_SHIFT_TRUNCATION_MASK
458 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
459 #undef TARGET_VECTOR_MODE_SUPPORTED_P
460 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
461 #undef TARGET_ARRAY_MODE_SUPPORTED_P
462 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
463 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
464 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
465 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
466 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
467 arm_autovectorize_vector_sizes
469 #undef TARGET_MACHINE_DEPENDENT_REORG
470 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS arm_init_builtins
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
476 #undef TARGET_BUILTIN_DECL
477 #define TARGET_BUILTIN_DECL arm_builtin_decl
479 #undef TARGET_INIT_LIBFUNCS
480 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
482 #undef TARGET_PROMOTE_FUNCTION_MODE
483 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
484 #undef TARGET_PROMOTE_PROTOTYPES
485 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
486 #undef TARGET_PASS_BY_REFERENCE
487 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
488 #undef TARGET_ARG_PARTIAL_BYTES
489 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
490 #undef TARGET_FUNCTION_ARG
491 #define TARGET_FUNCTION_ARG arm_function_arg
492 #undef TARGET_FUNCTION_ARG_ADVANCE
493 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
494 #undef TARGET_FUNCTION_ARG_BOUNDARY
495 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
497 #undef TARGET_SETUP_INCOMING_VARARGS
498 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
500 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
501 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
503 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
504 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
505 #undef TARGET_TRAMPOLINE_INIT
506 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
507 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
508 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
510 #undef TARGET_WARN_FUNC_RETURN
511 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
513 #undef TARGET_DEFAULT_SHORT_ENUMS
514 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
516 #undef TARGET_ALIGN_ANON_BITFIELD
517 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
519 #undef TARGET_NARROW_VOLATILE_BITFIELD
520 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
522 #undef TARGET_CXX_GUARD_TYPE
523 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
525 #undef TARGET_CXX_GUARD_MASK_BIT
526 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
528 #undef TARGET_CXX_GET_COOKIE_SIZE
529 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
531 #undef TARGET_CXX_COOKIE_HAS_SIZE
532 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
534 #undef TARGET_CXX_CDTOR_RETURNS_THIS
535 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
537 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
538 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
540 #undef TARGET_CXX_USE_AEABI_ATEXIT
541 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
543 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
544 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
545 arm_cxx_determine_class_data_visibility
547 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
548 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
550 #undef TARGET_RETURN_IN_MSB
551 #define TARGET_RETURN_IN_MSB arm_return_in_msb
553 #undef TARGET_RETURN_IN_MEMORY
554 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
556 #undef TARGET_MUST_PASS_IN_STACK
557 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
559 #if ARM_UNWIND_INFO
560 #undef TARGET_ASM_UNWIND_EMIT
561 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
563 /* EABI unwinding tables use a different format for the typeinfo tables. */
564 #undef TARGET_ASM_TTYPE
565 #define TARGET_ASM_TTYPE arm_output_ttype
567 #undef TARGET_ARM_EABI_UNWINDER
568 #define TARGET_ARM_EABI_UNWINDER true
570 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
571 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
573 #undef TARGET_ASM_INIT_SECTIONS
574 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
575 #endif /* ARM_UNWIND_INFO */
577 #undef TARGET_DWARF_REGISTER_SPAN
578 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
580 #undef TARGET_CANNOT_COPY_INSN_P
581 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
583 #ifdef HAVE_AS_TLS
584 #undef TARGET_HAVE_TLS
585 #define TARGET_HAVE_TLS true
586 #endif
588 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
589 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
591 #undef TARGET_LEGITIMATE_CONSTANT_P
592 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
594 #undef TARGET_CANNOT_FORCE_CONST_MEM
595 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
597 #undef TARGET_MAX_ANCHOR_OFFSET
598 #define TARGET_MAX_ANCHOR_OFFSET 4095
600 /* The minimum is set such that the total size of the block
601 for a particular anchor is -4088 + 1 + 4095 bytes, which is
602 divisible by eight, ensuring natural spacing of anchors. */
603 #undef TARGET_MIN_ANCHOR_OFFSET
604 #define TARGET_MIN_ANCHOR_OFFSET -4088
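/* Editorial note (not part of the original file): the two limits above give
   an anchor a reach of -4088 .. +4095, i.e. 4088 + 1 + 4095 = 8184 bytes in
   total, and 8184 = 8 * 1023, so consecutive section anchors remain
   naturally 8-byte aligned.  */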
606 #undef TARGET_SCHED_ISSUE_RATE
607 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
609 #undef TARGET_MANGLE_TYPE
610 #define TARGET_MANGLE_TYPE arm_mangle_type
612 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
613 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
615 #undef TARGET_BUILD_BUILTIN_VA_LIST
616 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
617 #undef TARGET_EXPAND_BUILTIN_VA_START
618 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
619 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
620 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
622 #ifdef HAVE_AS_TLS
623 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
624 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
625 #endif
627 #undef TARGET_LEGITIMATE_ADDRESS_P
628 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
630 #undef TARGET_PREFERRED_RELOAD_CLASS
631 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
633 #undef TARGET_INVALID_PARAMETER_TYPE
634 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
636 #undef TARGET_INVALID_RETURN_TYPE
637 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
639 #undef TARGET_PROMOTED_TYPE
640 #define TARGET_PROMOTED_TYPE arm_promoted_type
642 #undef TARGET_CONVERT_TO_TYPE
643 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
645 #undef TARGET_SCALAR_MODE_SUPPORTED_P
646 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
648 #undef TARGET_FRAME_POINTER_REQUIRED
649 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
651 #undef TARGET_CAN_ELIMINATE
652 #define TARGET_CAN_ELIMINATE arm_can_eliminate
654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
655 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
657 #undef TARGET_CLASS_LIKELY_SPILLED_P
658 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
660 #undef TARGET_VECTORIZE_BUILTINS
661 #define TARGET_VECTORIZE_BUILTINS
663 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
664 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
665 arm_builtin_vectorized_function
667 #undef TARGET_VECTOR_ALIGNMENT
668 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
670 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
671 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
672 arm_vector_alignment_reachable
674 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
675 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
676 arm_builtin_support_vector_misalignment
678 #undef TARGET_PREFERRED_RENAME_CLASS
679 #define TARGET_PREFERRED_RENAME_CLASS \
680 arm_preferred_rename_class
682 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
683 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
684 arm_vectorize_vec_perm_const_ok
686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
688 arm_builtin_vectorization_cost
689 #undef TARGET_VECTORIZE_ADD_STMT_COST
690 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
692 #undef TARGET_CANONICALIZE_COMPARISON
693 #define TARGET_CANONICALIZE_COMPARISON \
694 arm_canonicalize_comparison
696 #undef TARGET_ASAN_SHADOW_OFFSET
697 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
699 #undef MAX_INSN_PER_IT_BLOCK
700 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
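/* Editorial note (not part of the original file): -mrestrict-it limits a
   Thumb-2 IT block to a single conditional instruction, as recommended for
   ARMv8; without that option up to four instructions may share one IT
   block, hence the 1 : 4 choice above.  */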
702 #undef TARGET_CAN_USE_DOLOOP_P
703 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
705 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
706 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
708 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
709 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
711 struct gcc_target targetm = TARGET_INITIALIZER;
713 /* Obstack for minipool constant handling. */
714 static struct obstack minipool_obstack;
715 static char * minipool_startobj;
717 /* The maximum number of insns skipped which
718 will be conditionalised if possible. */
719 static int max_insns_skipped = 5;
721 extern FILE * asm_out_file;
723 /* True if we are currently building a constant table. */
724 int making_const_table;
726 /* The processor for which instructions should be scheduled. */
727 enum processor_type arm_tune = arm_none;
729 /* The current tuning set. */
730 const struct tune_params *current_tune;
732 /* Which floating point hardware to schedule for. */
733 int arm_fpu_attr;
735 /* Which floating point hardware to use. */
736 const struct arm_fpu_desc *arm_fpu_desc;
738 /* Used for Thumb call_via trampolines. */
739 rtx thumb_call_via_label[14];
740 static int thumb_call_reg_needed;
742 /* Bit values used to identify processor capabilities. */
743 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
744 #define FL_ARCH3M (1 << 1) /* Extended multiply */
745 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
746 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
747 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
748 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
749 #define FL_THUMB (1 << 6) /* Thumb aware */
750 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
751 #define FL_STRONG (1 << 8) /* StrongARM */
752 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
753 #define FL_XSCALE (1 << 10) /* XScale */
754 /* spare (1 << 11) */
755 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
756 media instructions. */
757 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
758 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
759 Note: ARM6 & 7 derivatives only. */
760 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
761 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
762 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
763 profile. */
764 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
765 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
766 #define FL_NEON (1 << 20) /* Neon instructions. */
767 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
768 architecture. */
769 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
770 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
771 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
772 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
774 #define FL_SMALLMUL (1 << 26) /* Small multiply supported. */
776 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
777 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
779 /* Flags that only affect tuning, not available instructions. */
780 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
781 | FL_CO_PROC)
783 #define FL_FOR_ARCH2 FL_NOTM
784 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
785 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
786 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
787 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
788 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
789 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
790 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
791 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
792 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
793 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
794 #define FL_FOR_ARCH6J FL_FOR_ARCH6
795 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
796 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
797 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
798 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
799 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
800 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
801 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
802 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
803 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
804 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
805 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
806 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
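/* Editorial sketch (assumption, not part of the original file): the FL_*
   capability bits above are OR-ed together into the FL_FOR_ARCH* masks for
   each architecture, stored in insn_flags when the target is selected, and
   then used to derive the arm_arch* feature globals, roughly as follows.  */
#if 0
  insn_flags = FL_FOR_ARCH7A;                     /* e.g. for -march=armv7-a */
  arm_arch5       = (insn_flags & FL_ARCH5)  != 0;
  arm_arch6       = (insn_flags & FL_ARCH6)  != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
#endif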
808 /* The bits in this mask specify which
809 instructions we are allowed to generate. */
810 static unsigned long insn_flags = 0;
812 /* The bits in this mask specify which instruction scheduling options should
813 be used. */
814 static unsigned long tune_flags = 0;
816 /* The highest ARM architecture version supported by the
817 target. */
818 enum base_architecture arm_base_arch = BASE_ARCH_0;
820 /* The following are used in the arm.md file as equivalents to bits
821 in the above two flag variables. */
823 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
824 int arm_arch3m = 0;
826 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
827 int arm_arch4 = 0;
829 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
830 int arm_arch4t = 0;
832 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
833 int arm_arch5 = 0;
835 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
836 int arm_arch5e = 0;
838 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
839 int arm_arch6 = 0;
841 /* Nonzero if this chip supports the ARM 6K extensions. */
842 int arm_arch6k = 0;
844 /* Nonzero if instructions present in ARMv6-M can be used. */
845 int arm_arch6m = 0;
847 /* Nonzero if this chip supports the ARM 7 extensions. */
848 int arm_arch7 = 0;
850 /* Nonzero if instructions not present in the 'M' profile can be used. */
851 int arm_arch_notm = 0;
853 /* Nonzero if instructions present in ARMv7E-M can be used. */
854 int arm_arch7em = 0;
856 /* Nonzero if instructions present in ARMv8 can be used. */
857 int arm_arch8 = 0;
859 /* Nonzero if this chip can benefit from load scheduling. */
860 int arm_ld_sched = 0;
862 /* Nonzero if this chip is a StrongARM. */
863 int arm_tune_strongarm = 0;
865 /* Nonzero if this chip supports Intel Wireless MMX technology. */
866 int arm_arch_iwmmxt = 0;
868 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
869 int arm_arch_iwmmxt2 = 0;
871 /* Nonzero if this chip is an XScale. */
872 int arm_arch_xscale = 0;
874 /* Nonzero if tuning for XScale */
875 int arm_tune_xscale = 0;
877 /* Nonzero if we want to tune for stores that access the write-buffer.
878 This typically means an ARM6 or ARM7 with MMU or MPU. */
879 int arm_tune_wbuf = 0;
881 /* Nonzero if tuning for Cortex-A9. */
882 int arm_tune_cortex_a9 = 0;
884 /* Nonzero if generating Thumb instructions. */
885 int thumb_code = 0;
887 /* Nonzero if generating Thumb-1 instructions. */
888 int thumb1_code = 0;
890 /* Nonzero if we should define __THUMB_INTERWORK__ in the
891 preprocessor.
892 XXX This is a bit of a hack, it's intended to help work around
893 problems in GLD which doesn't understand that armv5t code is
894 interworking clean. */
895 int arm_cpp_interwork = 0;
897 /* Nonzero if chip supports Thumb 2. */
898 int arm_arch_thumb2;
900 /* Nonzero if chip supports integer division instruction. */
901 int arm_arch_arm_hwdiv;
902 int arm_arch_thumb_hwdiv;
904 /* Nonzero if we should use Neon to handle 64-bit operations rather
905 than core registers. */
906 int prefer_neon_for_64bits = 0;
908 /* Nonzero if we shouldn't use literal pools. */
909 bool arm_disable_literal_pool = false;
911 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
912 we must report the mode of the memory reference from
913 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
914 machine_mode output_memory_reference_mode;
916 /* The register number to be used for the PIC offset register. */
917 unsigned arm_pic_register = INVALID_REGNUM;
919 enum arm_pcs arm_pcs_default;
921 /* For an explanation of these variables, see final_prescan_insn below. */
922 int arm_ccfsm_state;
923 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
924 enum arm_cond_code arm_current_cc;
926 rtx arm_target_insn;
927 int arm_target_label;
928 /* The number of conditionally executed insns, including the current insn. */
929 int arm_condexec_count = 0;
930 /* A bitmask specifying the patterns for the IT block.
931 Zero means do not output an IT block before this insn. */
932 int arm_condexec_mask = 0;
933 /* The number of bits used in arm_condexec_mask. */
934 int arm_condexec_masklen = 0;
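/* Editorial note (assumption, not part of the original file): for Thumb-2,
   final_prescan_insn accumulates one bit per conditional instruction in
   arm_condexec_mask, recording whether that instruction uses the block's
   base condition or its inverse; arm_condexec_masklen counts how many such
   bits have been gathered, and the pair is then emitted as one IT
   instruction covering the block.  */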
936 /* Nonzero if chip supports the ARMv8 CRC instructions. */
937 int arm_arch_crc = 0;
939 /* Nonzero if the core has a very small, high-latency, multiply unit. */
940 int arm_m_profile_small_mul = 0;
942 /* The condition codes of the ARM, and the inverse function. */
943 static const char * const arm_condition_codes[] =
944 {
945 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
946 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
947 };
949 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
950 int arm_regs_in_sequence[] =
951 {
952 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
953 };
955 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
956 #define streq(string1, string2) (strcmp (string1, string2) == 0)
958 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
959 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
960 | (1 << PIC_OFFSET_TABLE_REGNUM)))
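/* Editorial note (assumption, not part of the original file): 0xff selects
   the Thumb low registers r0-r7; the hard frame pointer, stack pointer,
   program counter and PIC base are then cleared from the mask so they are
   never picked as Thumb-2 work (scratch) registers.  */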
962 /* Initialization code. */
964 struct processors
965 {
966 const char *const name;
967 enum processor_type core;
968 const char *arch;
969 enum base_architecture base_arch;
970 const unsigned long flags;
971 const struct tune_params *const tune;
972 };
975 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
976 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
977 prefetch_slots, \
978 l1_size, \
979 l1_line_size
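/* Editorial note (assumption, not part of the original file): these macros
   expand to the three prefetch-related initialisers of struct tune_params
   (number of prefetch slots, L1 cache size, L1 cache line size), so
   ARM_PREFETCH_NOT_BENEFICIAL supplies 0, -1, -1 in the tune tables
   further down.  */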
981 /* arm generic vectorizer costs. */
982 static const
983 struct cpu_vec_costs arm_default_vec_cost = {
984 1, /* scalar_stmt_cost. */
985 1, /* scalar load_cost. */
986 1, /* scalar_store_cost. */
987 1, /* vec_stmt_cost. */
988 1, /* vec_to_scalar_cost. */
989 1, /* scalar_to_vec_cost. */
990 1, /* vec_align_load_cost. */
991 1, /* vec_unalign_load_cost. */
992 1, /* vec_unalign_store_cost. */
993 1, /* vec_store_cost. */
994 3, /* cond_taken_branch_cost. */
995 1, /* cond_not_taken_branch_cost. */
996 };
998 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
999 #include "aarch-cost-tables.h"
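/* Editorial note (assumption, not part of the original file): the entries
   in the cpu_cost_table structures below are extra costs relative to a
   single fast instruction, expressed in COSTS_N_INSNS units, so a value of
   0 means "no cost beyond the baseline instruction".  */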
1003 const struct cpu_cost_table cortexa9_extra_costs =
1005 /* ALU */
1007 0, /* arith. */
1008 0, /* logical. */
1009 0, /* shift. */
1010 COSTS_N_INSNS (1), /* shift_reg. */
1011 COSTS_N_INSNS (1), /* arith_shift. */
1012 COSTS_N_INSNS (2), /* arith_shift_reg. */
1013 0, /* log_shift. */
1014 COSTS_N_INSNS (1), /* log_shift_reg. */
1015 COSTS_N_INSNS (1), /* extend. */
1016 COSTS_N_INSNS (2), /* extend_arith. */
1017 COSTS_N_INSNS (1), /* bfi. */
1018 COSTS_N_INSNS (1), /* bfx. */
1019 0, /* clz. */
1020 0, /* rev. */
1021 0, /* non_exec. */
1022 true /* non_exec_costs_exec. */
1025 /* MULT SImode */
1027 COSTS_N_INSNS (3), /* simple. */
1028 COSTS_N_INSNS (3), /* flag_setting. */
1029 COSTS_N_INSNS (2), /* extend. */
1030 COSTS_N_INSNS (3), /* add. */
1031 COSTS_N_INSNS (2), /* extend_add. */
1032 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1034 /* MULT DImode */
1036 0, /* simple (N/A). */
1037 0, /* flag_setting (N/A). */
1038 COSTS_N_INSNS (4), /* extend. */
1039 0, /* add (N/A). */
1040 COSTS_N_INSNS (4), /* extend_add. */
1041 0 /* idiv (N/A). */
1044 /* LD/ST */
1046 COSTS_N_INSNS (2), /* load. */
1047 COSTS_N_INSNS (2), /* load_sign_extend. */
1048 COSTS_N_INSNS (2), /* ldrd. */
1049 COSTS_N_INSNS (2), /* ldm_1st. */
1050 1, /* ldm_regs_per_insn_1st. */
1051 2, /* ldm_regs_per_insn_subsequent. */
1052 COSTS_N_INSNS (5), /* loadf. */
1053 COSTS_N_INSNS (5), /* loadd. */
1054 COSTS_N_INSNS (1), /* load_unaligned. */
1055 COSTS_N_INSNS (2), /* store. */
1056 COSTS_N_INSNS (2), /* strd. */
1057 COSTS_N_INSNS (2), /* stm_1st. */
1058 1, /* stm_regs_per_insn_1st. */
1059 2, /* stm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (1), /* storef. */
1061 COSTS_N_INSNS (1), /* stored. */
1062 COSTS_N_INSNS (1) /* store_unaligned. */
1065 /* FP SFmode */
1067 COSTS_N_INSNS (14), /* div. */
1068 COSTS_N_INSNS (4), /* mult. */
1069 COSTS_N_INSNS (7), /* mult_addsub. */
1070 COSTS_N_INSNS (30), /* fma. */
1071 COSTS_N_INSNS (3), /* addsub. */
1072 COSTS_N_INSNS (1), /* fpconst. */
1073 COSTS_N_INSNS (1), /* neg. */
1074 COSTS_N_INSNS (3), /* compare. */
1075 COSTS_N_INSNS (3), /* widen. */
1076 COSTS_N_INSNS (3), /* narrow. */
1077 COSTS_N_INSNS (3), /* toint. */
1078 COSTS_N_INSNS (3), /* fromint. */
1079 COSTS_N_INSNS (3) /* roundint. */
1081 /* FP DFmode */
1083 COSTS_N_INSNS (24), /* div. */
1084 COSTS_N_INSNS (5), /* mult. */
1085 COSTS_N_INSNS (8), /* mult_addsub. */
1086 COSTS_N_INSNS (30), /* fma. */
1087 COSTS_N_INSNS (3), /* addsub. */
1088 COSTS_N_INSNS (1), /* fpconst. */
1089 COSTS_N_INSNS (1), /* neg. */
1090 COSTS_N_INSNS (3), /* compare. */
1091 COSTS_N_INSNS (3), /* widen. */
1092 COSTS_N_INSNS (3), /* narrow. */
1093 COSTS_N_INSNS (3), /* toint. */
1094 COSTS_N_INSNS (3), /* fromint. */
1095 COSTS_N_INSNS (3) /* roundint. */
1098 /* Vector */
1100 COSTS_N_INSNS (1) /* alu. */
1104 const struct cpu_cost_table cortexa8_extra_costs =
1106 /* ALU */
1108 0, /* arith. */
1109 0, /* logical. */
1110 COSTS_N_INSNS (1), /* shift. */
1111 0, /* shift_reg. */
1112 COSTS_N_INSNS (1), /* arith_shift. */
1113 0, /* arith_shift_reg. */
1114 COSTS_N_INSNS (1), /* log_shift. */
1115 0, /* log_shift_reg. */
1116 0, /* extend. */
1117 0, /* extend_arith. */
1118 0, /* bfi. */
1119 0, /* bfx. */
1120 0, /* clz. */
1121 0, /* rev. */
1122 0, /* non_exec. */
1123 true /* non_exec_costs_exec. */
1126 /* MULT SImode */
1128 COSTS_N_INSNS (1), /* simple. */
1129 COSTS_N_INSNS (1), /* flag_setting. */
1130 COSTS_N_INSNS (1), /* extend. */
1131 COSTS_N_INSNS (1), /* add. */
1132 COSTS_N_INSNS (1), /* extend_add. */
1133 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1135 /* MULT DImode */
1137 0, /* simple (N/A). */
1138 0, /* flag_setting (N/A). */
1139 COSTS_N_INSNS (2), /* extend. */
1140 0, /* add (N/A). */
1141 COSTS_N_INSNS (2), /* extend_add. */
1142 0 /* idiv (N/A). */
1145 /* LD/ST */
1147 COSTS_N_INSNS (1), /* load. */
1148 COSTS_N_INSNS (1), /* load_sign_extend. */
1149 COSTS_N_INSNS (1), /* ldrd. */
1150 COSTS_N_INSNS (1), /* ldm_1st. */
1151 1, /* ldm_regs_per_insn_1st. */
1152 2, /* ldm_regs_per_insn_subsequent. */
1153 COSTS_N_INSNS (1), /* loadf. */
1154 COSTS_N_INSNS (1), /* loadd. */
1155 COSTS_N_INSNS (1), /* load_unaligned. */
1156 COSTS_N_INSNS (1), /* store. */
1157 COSTS_N_INSNS (1), /* strd. */
1158 COSTS_N_INSNS (1), /* stm_1st. */
1159 1, /* stm_regs_per_insn_1st. */
1160 2, /* stm_regs_per_insn_subsequent. */
1161 COSTS_N_INSNS (1), /* storef. */
1162 COSTS_N_INSNS (1), /* stored. */
1163 COSTS_N_INSNS (1) /* store_unaligned. */
1166 /* FP SFmode */
1168 COSTS_N_INSNS (36), /* div. */
1169 COSTS_N_INSNS (11), /* mult. */
1170 COSTS_N_INSNS (20), /* mult_addsub. */
1171 COSTS_N_INSNS (30), /* fma. */
1172 COSTS_N_INSNS (9), /* addsub. */
1173 COSTS_N_INSNS (3), /* fpconst. */
1174 COSTS_N_INSNS (3), /* neg. */
1175 COSTS_N_INSNS (6), /* compare. */
1176 COSTS_N_INSNS (4), /* widen. */
1177 COSTS_N_INSNS (4), /* narrow. */
1178 COSTS_N_INSNS (8), /* toint. */
1179 COSTS_N_INSNS (8), /* fromint. */
1180 COSTS_N_INSNS (8) /* roundint. */
1182 /* FP DFmode */
1184 COSTS_N_INSNS (64), /* div. */
1185 COSTS_N_INSNS (16), /* mult. */
1186 COSTS_N_INSNS (25), /* mult_addsub. */
1187 COSTS_N_INSNS (30), /* fma. */
1188 COSTS_N_INSNS (9), /* addsub. */
1189 COSTS_N_INSNS (3), /* fpconst. */
1190 COSTS_N_INSNS (3), /* neg. */
1191 COSTS_N_INSNS (6), /* compare. */
1192 COSTS_N_INSNS (6), /* widen. */
1193 COSTS_N_INSNS (6), /* narrow. */
1194 COSTS_N_INSNS (8), /* toint. */
1195 COSTS_N_INSNS (8), /* fromint. */
1196 COSTS_N_INSNS (8) /* roundint. */
1199 /* Vector */
1201 COSTS_N_INSNS (1) /* alu. */
1205 const struct cpu_cost_table cortexa5_extra_costs =
1207 /* ALU */
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 COSTS_N_INSNS (1), /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 COSTS_N_INSNS (1), /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 COSTS_N_INSNS (1), /* log_shift_reg. */
1217 COSTS_N_INSNS (1), /* extend. */
1218 COSTS_N_INSNS (1), /* extend_arith. */
1219 COSTS_N_INSNS (1), /* bfi. */
1220 COSTS_N_INSNS (1), /* bfx. */
1221 COSTS_N_INSNS (1), /* clz. */
1222 COSTS_N_INSNS (1), /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1228 /* MULT SImode */
1230 0, /* simple. */
1231 COSTS_N_INSNS (1), /* flag_setting. */
1232 COSTS_N_INSNS (1), /* extend. */
1233 COSTS_N_INSNS (1), /* add. */
1234 COSTS_N_INSNS (1), /* extend_add. */
1235 COSTS_N_INSNS (7) /* idiv. */
1237 /* MULT DImode */
1239 0, /* simple (N/A). */
1240 0, /* flag_setting (N/A). */
1241 COSTS_N_INSNS (1), /* extend. */
1242 0, /* add. */
1243 COSTS_N_INSNS (2), /* extend_add. */
1244 0 /* idiv (N/A). */
1247 /* LD/ST */
1249 COSTS_N_INSNS (1), /* load. */
1250 COSTS_N_INSNS (1), /* load_sign_extend. */
1251 COSTS_N_INSNS (6), /* ldrd. */
1252 COSTS_N_INSNS (1), /* ldm_1st. */
1253 1, /* ldm_regs_per_insn_1st. */
1254 2, /* ldm_regs_per_insn_subsequent. */
1255 COSTS_N_INSNS (2), /* loadf. */
1256 COSTS_N_INSNS (4), /* loadd. */
1257 COSTS_N_INSNS (1), /* load_unaligned. */
1258 COSTS_N_INSNS (1), /* store. */
1259 COSTS_N_INSNS (3), /* strd. */
1260 COSTS_N_INSNS (1), /* stm_1st. */
1261 1, /* stm_regs_per_insn_1st. */
1262 2, /* stm_regs_per_insn_subsequent. */
1263 COSTS_N_INSNS (2), /* storef. */
1264 COSTS_N_INSNS (2), /* stored. */
1265 COSTS_N_INSNS (1) /* store_unaligned. */
1268 /* FP SFmode */
1270 COSTS_N_INSNS (15), /* div. */
1271 COSTS_N_INSNS (3), /* mult. */
1272 COSTS_N_INSNS (7), /* mult_addsub. */
1273 COSTS_N_INSNS (7), /* fma. */
1274 COSTS_N_INSNS (3), /* addsub. */
1275 COSTS_N_INSNS (3), /* fpconst. */
1276 COSTS_N_INSNS (3), /* neg. */
1277 COSTS_N_INSNS (3), /* compare. */
1278 COSTS_N_INSNS (3), /* widen. */
1279 COSTS_N_INSNS (3), /* narrow. */
1280 COSTS_N_INSNS (3), /* toint. */
1281 COSTS_N_INSNS (3), /* fromint. */
1282 COSTS_N_INSNS (3) /* roundint. */
1284 /* FP DFmode */
1286 COSTS_N_INSNS (30), /* div. */
1287 COSTS_N_INSNS (6), /* mult. */
1288 COSTS_N_INSNS (10), /* mult_addsub. */
1289 COSTS_N_INSNS (7), /* fma. */
1290 COSTS_N_INSNS (3), /* addsub. */
1291 COSTS_N_INSNS (3), /* fpconst. */
1292 COSTS_N_INSNS (3), /* neg. */
1293 COSTS_N_INSNS (3), /* compare. */
1294 COSTS_N_INSNS (3), /* widen. */
1295 COSTS_N_INSNS (3), /* narrow. */
1296 COSTS_N_INSNS (3), /* toint. */
1297 COSTS_N_INSNS (3), /* fromint. */
1298 COSTS_N_INSNS (3) /* roundint. */
1301 /* Vector */
1303 COSTS_N_INSNS (1) /* alu. */
1308 const struct cpu_cost_table cortexa7_extra_costs =
1310 /* ALU */
1312 0, /* arith. */
1313 0, /* logical. */
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1326 0, /* non_exec. */
1327 true /* non_exec_costs_exec. */
1331 /* MULT SImode */
1333 0, /* simple. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1340 /* MULT DImode */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1345 0, /* add. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1347 0 /* idiv (N/A). */
1350 /* LD/ST */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (3), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (2), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1) /* store_unaligned. */
1371 /* FP SFmode */
1373 COSTS_N_INSNS (15), /* div. */
1374 COSTS_N_INSNS (3), /* mult. */
1375 COSTS_N_INSNS (7), /* mult_addsub. */
1376 COSTS_N_INSNS (7), /* fma. */
1377 COSTS_N_INSNS (3), /* addsub. */
1378 COSTS_N_INSNS (3), /* fpconst. */
1379 COSTS_N_INSNS (3), /* neg. */
1380 COSTS_N_INSNS (3), /* compare. */
1381 COSTS_N_INSNS (3), /* widen. */
1382 COSTS_N_INSNS (3), /* narrow. */
1383 COSTS_N_INSNS (3), /* toint. */
1384 COSTS_N_INSNS (3), /* fromint. */
1385 COSTS_N_INSNS (3) /* roundint. */
1387 /* FP DFmode */
1389 COSTS_N_INSNS (30), /* div. */
1390 COSTS_N_INSNS (6), /* mult. */
1391 COSTS_N_INSNS (10), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1404 /* Vector */
1406 COSTS_N_INSNS (1) /* alu. */
1410 const struct cpu_cost_table cortexa12_extra_costs =
1412 /* ALU */
1414 0, /* arith. */
1415 0, /* logical. */
1416 0, /* shift. */
1417 COSTS_N_INSNS (1), /* shift_reg. */
1418 COSTS_N_INSNS (1), /* arith_shift. */
1419 COSTS_N_INSNS (1), /* arith_shift_reg. */
1420 COSTS_N_INSNS (1), /* log_shift. */
1421 COSTS_N_INSNS (1), /* log_shift_reg. */
1422 0, /* extend. */
1423 COSTS_N_INSNS (1), /* extend_arith. */
1424 0, /* bfi. */
1425 COSTS_N_INSNS (1), /* bfx. */
1426 COSTS_N_INSNS (1), /* clz. */
1427 COSTS_N_INSNS (1), /* rev. */
1428 0, /* non_exec. */
1429 true /* non_exec_costs_exec. */
1431 /* MULT SImode */
1434 COSTS_N_INSNS (2), /* simple. */
1435 COSTS_N_INSNS (3), /* flag_setting. */
1436 COSTS_N_INSNS (2), /* extend. */
1437 COSTS_N_INSNS (3), /* add. */
1438 COSTS_N_INSNS (2), /* extend_add. */
1439 COSTS_N_INSNS (18) /* idiv. */
1441 /* MULT DImode */
1443 0, /* simple (N/A). */
1444 0, /* flag_setting (N/A). */
1445 COSTS_N_INSNS (3), /* extend. */
1446 0, /* add (N/A). */
1447 COSTS_N_INSNS (3), /* extend_add. */
1448 0 /* idiv (N/A). */
1451 /* LD/ST */
1453 COSTS_N_INSNS (3), /* load. */
1454 COSTS_N_INSNS (3), /* load_sign_extend. */
1455 COSTS_N_INSNS (3), /* ldrd. */
1456 COSTS_N_INSNS (3), /* ldm_1st. */
1457 1, /* ldm_regs_per_insn_1st. */
1458 2, /* ldm_regs_per_insn_subsequent. */
1459 COSTS_N_INSNS (3), /* loadf. */
1460 COSTS_N_INSNS (3), /* loadd. */
1461 0, /* load_unaligned. */
1462 0, /* store. */
1463 0, /* strd. */
1464 0, /* stm_1st. */
1465 1, /* stm_regs_per_insn_1st. */
1466 2, /* stm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* storef. */
1468 COSTS_N_INSNS (2), /* stored. */
1469 0 /* store_unaligned. */
1472 /* FP SFmode */
1474 COSTS_N_INSNS (17), /* div. */
1475 COSTS_N_INSNS (4), /* mult. */
1476 COSTS_N_INSNS (8), /* mult_addsub. */
1477 COSTS_N_INSNS (8), /* fma. */
1478 COSTS_N_INSNS (4), /* addsub. */
1479 COSTS_N_INSNS (2), /* fpconst. */
1480 COSTS_N_INSNS (2), /* neg. */
1481 COSTS_N_INSNS (2), /* compare. */
1482 COSTS_N_INSNS (4), /* widen. */
1483 COSTS_N_INSNS (4), /* narrow. */
1484 COSTS_N_INSNS (4), /* toint. */
1485 COSTS_N_INSNS (4), /* fromint. */
1486 COSTS_N_INSNS (4) /* roundint. */
1488 /* FP DFmode */
1490 COSTS_N_INSNS (31), /* div. */
1491 COSTS_N_INSNS (4), /* mult. */
1492 COSTS_N_INSNS (8), /* mult_addsub. */
1493 COSTS_N_INSNS (8), /* fma. */
1494 COSTS_N_INSNS (4), /* addsub. */
1495 COSTS_N_INSNS (2), /* fpconst. */
1496 COSTS_N_INSNS (2), /* neg. */
1497 COSTS_N_INSNS (2), /* compare. */
1498 COSTS_N_INSNS (4), /* widen. */
1499 COSTS_N_INSNS (4), /* narrow. */
1500 COSTS_N_INSNS (4), /* toint. */
1501 COSTS_N_INSNS (4), /* fromint. */
1502 COSTS_N_INSNS (4) /* roundint. */
1505 /* Vector */
1507 COSTS_N_INSNS (1) /* alu. */
1511 const struct cpu_cost_table cortexa15_extra_costs =
1513 /* ALU */
1515 0, /* arith. */
1516 0, /* logical. */
1517 0, /* shift. */
1518 0, /* shift_reg. */
1519 COSTS_N_INSNS (1), /* arith_shift. */
1520 COSTS_N_INSNS (1), /* arith_shift_reg. */
1521 COSTS_N_INSNS (1), /* log_shift. */
1522 COSTS_N_INSNS (1), /* log_shift_reg. */
1523 0, /* extend. */
1524 COSTS_N_INSNS (1), /* extend_arith. */
1525 COSTS_N_INSNS (1), /* bfi. */
1526 0, /* bfx. */
1527 0, /* clz. */
1528 0, /* rev. */
1529 0, /* non_exec. */
1530 true /* non_exec_costs_exec. */
1532 /* MULT SImode */
1535 COSTS_N_INSNS (2), /* simple. */
1536 COSTS_N_INSNS (3), /* flag_setting. */
1537 COSTS_N_INSNS (2), /* extend. */
1538 COSTS_N_INSNS (2), /* add. */
1539 COSTS_N_INSNS (2), /* extend_add. */
1540 COSTS_N_INSNS (18) /* idiv. */
1542 /* MULT DImode */
1544 0, /* simple (N/A). */
1545 0, /* flag_setting (N/A). */
1546 COSTS_N_INSNS (3), /* extend. */
1547 0, /* add (N/A). */
1548 COSTS_N_INSNS (3), /* extend_add. */
1549 0 /* idiv (N/A). */
1552 /* LD/ST */
1554 COSTS_N_INSNS (3), /* load. */
1555 COSTS_N_INSNS (3), /* load_sign_extend. */
1556 COSTS_N_INSNS (3), /* ldrd. */
1557 COSTS_N_INSNS (4), /* ldm_1st. */
1558 1, /* ldm_regs_per_insn_1st. */
1559 2, /* ldm_regs_per_insn_subsequent. */
1560 COSTS_N_INSNS (4), /* loadf. */
1561 COSTS_N_INSNS (4), /* loadd. */
1562 0, /* load_unaligned. */
1563 0, /* store. */
1564 0, /* strd. */
1565 COSTS_N_INSNS (1), /* stm_1st. */
1566 1, /* stm_regs_per_insn_1st. */
1567 2, /* stm_regs_per_insn_subsequent. */
1568 0, /* storef. */
1569 0, /* stored. */
1570 0 /* store_unaligned. */
1573 /* FP SFmode */
1575 COSTS_N_INSNS (17), /* div. */
1576 COSTS_N_INSNS (4), /* mult. */
1577 COSTS_N_INSNS (8), /* mult_addsub. */
1578 COSTS_N_INSNS (8), /* fma. */
1579 COSTS_N_INSNS (4), /* addsub. */
1580 COSTS_N_INSNS (2), /* fpconst. */
1581 COSTS_N_INSNS (2), /* neg. */
1582 COSTS_N_INSNS (5), /* compare. */
1583 COSTS_N_INSNS (4), /* widen. */
1584 COSTS_N_INSNS (4), /* narrow. */
1585 COSTS_N_INSNS (4), /* toint. */
1586 COSTS_N_INSNS (4), /* fromint. */
1587 COSTS_N_INSNS (4) /* roundint. */
1589 /* FP DFmode */
1591 COSTS_N_INSNS (31), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1606 /* Vector */
1608 COSTS_N_INSNS (1) /* alu. */
1612 const struct cpu_cost_table v7m_extra_costs =
1614 /* ALU */
1616 0, /* arith. */
1617 0, /* logical. */
1618 0, /* shift. */
1619 0, /* shift_reg. */
1620 0, /* arith_shift. */
1621 COSTS_N_INSNS (1), /* arith_shift_reg. */
1622 0, /* log_shift. */
1623 COSTS_N_INSNS (1), /* log_shift_reg. */
1624 0, /* extend. */
1625 COSTS_N_INSNS (1), /* extend_arith. */
1626 0, /* bfi. */
1627 0, /* bfx. */
1628 0, /* clz. */
1629 0, /* rev. */
1630 COSTS_N_INSNS (1), /* non_exec. */
1631 false /* non_exec_costs_exec. */
1634 /* MULT SImode */
1636 COSTS_N_INSNS (1), /* simple. */
1637 COSTS_N_INSNS (1), /* flag_setting. */
1638 COSTS_N_INSNS (2), /* extend. */
1639 COSTS_N_INSNS (1), /* add. */
1640 COSTS_N_INSNS (3), /* extend_add. */
1641 COSTS_N_INSNS (8) /* idiv. */
1643 /* MULT DImode */
1645 0, /* simple (N/A). */
1646 0, /* flag_setting (N/A). */
1647 COSTS_N_INSNS (2), /* extend. */
1648 0, /* add (N/A). */
1649 COSTS_N_INSNS (3), /* extend_add. */
1650 0 /* idiv (N/A). */
1653 /* LD/ST */
1655 COSTS_N_INSNS (2), /* load. */
1656 0, /* load_sign_extend. */
1657 COSTS_N_INSNS (3), /* ldrd. */
1658 COSTS_N_INSNS (2), /* ldm_1st. */
1659 1, /* ldm_regs_per_insn_1st. */
1660 1, /* ldm_regs_per_insn_subsequent. */
1661 COSTS_N_INSNS (2), /* loadf. */
1662 COSTS_N_INSNS (3), /* loadd. */
1663 COSTS_N_INSNS (1), /* load_unaligned. */
1664 COSTS_N_INSNS (2), /* store. */
1665 COSTS_N_INSNS (3), /* strd. */
1666 COSTS_N_INSNS (2), /* stm_1st. */
1667 1, /* stm_regs_per_insn_1st. */
1668 1, /* stm_regs_per_insn_subsequent. */
1669 COSTS_N_INSNS (2), /* storef. */
1670 COSTS_N_INSNS (3), /* stored. */
1671 COSTS_N_INSNS (1) /* store_unaligned. */
1674 /* FP SFmode */
1676 COSTS_N_INSNS (7), /* div. */
1677 COSTS_N_INSNS (2), /* mult. */
1678 COSTS_N_INSNS (5), /* mult_addsub. */
1679 COSTS_N_INSNS (3), /* fma. */
1680 COSTS_N_INSNS (1), /* addsub. */
1681 0, /* fpconst. */
1682 0, /* neg. */
1683 0, /* compare. */
1684 0, /* widen. */
1685 0, /* narrow. */
1686 0, /* toint. */
1687 0, /* fromint. */
1688 0 /* roundint. */
1690 /* FP DFmode */
1692 COSTS_N_INSNS (15), /* div. */
1693 COSTS_N_INSNS (5), /* mult. */
1694 COSTS_N_INSNS (7), /* mult_addsub. */
1695 COSTS_N_INSNS (7), /* fma. */
1696 COSTS_N_INSNS (3), /* addsub. */
1697 0, /* fpconst. */
1698 0, /* neg. */
1699 0, /* compare. */
1700 0, /* widen. */
1701 0, /* narrow. */
1702 0, /* toint. */
1703 0, /* fromint. */
1704 0 /* roundint. */
1707 /* Vector */
1709 COSTS_N_INSNS (1) /* alu. */
1713 const struct tune_params arm_slowmul_tune =
1715 arm_slowmul_rtx_costs,
1716 NULL,
1717 NULL, /* Sched adj cost. */
1718 3, /* Constant limit. */
1719 5, /* Max cond insns. */
1720 ARM_PREFETCH_NOT_BENEFICIAL,
1721 true, /* Prefer constant pool. */
1722 arm_default_branch_cost,
1723 false, /* Prefer LDRD/STRD. */
1724 {true, true}, /* Prefer non short circuit. */
1725 &arm_default_vec_cost, /* Vectorizer costs. */
1726 false, /* Prefer Neon for 64-bits bitops. */
1727 false, false, /* Prefer 32-bit encodings. */
1728 false, /* Prefer Neon for stringops. */
1729 8 /* Maximum insns to inline memset. */
1732 const struct tune_params arm_fastmul_tune =
1734 arm_fastmul_rtx_costs,
1735 NULL,
1736 NULL, /* Sched adj cost. */
1737 1, /* Constant limit. */
1738 5, /* Max cond insns. */
1739 ARM_PREFETCH_NOT_BENEFICIAL,
1740 true, /* Prefer constant pool. */
1741 arm_default_branch_cost,
1742 false, /* Prefer LDRD/STRD. */
1743 {true, true}, /* Prefer non short circuit. */
1744 &arm_default_vec_cost, /* Vectorizer costs. */
1745 false, /* Prefer Neon for 64-bits bitops. */
1746 false, false, /* Prefer 32-bit encodings. */
1747 false, /* Prefer Neon for stringops. */
1748 8 /* Maximum insns to inline memset. */
1751 /* StrongARM has early execution of branches, so a sequence that is worth
1752 skipping is shorter. Set max_insns_skipped to a lower value. */
1754 const struct tune_params arm_strongarm_tune =
1756 arm_fastmul_rtx_costs,
1757 NULL,
1758 NULL, /* Sched adj cost. */
1759 1, /* Constant limit. */
1760 3, /* Max cond insns. */
1761 ARM_PREFETCH_NOT_BENEFICIAL,
1762 true, /* Prefer constant pool. */
1763 arm_default_branch_cost,
1764 false, /* Prefer LDRD/STRD. */
1765 {true, true}, /* Prefer non short circuit. */
1766 &arm_default_vec_cost, /* Vectorizer costs. */
1767 false, /* Prefer Neon for 64-bits bitops. */
1768 false, false, /* Prefer 32-bit encodings. */
1769 false, /* Prefer Neon for stringops. */
1770 8 /* Maximum insns to inline memset. */
1773 const struct tune_params arm_xscale_tune =
1775 arm_xscale_rtx_costs,
1776 NULL,
1777 xscale_sched_adjust_cost,
1778 2, /* Constant limit. */
1779 3, /* Max cond insns. */
1780 ARM_PREFETCH_NOT_BENEFICIAL,
1781 true, /* Prefer constant pool. */
1782 arm_default_branch_cost,
1783 false, /* Prefer LDRD/STRD. */
1784 {true, true}, /* Prefer non short circuit. */
1785 &arm_default_vec_cost, /* Vectorizer costs. */
1786 false, /* Prefer Neon for 64-bits bitops. */
1787 false, false, /* Prefer 32-bit encodings. */
1788 false, /* Prefer Neon for stringops. */
1789 8 /* Maximum insns to inline memset. */
1792 const struct tune_params arm_9e_tune =
1794 arm_9e_rtx_costs,
1795 NULL,
1796 NULL, /* Sched adj cost. */
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 true, /* Prefer constant pool. */
1801 arm_default_branch_cost,
1802 false, /* Prefer LDRD/STRD. */
1803 {true, true}, /* Prefer non short circuit. */
1804 &arm_default_vec_cost, /* Vectorizer costs. */
1805 false, /* Prefer Neon for 64-bits bitops. */
1806 false, false, /* Prefer 32-bit encodings. */
1807 false, /* Prefer Neon for stringops. */
1808 8 /* Maximum insns to inline memset. */
1811 const struct tune_params arm_v6t2_tune =
1813 arm_9e_rtx_costs,
1814 NULL,
1815 NULL, /* Sched adj cost. */
1816 1, /* Constant limit. */
1817 5, /* Max cond insns. */
1818 ARM_PREFETCH_NOT_BENEFICIAL,
1819 false, /* Prefer constant pool. */
1820 arm_default_branch_cost,
1821 false, /* Prefer LDRD/STRD. */
1822 {true, true}, /* Prefer non short circuit. */
1823 &arm_default_vec_cost, /* Vectorizer costs. */
1824 false, /* Prefer Neon for 64-bits bitops. */
1825 false, false, /* Prefer 32-bit encodings. */
1826 false, /* Prefer Neon for stringops. */
1827 8 /* Maximum insns to inline memset. */
1830 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1831 const struct tune_params arm_cortex_tune =
1833 arm_9e_rtx_costs,
1834 &generic_extra_costs,
1835 NULL, /* Sched adj cost. */
1836 1, /* Constant limit. */
1837 5, /* Max cond insns. */
1838 ARM_PREFETCH_NOT_BENEFICIAL,
1839 false, /* Prefer constant pool. */
1840 arm_default_branch_cost,
1841 false, /* Prefer LDRD/STRD. */
1842 {true, true}, /* Prefer non short circuit. */
1843 &arm_default_vec_cost, /* Vectorizer costs. */
1844 false, /* Prefer Neon for 64-bits bitops. */
1845 false, false, /* Prefer 32-bit encodings. */
1846 false, /* Prefer Neon for stringops. */
1847 8 /* Maximum insns to inline memset. */
1850 const struct tune_params arm_cortex_a8_tune =
1852 arm_9e_rtx_costs,
1853 &cortexa8_extra_costs,
1854 NULL, /* Sched adj cost. */
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 false, /* Prefer constant pool. */
1859 arm_default_branch_cost,
1860 false, /* Prefer LDRD/STRD. */
1861 {true, true}, /* Prefer non short circuit. */
1862 &arm_default_vec_cost, /* Vectorizer costs. */
1863 false, /* Prefer Neon for 64-bits bitops. */
1864 false, false, /* Prefer 32-bit encodings. */
1865 true, /* Prefer Neon for stringops. */
1866 8 /* Maximum insns to inline memset. */
1869 const struct tune_params arm_cortex_a7_tune =
1871 arm_9e_rtx_costs,
1872 &cortexa7_extra_costs,
1873 NULL,
1874 1, /* Constant limit. */
1875 5, /* Max cond insns. */
1876 ARM_PREFETCH_NOT_BENEFICIAL,
1877 false, /* Prefer constant pool. */
1878 arm_default_branch_cost,
1879 false, /* Prefer LDRD/STRD. */
1880 {true, true}, /* Prefer non short circuit. */
1881 &arm_default_vec_cost, /* Vectorizer costs. */
1882 false, /* Prefer Neon for 64-bits bitops. */
1883 false, false, /* Prefer 32-bit encodings. */
1884 true, /* Prefer Neon for stringops. */
1885 8 /* Maximum insns to inline memset. */
1888 const struct tune_params arm_cortex_a15_tune =
1890 arm_9e_rtx_costs,
1891 &cortexa15_extra_costs,
1892 NULL, /* Sched adj cost. */
1893 1, /* Constant limit. */
1894 2, /* Max cond insns. */
1895 ARM_PREFETCH_NOT_BENEFICIAL,
1896 false, /* Prefer constant pool. */
1897 arm_default_branch_cost,
1898 true, /* Prefer LDRD/STRD. */
1899 {true, true}, /* Prefer non short circuit. */
1900 &arm_default_vec_cost, /* Vectorizer costs. */
1901 false, /* Prefer Neon for 64-bits bitops. */
1902 true, true, /* Prefer 32-bit encodings. */
1903 true, /* Prefer Neon for stringops. */
1904 8 /* Maximum insns to inline memset. */
1907 const struct tune_params arm_cortex_a53_tune =
1909 arm_9e_rtx_costs,
1910 &cortexa53_extra_costs,
1911 NULL, /* Scheduler cost adjustment. */
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 ARM_PREFETCH_NOT_BENEFICIAL,
1915 false, /* Prefer constant pool. */
1916 arm_default_branch_cost,
1917 false, /* Prefer LDRD/STRD. */
1918 {true, true}, /* Prefer non short circuit. */
1919 &arm_default_vec_cost, /* Vectorizer costs. */
1920 false, /* Prefer Neon for 64-bits bitops. */
1921 false, false, /* Prefer 32-bit encodings. */
1922 false, /* Prefer Neon for stringops. */
1923 8 /* Maximum insns to inline memset. */
1926 const struct tune_params arm_cortex_a57_tune =
1928 arm_9e_rtx_costs,
1929 &cortexa57_extra_costs,
1930 NULL, /* Scheduler cost adjustment. */
1931 1, /* Constant limit. */
1932 2, /* Max cond insns. */
1933 ARM_PREFETCH_NOT_BENEFICIAL,
1934 false, /* Prefer constant pool. */
1935 arm_default_branch_cost,
1936 true, /* Prefer LDRD/STRD. */
1937 {true, true}, /* Prefer non short circuit. */
1938 &arm_default_vec_cost, /* Vectorizer costs. */
1939 false, /* Prefer Neon for 64-bits bitops. */
1940 true, true, /* Prefer 32-bit encodings. */
1941 false, /* Prefer Neon for stringops. */
1942 8 /* Maximum insns to inline memset. */
1945 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1946 less appealing. Set max_insns_skipped to a low value. */
1948 const struct tune_params arm_cortex_a5_tune =
1950 arm_9e_rtx_costs,
1951 &cortexa5_extra_costs,
1952 NULL, /* Sched adj cost. */
1953 1, /* Constant limit. */
1954 1, /* Max cond insns. */
1955 ARM_PREFETCH_NOT_BENEFICIAL,
1956 false, /* Prefer constant pool. */
1957 arm_cortex_a5_branch_cost,
1958 false, /* Prefer LDRD/STRD. */
1959 {false, false}, /* Prefer non short circuit. */
1960 &arm_default_vec_cost, /* Vectorizer costs. */
1961 false, /* Prefer Neon for 64-bits bitops. */
1962 false, false, /* Prefer 32-bit encodings. */
1963 true, /* Prefer Neon for stringops. */
1964 8 /* Maximum insns to inline memset. */
1967 const struct tune_params arm_cortex_a9_tune =
1969 arm_9e_rtx_costs,
1970 &cortexa9_extra_costs,
1971 cortex_a9_sched_adjust_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 ARM_PREFETCH_BENEFICIAL(4,32,32),
1975 false, /* Prefer constant pool. */
1976 arm_default_branch_cost,
1977 false, /* Prefer LDRD/STRD. */
1978 {true, true}, /* Prefer non short circuit. */
1979 &arm_default_vec_cost, /* Vectorizer costs. */
1980 false, /* Prefer Neon for 64-bits bitops. */
1981 false, false, /* Prefer 32-bit encodings. */
1982 false, /* Prefer Neon for stringops. */
1983 8 /* Maximum insns to inline memset. */
1986 const struct tune_params arm_cortex_a12_tune =
1988 arm_9e_rtx_costs,
1989 &cortexa12_extra_costs,
1990 NULL,
1991 1, /* Constant limit. */
1992 5, /* Max cond insns. */
1993 ARM_PREFETCH_BENEFICIAL(4,32,32),
1994 false, /* Prefer constant pool. */
1995 arm_default_branch_cost,
1996 true, /* Prefer LDRD/STRD. */
1997 {true, true}, /* Prefer non short circuit. */
1998 &arm_default_vec_cost, /* Vectorizer costs. */
1999 false, /* Prefer Neon for 64-bits bitops. */
2000 false, false, /* Prefer 32-bit encodings. */
2001 true, /* Prefer Neon for stringops. */
2002 8 /* Maximum insns to inline memset. */
2005 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2006 cycle to execute each. An LDR from the constant pool also takes two cycles
2007 to execute, but mildly increases pipelining opportunity (consecutive
2008 loads/stores can be pipelined together, saving one cycle), and may also
2009 improve icache utilisation. Hence we prefer the constant pool for such
2010 processors. */
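/* In other words, a MOVW/MOVT pair costs 1 + 1 = 2 cycles, the same as the
   two-cycle LDR, but the LDR can pipeline with neighbouring loads/stores,
   hence the preference for the constant pool here.  */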
2012 const struct tune_params arm_v7m_tune =
2014 arm_9e_rtx_costs,
2015 &v7m_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 ARM_PREFETCH_NOT_BENEFICIAL,
2020 true, /* Prefer constant pool. */
2021 arm_cortex_m_branch_cost,
2022 false, /* Prefer LDRD/STRD. */
2023 {false, false}, /* Prefer non short circuit. */
2024 &arm_default_vec_cost, /* Vectorizer costs. */
2025 false, /* Prefer Neon for 64-bits bitops. */
2026 false, false, /* Prefer 32-bit encodings. */
2027 false, /* Prefer Neon for stringops. */
2028 8 /* Maximum insns to inline memset. */
2031 /* Cortex-M7 tuning. */
2033 const struct tune_params arm_cortex_m7_tune =
2035 arm_9e_rtx_costs,
2036 &v7m_extra_costs,
2037 NULL, /* Sched adj cost. */
2038 0, /* Constant limit. */
2039 0, /* Max cond insns. */
2040 ARM_PREFETCH_NOT_BENEFICIAL,
2041 true, /* Prefer constant pool. */
2042 arm_cortex_m_branch_cost,
2043 false, /* Prefer LDRD/STRD. */
2044 {true, true}, /* Prefer non short circuit. */
2045 &arm_default_vec_cost, /* Vectorizer costs. */
2046 false, /* Prefer Neon for 64-bits bitops. */
2047 false, false, /* Prefer 32-bit encodings. */
2048 false, /* Prefer Neon for stringops. */
2049 8 /* Maximum insns to inline memset. */
2052 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2053 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2054 const struct tune_params arm_v6m_tune =
2056 arm_9e_rtx_costs,
2057 NULL,
2058 NULL, /* Sched adj cost. */
2059 1, /* Constant limit. */
2060 5, /* Max cond insns. */
2061 ARM_PREFETCH_NOT_BENEFICIAL,
2062 false, /* Prefer constant pool. */
2063 arm_default_branch_cost,
2064 false, /* Prefer LDRD/STRD. */
2065 {false, false}, /* Prefer non short circuit. */
2066 &arm_default_vec_cost, /* Vectorizer costs. */
2067 false, /* Prefer Neon for 64-bits bitops. */
2068 false, false, /* Prefer 32-bit encodings. */
2069 false, /* Prefer Neon for stringops. */
2070 8 /* Maximum insns to inline memset. */
2073 const struct tune_params arm_fa726te_tune =
2075 arm_9e_rtx_costs,
2076 NULL,
2077 fa726te_sched_adjust_cost,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 true, /* Prefer constant pool. */
2082 arm_default_branch_cost,
2083 false, /* Prefer LDRD/STRD. */
2084 {true, true}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8 /* Maximum insns to inline memset. */
2093 /* Not all of these give usefully different compilation alternatives,
2094 but there is no simple way of generalizing them. */
2095 static const struct processors all_cores[] =
2097 /* ARM Cores */
2098 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2099 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2100 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2101 #include "arm-cores.def"
2102 #undef ARM_CORE
2103 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
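/* For illustration (a hypothetical entry, not taken from arm-cores.def):
   ARM_CORE("cortex-a9", arm_ca9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
   would expand, via the macro above, to
   {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A, FL_LDSCHED | FL_FOR_ARCH7A,
    &arm_cortex_a9_tune}.  */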
2106 static const struct processors all_architectures[] =
2108 /* ARM Architectures */
2109 /* We don't specify tuning costs here as it will be figured out
2110 from the core. */
2112 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2113 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2114 #include "arm-arches.def"
2115 #undef ARM_ARCH
2116 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2120 /* These are populated as commandline arguments are processed, or NULL
2121 if not specified. */
2122 static const struct processors *arm_selected_arch;
2123 static const struct processors *arm_selected_cpu;
2124 static const struct processors *arm_selected_tune;
2126 /* The name of the preprocessor macro to define for this architecture. */
2128 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2130 /* Available values for -mfpu=. */
2132 static const struct arm_fpu_desc all_fpus[] =
2134 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2135 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2136 #include "arm-fpus.def"
2137 #undef ARM_FPU
2141 /* Supported TLS relocations. */
2143 enum tls_reloc {
2144 TLS_GD32,
2145 TLS_LDM32,
2146 TLS_LDO32,
2147 TLS_IE32,
2148 TLS_LE32,
2149 TLS_DESCSEQ /* GNU scheme */
2152 /* The maximum number of insns to be used when loading a constant. */
2153 inline static int
2154 arm_constant_limit (bool size_p)
2156 return size_p ? 1 : current_tune->constant_limit;
2159 /* Emit an insn that's a simple single-set. Both the operands must be known
2160 to be valid. */
2161 inline static rtx_insn *
2162 emit_set_insn (rtx x, rtx y)
2164 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2167 /* Return the number of bits set in VALUE. */
2168 static unsigned
2169 bit_count (unsigned long value)
2171 unsigned long count = 0;
2173 while (value)
2175 count++;
2176 value &= value - 1; /* Clear the least-significant set bit. */
2179 return count;
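/* Worked example of the loop above (illustrative): for value = 0b101100 the
   successive values after "value &= value - 1" are 0b101000, 0b100000 and 0,
   so the loop runs once per set bit and the function returns 3.  */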
2182 typedef struct
2184 machine_mode mode;
2185 const char *name;
2186 } arm_fixed_mode_set;
2188 /* A small helper for setting fixed-point libfuncs. */
2190 static void
2191 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2192 const char *funcname, const char *modename,
2193 int num_suffix)
2195 char buffer[50];
2197 if (num_suffix == 0)
2198 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2199 else
2200 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2202 set_optab_libfunc (optable, mode, buffer);
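/* For illustration, derived from the sprintf format above: a call such as
   arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3) registers
   the name "__gnu_addsq3" as the SQmode addition libfunc.  */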
2205 static void
2206 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2207 machine_mode from, const char *funcname,
2208 const char *toname, const char *fromname)
2210 char buffer[50];
2211 const char *maybe_suffix_2 = "";
2213 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2214 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2215 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2216 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2217 maybe_suffix_2 = "2";
2219 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2220 maybe_suffix_2);
2222 set_conv_libfunc (optable, to, from, buffer);
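/* For illustration: a signed fract-to-fract conversion such as QQmode ->
   HQmode picks up the "2" suffix and yields "__gnu_fractqqhq2", whereas a
   conversion from QImode (not a fixed-point mode) yields "__gnu_fractqihq"
   with no suffix.  */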
2225 /* Set up library functions unique to ARM. */
2227 static void
2228 arm_init_libfuncs (void)
2230 /* For Linux, we have access to kernel support for atomic operations. */
2231 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2232 init_sync_libfuncs (2 * UNITS_PER_WORD);
2234 /* There are no special library functions unless we are using the
2235 ARM BPABI. */
2236 if (!TARGET_BPABI)
2237 return;
2239 /* The functions below are described in Section 4 of the "Run-Time
2240 ABI for the ARM architecture", Version 1.0. */
2242 /* Double-precision floating-point arithmetic. Table 2. */
2243 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2244 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2245 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2246 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2247 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2249 /* Double-precision comparisons. Table 3. */
2250 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2251 set_optab_libfunc (ne_optab, DFmode, NULL);
2252 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2253 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2254 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2255 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2256 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2258 /* Single-precision floating-point arithmetic. Table 4. */
2259 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2260 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2261 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2262 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2263 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2265 /* Single-precision comparisons. Table 5. */
2266 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2267 set_optab_libfunc (ne_optab, SFmode, NULL);
2268 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2269 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2270 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2271 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2272 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2274 /* Floating-point to integer conversions. Table 6. */
2275 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2276 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2277 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2278 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2279 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2280 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2281 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2282 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2284 /* Conversions between floating types. Table 7. */
2285 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2286 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2288 /* Integer to floating-point conversions. Table 8. */
2289 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2290 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2291 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2292 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2293 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2294 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2295 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2296 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2298 /* Long long. Table 9. */
2299 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2300 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2301 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2302 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2303 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2304 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2305 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2306 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2308 /* Integer (32/32->32) division. \S 4.3.1. */
2309 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2310 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2312 /* The divmod functions are designed so that they can be used for
2313 plain division, even though they return both the quotient and the
2314 remainder. The quotient is returned in the usual location (i.e.,
2315 r0 for SImode, {r0, r1} for DImode), just as would be expected
2316 for an ordinary division routine. Because the AAPCS calling
2317 conventions specify that all of { r0, r1, r2, r3 } are
2318 call-clobbered registers, there is no need to tell the compiler
2319 explicitly that those registers are clobbered by these
2320 routines. */
2321 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2322 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
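/* Sketch of the convention described above: __aeabi_idivmod returns the
   quotient in r0 and the remainder in r1, and __aeabi_ldivmod returns the
   quotient in {r0, r1} and the remainder in {r2, r3}, so plain division can
   simply ignore the remainder half of the result.  */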
2324 /* For SImode division the ABI provides div-without-mod routines,
2325 which are faster. */
2326 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2327 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2329 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2330 divmod libcalls instead. */
2331 set_optab_libfunc (smod_optab, DImode, NULL);
2332 set_optab_libfunc (umod_optab, DImode, NULL);
2333 set_optab_libfunc (smod_optab, SImode, NULL);
2334 set_optab_libfunc (umod_optab, SImode, NULL);
2336 /* Half-precision float operations. The compiler handles all operations
2337 with NULL libfuncs by converting to SFmode. */
2338 switch (arm_fp16_format)
2340 case ARM_FP16_FORMAT_IEEE:
2341 case ARM_FP16_FORMAT_ALTERNATIVE:
2343 /* Conversions. */
2344 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2345 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2346 ? "__gnu_f2h_ieee"
2347 : "__gnu_f2h_alternative"));
2348 set_conv_libfunc (sext_optab, SFmode, HFmode,
2349 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2350 ? "__gnu_h2f_ieee"
2351 : "__gnu_h2f_alternative"));
2353 /* Arithmetic. */
2354 set_optab_libfunc (add_optab, HFmode, NULL);
2355 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2356 set_optab_libfunc (smul_optab, HFmode, NULL);
2357 set_optab_libfunc (neg_optab, HFmode, NULL);
2358 set_optab_libfunc (sub_optab, HFmode, NULL);
2360 /* Comparisons. */
2361 set_optab_libfunc (eq_optab, HFmode, NULL);
2362 set_optab_libfunc (ne_optab, HFmode, NULL);
2363 set_optab_libfunc (lt_optab, HFmode, NULL);
2364 set_optab_libfunc (le_optab, HFmode, NULL);
2365 set_optab_libfunc (ge_optab, HFmode, NULL);
2366 set_optab_libfunc (gt_optab, HFmode, NULL);
2367 set_optab_libfunc (unord_optab, HFmode, NULL);
2368 break;
2370 default:
2371 break;
2374 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2376 const arm_fixed_mode_set fixed_arith_modes[] =
2378 { QQmode, "qq" },
2379 { UQQmode, "uqq" },
2380 { HQmode, "hq" },
2381 { UHQmode, "uhq" },
2382 { SQmode, "sq" },
2383 { USQmode, "usq" },
2384 { DQmode, "dq" },
2385 { UDQmode, "udq" },
2386 { TQmode, "tq" },
2387 { UTQmode, "utq" },
2388 { HAmode, "ha" },
2389 { UHAmode, "uha" },
2390 { SAmode, "sa" },
2391 { USAmode, "usa" },
2392 { DAmode, "da" },
2393 { UDAmode, "uda" },
2394 { TAmode, "ta" },
2395 { UTAmode, "uta" }
2397 const arm_fixed_mode_set fixed_conv_modes[] =
2399 { QQmode, "qq" },
2400 { UQQmode, "uqq" },
2401 { HQmode, "hq" },
2402 { UHQmode, "uhq" },
2403 { SQmode, "sq" },
2404 { USQmode, "usq" },
2405 { DQmode, "dq" },
2406 { UDQmode, "udq" },
2407 { TQmode, "tq" },
2408 { UTQmode, "utq" },
2409 { HAmode, "ha" },
2410 { UHAmode, "uha" },
2411 { SAmode, "sa" },
2412 { USAmode, "usa" },
2413 { DAmode, "da" },
2414 { UDAmode, "uda" },
2415 { TAmode, "ta" },
2416 { UTAmode, "uta" },
2417 { QImode, "qi" },
2418 { HImode, "hi" },
2419 { SImode, "si" },
2420 { DImode, "di" },
2421 { TImode, "ti" },
2422 { SFmode, "sf" },
2423 { DFmode, "df" }
2425 unsigned int i, j;
2427 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2429 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2430 "add", fixed_arith_modes[i].name, 3);
2431 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2432 "ssadd", fixed_arith_modes[i].name, 3);
2433 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2434 "usadd", fixed_arith_modes[i].name, 3);
2435 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2436 "sub", fixed_arith_modes[i].name, 3);
2437 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2438 "sssub", fixed_arith_modes[i].name, 3);
2439 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2440 "ussub", fixed_arith_modes[i].name, 3);
2441 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2442 "mul", fixed_arith_modes[i].name, 3);
2443 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2444 "ssmul", fixed_arith_modes[i].name, 3);
2445 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2446 "usmul", fixed_arith_modes[i].name, 3);
2447 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2448 "div", fixed_arith_modes[i].name, 3);
2449 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2450 "udiv", fixed_arith_modes[i].name, 3);
2451 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2452 "ssdiv", fixed_arith_modes[i].name, 3);
2453 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2454 "usdiv", fixed_arith_modes[i].name, 3);
2455 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2456 "neg", fixed_arith_modes[i].name, 2);
2457 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2458 "ssneg", fixed_arith_modes[i].name, 2);
2459 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2460 "usneg", fixed_arith_modes[i].name, 2);
2461 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2462 "ashl", fixed_arith_modes[i].name, 3);
2463 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2464 "ashr", fixed_arith_modes[i].name, 3);
2465 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2466 "lshr", fixed_arith_modes[i].name, 3);
2467 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2468 "ssashl", fixed_arith_modes[i].name, 3);
2469 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2470 "usashl", fixed_arith_modes[i].name, 3);
2471 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2472 "cmp", fixed_arith_modes[i].name, 2);
2475 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2476 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2478 if (i == j
2479 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2480 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2481 continue;
2483 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2484 fixed_conv_modes[j].mode, "fract",
2485 fixed_conv_modes[i].name,
2486 fixed_conv_modes[j].name);
2487 arm_set_fixed_conv_libfunc (satfract_optab,
2488 fixed_conv_modes[i].mode,
2489 fixed_conv_modes[j].mode, "satfract",
2490 fixed_conv_modes[i].name,
2491 fixed_conv_modes[j].name);
2492 arm_set_fixed_conv_libfunc (fractuns_optab,
2493 fixed_conv_modes[i].mode,
2494 fixed_conv_modes[j].mode, "fractuns",
2495 fixed_conv_modes[i].name,
2496 fixed_conv_modes[j].name);
2497 arm_set_fixed_conv_libfunc (satfractuns_optab,
2498 fixed_conv_modes[i].mode,
2499 fixed_conv_modes[j].mode, "satfractuns",
2500 fixed_conv_modes[i].name,
2501 fixed_conv_modes[j].name);
2505 if (TARGET_AAPCS_BASED)
2506 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2509 /* On AAPCS systems, this is the "struct __va_list". */
2510 static GTY(()) tree va_list_type;
2512 /* Return the type to use as __builtin_va_list. */
2513 static tree
2514 arm_build_builtin_va_list (void)
2516 tree va_list_name;
2517 tree ap_field;
2519 if (!TARGET_AAPCS_BASED)
2520 return std_build_builtin_va_list ();
2522 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2523 defined as:
2525 struct __va_list
2527 void *__ap;
2530 The C Library ABI further reinforces this definition in \S
2531 4.1.
2533 We must follow this definition exactly. The structure tag
2534 name is visible in C++ mangled names, and thus forms a part
2535 of the ABI. The field name may be used by people who
2536 #include <stdarg.h>. */
2537 /* Create the type. */
2538 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2539 /* Give it the required name. */
2540 va_list_name = build_decl (BUILTINS_LOCATION,
2541 TYPE_DECL,
2542 get_identifier ("__va_list"),
2543 va_list_type);
2544 DECL_ARTIFICIAL (va_list_name) = 1;
2545 TYPE_NAME (va_list_type) = va_list_name;
2546 TYPE_STUB_DECL (va_list_type) = va_list_name;
2547 /* Create the __ap field. */
2548 ap_field = build_decl (BUILTINS_LOCATION,
2549 FIELD_DECL,
2550 get_identifier ("__ap"),
2551 ptr_type_node);
2552 DECL_ARTIFICIAL (ap_field) = 1;
2553 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2554 TYPE_FIELDS (va_list_type) = ap_field;
2555 /* Compute its layout. */
2556 layout_type (va_list_type);
2558 return va_list_type;
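/* For orientation, the type built above corresponds to the C declarations
   required by the AAPCS:

       struct __va_list { void *__ap; };
       typedef struct __va_list va_list;  */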
2561 /* Return an expression of type "void *" pointing to the next
2562 available argument in a variable-argument list. VALIST is the
2563 user-level va_list object, of type __builtin_va_list. */
2564 static tree
2565 arm_extract_valist_ptr (tree valist)
2567 if (TREE_TYPE (valist) == error_mark_node)
2568 return error_mark_node;
2570 /* On an AAPCS target, the pointer is stored within "struct
2571 va_list". */
2572 if (TARGET_AAPCS_BASED)
2574 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2575 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2576 valist, ap_field, NULL_TREE);
2579 return valist;
2582 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2583 static void
2584 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2586 valist = arm_extract_valist_ptr (valist);
2587 std_expand_builtin_va_start (valist, nextarg);
2590 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2591 static tree
2592 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2593 gimple_seq *post_p)
2595 valist = arm_extract_valist_ptr (valist);
2596 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2599 /* Fix up any incompatible options that the user has specified. */
2600 static void
2601 arm_option_override (void)
2603 if (global_options_set.x_arm_arch_option)
2604 arm_selected_arch = &all_architectures[arm_arch_option];
2606 if (global_options_set.x_arm_cpu_option)
2608 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2609 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2612 if (global_options_set.x_arm_tune_option)
2613 arm_selected_tune = &all_cores[(int) arm_tune_option];
2615 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2616 SUBTARGET_OVERRIDE_OPTIONS;
2617 #endif
2619 if (arm_selected_arch)
2621 if (arm_selected_cpu)
2623 /* Check for conflict between mcpu and march. */
2624 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2626 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2627 arm_selected_cpu->name, arm_selected_arch->name);
2628 /* -march wins for code generation.
2629 -mcpu wins for default tuning. */
2630 if (!arm_selected_tune)
2631 arm_selected_tune = arm_selected_cpu;
2633 arm_selected_cpu = arm_selected_arch;
2635 else
2636 /* -mcpu wins. */
2637 arm_selected_arch = NULL;
2639 else
2640 /* Pick a CPU based on the architecture. */
2641 arm_selected_cpu = arm_selected_arch;
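/* For example (illustrative): "-mcpu=arm926ej-s -march=armv7-a" triggers the
   conflict warning above; code is then generated for armv7-a while default
   tuning still follows arm926ej-s.  */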
2644 /* If the user did not specify a processor, choose one for them. */
2645 if (!arm_selected_cpu)
2647 const struct processors * sel;
2648 unsigned int sought;
2650 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2651 if (!arm_selected_cpu->name)
2653 #ifdef SUBTARGET_CPU_DEFAULT
2654 /* Use the subtarget default CPU if none was specified by
2655 configure. */
2656 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2657 #endif
2658 /* Default to ARM6. */
2659 if (!arm_selected_cpu->name)
2660 arm_selected_cpu = &all_cores[arm6];
2663 sel = arm_selected_cpu;
2664 insn_flags = sel->flags;
2666 /* Now check to see if the user has specified some command line
2667 switches that require certain abilities from the cpu. */
2668 sought = 0;
2670 if (TARGET_INTERWORK || TARGET_THUMB)
2672 sought |= (FL_THUMB | FL_MODE32);
2674 /* There are no ARM processors that support both APCS-26 and
2675 interworking. Therefore we force FL_MODE26 to be removed
2676 from insn_flags here (if it was set), so that the search
2677 below will always be able to find a compatible processor. */
2678 insn_flags &= ~FL_MODE26;
2681 if (sought != 0 && ((sought & insn_flags) != sought))
2683 /* Try to locate a CPU type that supports all of the abilities
2684 of the default CPU, plus the extra abilities requested by
2685 the user. */
2686 for (sel = all_cores; sel->name != NULL; sel++)
2687 if ((sel->flags & sought) == (sought | insn_flags))
2688 break;
2690 if (sel->name == NULL)
2692 unsigned current_bit_count = 0;
2693 const struct processors * best_fit = NULL;
2695 /* Ideally we would like to issue an error message here
2696 saying that it was not possible to find a CPU compatible
2697 with the default CPU, but which also supports the command
2698 line options specified by the programmer, and so they
2699 ought to use the -mcpu=<name> command line option to
2700 override the default CPU type.
2702 If we cannot find a cpu that has both the
2703 characteristics of the default cpu and the given
2704 command line options we scan the array again looking
2705 for a best match. */
2706 for (sel = all_cores; sel->name != NULL; sel++)
2707 if ((sel->flags & sought) == sought)
2709 unsigned count;
2711 count = bit_count (sel->flags & insn_flags);
2713 if (count >= current_bit_count)
2715 best_fit = sel;
2716 current_bit_count = count;
2720 gcc_assert (best_fit);
2721 sel = best_fit;
2724 arm_selected_cpu = sel;
2728 gcc_assert (arm_selected_cpu);
2729 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2730 if (!arm_selected_tune)
2731 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2733 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2734 insn_flags = arm_selected_cpu->flags;
2735 arm_base_arch = arm_selected_cpu->base_arch;
2737 arm_tune = arm_selected_tune->core;
2738 tune_flags = arm_selected_tune->flags;
2739 current_tune = arm_selected_tune->tune;
2741 /* Make sure that the processor choice does not conflict with any of the
2742 other command line choices. */
2743 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2744 error ("target CPU does not support ARM mode");
2746 /* BPABI targets use linker tricks to allow interworking on cores
2747 without thumb support. */
2748 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2750 warning (0, "target CPU does not support interworking" );
2751 target_flags &= ~MASK_INTERWORK;
2754 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2756 warning (0, "target CPU does not support THUMB instructions");
2757 target_flags &= ~MASK_THUMB;
2760 if (TARGET_APCS_FRAME && TARGET_THUMB)
2762 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2763 target_flags &= ~MASK_APCS_FRAME;
2766 /* Callee super interworking implies thumb interworking. Adding
2767 this to the flags here simplifies the logic elsewhere. */
2768 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2769 target_flags |= MASK_INTERWORK;
2771 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2772 from here where no function is being compiled currently. */
2773 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2774 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2776 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2777 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2779 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2781 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2782 target_flags |= MASK_APCS_FRAME;
2785 if (TARGET_POKE_FUNCTION_NAME)
2786 target_flags |= MASK_APCS_FRAME;
2788 if (TARGET_APCS_REENT && flag_pic)
2789 error ("-fpic and -mapcs-reent are incompatible");
2791 if (TARGET_APCS_REENT)
2792 warning (0, "APCS reentrant code not supported. Ignored");
2794 /* If this target is normally configured to use APCS frames, warn if they
2795 are turned off and debugging is turned on. */
2796 if (TARGET_ARM
2797 && write_symbols != NO_DEBUG
2798 && !TARGET_APCS_FRAME
2799 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2800 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2802 if (TARGET_APCS_FLOAT)
2803 warning (0, "passing floating point arguments in fp regs not yet supported");
2805 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2806 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2807 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2808 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2809 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2810 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2811 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2812 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2813 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2814 arm_arch6m = arm_arch6 && !arm_arch_notm;
2815 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2816 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2817 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2818 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2819 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2821 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2822 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2823 thumb_code = TARGET_ARM == 0;
2824 thumb1_code = TARGET_THUMB1 != 0;
2825 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2826 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2827 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2828 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2829 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2830 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2831 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2832 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2833 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2834 if (arm_restrict_it == 2)
2835 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2837 if (!TARGET_THUMB2)
2838 arm_restrict_it = 0;
2840 /* If we are not using the default (ARM mode) section anchor offset
2841 ranges, then set the correct ranges now. */
2842 if (TARGET_THUMB1)
2844 /* Thumb-1 LDR instructions cannot have negative offsets.
2845 Permissible positive offset ranges are 5-bit (for byte loads),
2846 6-bit (for halfword loads), or 7-bit (for word loads).
2847 Empirical results suggest a 7-bit anchor range gives the best
2848 overall code size. */
2849 targetm.min_anchor_offset = 0;
2850 targetm.max_anchor_offset = 127;
2852 else if (TARGET_THUMB2)
2854 /* The minimum is set such that the total size of the block
2855 for a particular anchor is 248 + 1 + 4095 bytes, which is
2856 divisible by eight, ensuring natural spacing of anchors. */
2857 targetm.min_anchor_offset = -248;
2858 targetm.max_anchor_offset = 4095;
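/* Checking the arithmetic above: 248 + 1 + 4095 = 4344 = 8 * 543, so the
   per-anchor block size is indeed divisible by eight.  */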
2861 /* V5 code we generate is completely interworking capable, so we turn off
2862 TARGET_INTERWORK here to avoid many tests later on. */
2864 /* XXX However, we must pass the right pre-processor defines to CPP
2865 or GLD can get confused. This is a hack. */
2866 if (TARGET_INTERWORK)
2867 arm_cpp_interwork = 1;
2869 if (arm_arch5)
2870 target_flags &= ~MASK_INTERWORK;
2872 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2873 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2875 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2876 error ("iwmmxt abi requires an iwmmxt capable cpu");
2878 if (!global_options_set.x_arm_fpu_index)
2880 const char *target_fpu_name;
2881 bool ok;
2883 #ifdef FPUTYPE_DEFAULT
2884 target_fpu_name = FPUTYPE_DEFAULT;
2885 #else
2886 target_fpu_name = "vfp";
2887 #endif
2889 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2890 CL_TARGET);
2891 gcc_assert (ok);
2894 arm_fpu_desc = &all_fpus[arm_fpu_index];
2896 if (TARGET_NEON && !arm_arch7)
2897 error ("target CPU does not support NEON");
2899 switch (arm_fpu_desc->model)
2901 case ARM_FP_MODEL_VFP:
2902 arm_fpu_attr = FPU_VFP;
2903 break;
2905 default:
2906 gcc_unreachable();
2909 if (TARGET_AAPCS_BASED)
2911 if (TARGET_CALLER_INTERWORKING)
2912 error ("AAPCS does not support -mcaller-super-interworking");
2913 else
2914 if (TARGET_CALLEE_INTERWORKING)
2915 error ("AAPCS does not support -mcallee-super-interworking");
2918 /* iWMMXt and NEON are incompatible. */
2919 if (TARGET_IWMMXT && TARGET_NEON)
2920 error ("iWMMXt and NEON are incompatible");
2922 /* iWMMXt unsupported under Thumb mode. */
2923 if (TARGET_THUMB && TARGET_IWMMXT)
2924 error ("iWMMXt unsupported under Thumb mode");
2926 /* __fp16 support currently assumes the core has ldrh. */
2927 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2928 sorry ("__fp16 and no ldrh");
2930 /* If soft-float is specified then don't use FPU. */
2931 if (TARGET_SOFT_FLOAT)
2932 arm_fpu_attr = FPU_NONE;
2934 if (TARGET_AAPCS_BASED)
2936 if (arm_abi == ARM_ABI_IWMMXT)
2937 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2938 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2939 && TARGET_HARD_FLOAT
2940 && TARGET_VFP)
2941 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2942 else
2943 arm_pcs_default = ARM_PCS_AAPCS;
2945 else
2947 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2948 sorry ("-mfloat-abi=hard and VFP");
2950 if (arm_abi == ARM_ABI_APCS)
2951 arm_pcs_default = ARM_PCS_APCS;
2952 else
2953 arm_pcs_default = ARM_PCS_ATPCS;
2956 /* For arm2/3 there is no need to do any scheduling if we are doing
2957 software floating-point. */
2958 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2959 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2961 /* Use the cp15 method if it is available. */
2962 if (target_thread_pointer == TP_AUTO)
2964 if (arm_arch6k && !TARGET_THUMB1)
2965 target_thread_pointer = TP_CP15;
2966 else
2967 target_thread_pointer = TP_SOFT;
2970 if (TARGET_HARD_TP && TARGET_THUMB1)
2971 error ("can not use -mtp=cp15 with 16-bit Thumb");
2973 /* Override the default structure alignment for AAPCS ABI. */
2974 if (!global_options_set.x_arm_structure_size_boundary)
2976 if (TARGET_AAPCS_BASED)
2977 arm_structure_size_boundary = 8;
2979 else
2981 if (arm_structure_size_boundary != 8
2982 && arm_structure_size_boundary != 32
2983 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2985 if (ARM_DOUBLEWORD_ALIGN)
2986 warning (0,
2987 "structure size boundary can only be set to 8, 32 or 64");
2988 else
2989 warning (0, "structure size boundary can only be set to 8 or 32");
2990 arm_structure_size_boundary
2991 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2995 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2997 error ("RTP PIC is incompatible with Thumb");
2998 flag_pic = 0;
3001 /* If stack checking is disabled, we can use r10 as the PIC register,
3002 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3003 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3005 if (TARGET_VXWORKS_RTP)
3006 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3007 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3010 if (flag_pic && TARGET_VXWORKS_RTP)
3011 arm_pic_register = 9;
3013 if (arm_pic_register_string != NULL)
3015 int pic_register = decode_reg_name (arm_pic_register_string);
3017 if (!flag_pic)
3018 warning (0, "-mpic-register= is useless without -fpic");
3020 /* Prevent the user from choosing an obviously stupid PIC register. */
3021 else if (pic_register < 0 || call_used_regs[pic_register]
3022 || pic_register == HARD_FRAME_POINTER_REGNUM
3023 || pic_register == STACK_POINTER_REGNUM
3024 || pic_register >= PC_REGNUM
3025 || (TARGET_VXWORKS_RTP
3026 && (unsigned int) pic_register != arm_pic_register))
3027 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3028 else
3029 arm_pic_register = pic_register;
3032 if (TARGET_VXWORKS_RTP
3033 && !global_options_set.x_arm_pic_data_is_text_relative)
3034 arm_pic_data_is_text_relative = 0;
3036 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3037 if (fix_cm3_ldrd == 2)
3039 if (arm_selected_cpu->core == cortexm3)
3040 fix_cm3_ldrd = 1;
3041 else
3042 fix_cm3_ldrd = 0;
3045 /* Enable -munaligned-access by default for
3046 - all ARMv6 architecture-based processors
3047 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3048 - ARMv8 architecture-based processors.
3050 Disable -munaligned-access by default for
3051 - all pre-ARMv6 architecture-based processors
3052 - ARMv6-M architecture-based processors. */
3054 if (unaligned_access == 2)
3056 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3057 unaligned_access = 1;
3058 else
3059 unaligned_access = 0;
3061 else if (unaligned_access == 1
3062 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3064 warning (0, "target CPU does not support unaligned accesses");
3065 unaligned_access = 0;
3068 if (TARGET_THUMB1 && flag_schedule_insns)
3070 /* Don't warn since it's on by default in -O2. */
3071 flag_schedule_insns = 0;
3074 if (optimize_size)
3076 /* If optimizing for size, bump the number of instructions that we
3077 are prepared to conditionally execute (even on a StrongARM). */
3078 max_insns_skipped = 6;
3080 /* For THUMB2, we limit the conditional sequence to one IT block. */
3081 if (TARGET_THUMB2)
3082 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3084 else
3085 max_insns_skipped = current_tune->max_insns_skipped;
3087 /* Hot/Cold partitioning is not currently supported, since we can't
3088 handle literal pool placement in that case. */
3089 if (flag_reorder_blocks_and_partition)
3091 inform (input_location,
3092 "-freorder-blocks-and-partition not supported on this architecture");
3093 flag_reorder_blocks_and_partition = 0;
3094 flag_reorder_blocks = 1;
3097 if (flag_pic)
3098 /* Hoisting PIC address calculations more aggressively provides a small,
3099 but measurable, size reduction for PIC code. Therefore, we decrease
3100 the bar for unrestricted expression hoisting to the cost of PIC address
3101 calculation, which is 2 instructions. */
3102 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3103 global_options.x_param_values,
3104 global_options_set.x_param_values);
3106 /* ARM EABI defaults to strict volatile bitfields. */
3107 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3108 && abi_version_at_least(2))
3109 flag_strict_volatile_bitfields = 1;
3111 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3112 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3113 if (flag_prefetch_loop_arrays < 0
3114 && HAVE_prefetch
3115 && optimize >= 3
3116 && current_tune->num_prefetch_slots > 0)
3117 flag_prefetch_loop_arrays = 1;
3119 /* Set up the parameters used by the prefetching algorithm. Do not override the
3120 defaults unless we are tuning for a core we have researched values for. */
3121 if (current_tune->num_prefetch_slots > 0)
3122 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3123 current_tune->num_prefetch_slots,
3124 global_options.x_param_values,
3125 global_options_set.x_param_values);
3126 if (current_tune->l1_cache_line_size >= 0)
3127 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3128 current_tune->l1_cache_line_size,
3129 global_options.x_param_values,
3130 global_options_set.x_param_values);
3131 if (current_tune->l1_cache_size >= 0)
3132 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3133 current_tune->l1_cache_size,
3134 global_options.x_param_values,
3135 global_options_set.x_param_values);
3137 /* Use Neon rather than core registers to perform 64-bit
3138 operations. */
3139 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3140 if (use_neon_for_64bits == 1)
3141 prefer_neon_for_64bits = true;
3143 /* Use the alternative scheduling-pressure algorithm by default. */
3144 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3145 global_options.x_param_values,
3146 global_options_set.x_param_values);
3148 /* Disable shrink-wrap when optimizing function for size, since it tends to
3149 generate additional returns. */
3150 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3151 flag_shrink_wrap = false;
3152 /* TBD: Dwarf info for apcs frame is not handled yet. */
3153 if (TARGET_APCS_FRAME)
3154 flag_shrink_wrap = false;
3156 /* We only support -mslow-flash-data on armv7-m targets. */
3157 if (target_slow_flash_data
3158 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3159 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3160 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3162 /* Currently, for slow flash data, we just disable literal pools. */
3163 if (target_slow_flash_data)
3164 arm_disable_literal_pool = true;
3166 /* Thumb2 inline assembly code should always use unified syntax.
3167 This will apply to ARM and Thumb1 eventually. */
3168 if (TARGET_THUMB2)
3169 inline_asm_unified = 1;
3171 /* Register global variables with the garbage collector. */
3172 arm_add_gc_roots ();
3175 static void
3176 arm_add_gc_roots (void)
3178 gcc_obstack_init(&minipool_obstack);
3179 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3182 /* A table of known ARM exception types.
3183 For use with the interrupt function attribute. */
3185 typedef struct
3187 const char *const arg;
3188 const unsigned long return_value;
3190 isr_attribute_arg;
3192 static const isr_attribute_arg isr_attribute_args [] =
3194 { "IRQ", ARM_FT_ISR },
3195 { "irq", ARM_FT_ISR },
3196 { "FIQ", ARM_FT_FIQ },
3197 { "fiq", ARM_FT_FIQ },
3198 { "ABORT", ARM_FT_ISR },
3199 { "abort", ARM_FT_ISR },
3200 { "ABORT", ARM_FT_ISR },
3201 { "abort", ARM_FT_ISR },
3202 { "UNDEF", ARM_FT_EXCEPTION },
3203 { "undef", ARM_FT_EXCEPTION },
3204 { "SWI", ARM_FT_EXCEPTION },
3205 { "swi", ARM_FT_EXCEPTION },
3206 { NULL, ARM_FT_NORMAL }
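/* For illustration, these strings come from the "interrupt" (or "isr")
   function attribute, e.g.:

       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value below maps the string to the corresponding ARM_FT_* type.  */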
3209 /* Returns the (interrupt) function type of the current
3210 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3212 static unsigned long
3213 arm_isr_value (tree argument)
3215 const isr_attribute_arg * ptr;
3216 const char * arg;
3218 if (!arm_arch_notm)
3219 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3221 /* No argument - default to IRQ. */
3222 if (argument == NULL_TREE)
3223 return ARM_FT_ISR;
3225 /* Get the value of the argument. */
3226 if (TREE_VALUE (argument) == NULL_TREE
3227 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3228 return ARM_FT_UNKNOWN;
3230 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3232 /* Check it against the list of known arguments. */
3233 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3234 if (streq (arg, ptr->arg))
3235 return ptr->return_value;
3237 /* An unrecognized interrupt type. */
3238 return ARM_FT_UNKNOWN;
3241 /* Computes the type of the current function. */
3243 static unsigned long
3244 arm_compute_func_type (void)
3246 unsigned long type = ARM_FT_UNKNOWN;
3247 tree a;
3248 tree attr;
3250 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3252 /* Decide if the current function is volatile. Such functions
3253 never return, and many memory cycles can be saved by not storing
3254 register values that will never be needed again. This optimization
3255 was added to speed up context switching in a kernel application. */
3256 if (optimize > 0
3257 && (TREE_NOTHROW (current_function_decl)
3258 || !(flag_unwind_tables
3259 || (flag_exceptions
3260 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3261 && TREE_THIS_VOLATILE (current_function_decl))
3262 type |= ARM_FT_VOLATILE;
3264 if (cfun->static_chain_decl != NULL)
3265 type |= ARM_FT_NESTED;
3267 attr = DECL_ATTRIBUTES (current_function_decl);
3269 a = lookup_attribute ("naked", attr);
3270 if (a != NULL_TREE)
3271 type |= ARM_FT_NAKED;
3273 a = lookup_attribute ("isr", attr);
3274 if (a == NULL_TREE)
3275 a = lookup_attribute ("interrupt", attr);
3277 if (a == NULL_TREE)
3278 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3279 else
3280 type |= arm_isr_value (TREE_VALUE (a));
3282 return type;
3285 /* Returns the type of the current function. */
3287 unsigned long
3288 arm_current_func_type (void)
3290 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3291 cfun->machine->func_type = arm_compute_func_type ();
3293 return cfun->machine->func_type;
3296 bool
3297 arm_allocate_stack_slots_for_args (void)
3299 /* Naked functions should not allocate stack slots for arguments. */
3300 return !IS_NAKED (arm_current_func_type ());
3303 static bool
3304 arm_warn_func_return (tree decl)
3306 /* Naked functions are implemented entirely in assembly, including the
3307 return sequence, so suppress warnings about this. */
3308 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3312 /* Output assembler code for a block containing the constant parts
3313 of a trampoline, leaving space for the variable parts.
3315 On the ARM, (if r8 is the static chain regnum, and remembering that
3316 referencing pc adds an offset of 8) the trampoline looks like:
3317 ldr r8, [pc, #0]
3318 ldr pc, [pc]
3319 .word static chain value
3320 .word function's address
3321 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3323 static void
3324 arm_asm_trampoline_template (FILE *f)
3326 if (TARGET_ARM)
3328 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3329 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3331 else if (TARGET_THUMB2)
3333 /* The Thumb-2 trampoline is similar to the arm implementation.
3334 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3335 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3336 STATIC_CHAIN_REGNUM, PC_REGNUM);
3337 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3339 else
3341 ASM_OUTPUT_ALIGN (f, 2);
3342 fprintf (f, "\t.code\t16\n");
3343 fprintf (f, ".Ltrampoline_start:\n");
3344 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3345 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3346 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3347 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3348 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3349 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3351 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3352 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3355 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3357 static void
3358 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3360 rtx fnaddr, mem, a_tramp;
3362 emit_block_move (m_tramp, assemble_trampoline_template (),
3363 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3365 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3366 emit_move_insn (mem, chain_value);
3368 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3369 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3370 emit_move_insn (mem, fnaddr);
3372 a_tramp = XEXP (m_tramp, 0);
3373 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3374 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3375 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
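/* Worked example (illustrative, not part of the original source): after
   arm_trampoline_init runs on a 32-bit target the trampoline occupies four
   words.  In ARM state the pc reads as the instruction address plus 8, so
   the code and data line up as follows (assuming r8 is the static chain
   register):

       offset 0:  ldr r8, [pc, #0]    ; pc reads as 0 + 8  -> loads offset 8
       offset 4:  ldr pc, [pc]        ; pc reads as 4 + 8  -> loads offset 12
       offset 8:  <static chain value>       ; stored by the code above
       offset 12: <target function address>  ; stored by the code above

   In Thumb-2 state the pc reads as the instruction address plus 4, which is
   why the template above uses a #4 displacement to reach the same words.  */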
3378 /* Thumb trampolines should be entered in thumb mode, so set
3379 the bottom bit of the address. */
3381 static rtx
3382 arm_trampoline_adjust_address (rtx addr)
3384 if (TARGET_THUMB)
3385 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3386 NULL, 0, OPTAB_LIB_WIDEN);
3387 return addr;
3390 /* Return 1 if it is possible to return using a single instruction.
3391 If SIBLING is non-null, this is a test for a return before a sibling
3392 call. SIBLING is the call insn, so we can examine its register usage. */
3394 int
3395 use_return_insn (int iscond, rtx sibling)
3397 int regno;
3398 unsigned int func_type;
3399 unsigned long saved_int_regs;
3400 unsigned HOST_WIDE_INT stack_adjust;
3401 arm_stack_offsets *offsets;
3403 /* Never use a return instruction before reload has run. */
3404 if (!reload_completed)
3405 return 0;
3407 func_type = arm_current_func_type ();
3409 /* Naked, volatile and stack alignment functions need special
3410 consideration. */
3411 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3412 return 0;
3414 /* So do interrupt functions that use the frame pointer and Thumb
3415 interrupt functions. */
3416 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3417 return 0;
3419 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3420 && !optimize_function_for_size_p (cfun))
3421 return 0;
3423 offsets = arm_get_frame_offsets ();
3424 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3426 /* As do variadic functions. */
3427 if (crtl->args.pretend_args_size
3428 || cfun->machine->uses_anonymous_args
3429 /* Or if the function calls __builtin_eh_return () */
3430 || crtl->calls_eh_return
3431 /* Or if the function calls alloca */
3432 || cfun->calls_alloca
3433 /* Or if there is a stack adjustment. However, if the stack pointer
3434 is saved on the stack, we can use a pre-incrementing stack load. */
3435 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3436 && stack_adjust == 4)))
3437 return 0;
3439 saved_int_regs = offsets->saved_regs_mask;
3441 /* Unfortunately, the insn
3443 ldmib sp, {..., sp, ...}
3445 triggers a bug on most SA-110 based devices, such that the stack
3446 pointer won't be correctly restored if the instruction takes a
3447 page fault. We work around this problem by popping r3 along with
3448 the other registers, since that is never slower than executing
3449 another instruction.
3451 We test for !arm_arch5 here, because code for any architecture
3452 less than this could potentially be run on one of the buggy
3453 chips. */
3454 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3456 /* Validate that r3 is a call-clobbered register (always true in
3457 the default ABI) ... */
3458 if (!call_used_regs[3])
3459 return 0;
3461 /* ... that it isn't being used for a return value ... */
3462 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3463 return 0;
3465 /* ... or for a tail-call argument ... */
3466 if (sibling)
3468 gcc_assert (CALL_P (sibling));
3470 if (find_regno_fusage (sibling, USE, 3))
3471 return 0;
3474 /* ... and that there are no call-saved registers in r0-r2
3475 (always true in the default ABI). */
3476 if (saved_int_regs & 0x7)
3477 return 0;
3480 /* Can't be done if interworking with Thumb, and any registers have been
3481 stacked. */
3482 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3483 return 0;
3485 /* On StrongARM, conditional returns are expensive if they aren't
3486 taken and multiple registers have been stacked. */
3487 if (iscond && arm_tune_strongarm)
3489 /* Conditional return when just the LR is stored is a simple
3490 conditional-load instruction, that's not expensive. */
3491 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3492 return 0;
3494 if (flag_pic
3495 && arm_pic_register != INVALID_REGNUM
3496 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3497 return 0;
3500 /* If there are saved registers but the LR isn't saved, then we need
3501 two instructions for the return. */
3502 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3503 return 0;
3505 /* Can't be done if any of the VFP regs are pushed,
3506 since this also requires an insn. */
3507 if (TARGET_HARD_FLOAT && TARGET_VFP)
3508 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3509 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3510 return 0;
3512 if (TARGET_REALLY_IWMMXT)
3513 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3514 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3515 return 0;
3517 return 1;
3520 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3521 shrink-wrapping if possible. This is the case if we need to emit a
3522 prologue, which we can test by looking at the offsets. */
3523 bool
3524 use_simple_return_p (void)
3526 arm_stack_offsets *offsets;
3528 offsets = arm_get_frame_offsets ();
3529 return offsets->outgoing_args != 0;
3532 /* Return TRUE if int I is a valid immediate ARM constant. */
3534 int
3535 const_ok_for_arm (HOST_WIDE_INT i)
3537 int lowbit;
3539 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3540 be all zero, or all one. */
3541 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3542 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3543 != ((~(unsigned HOST_WIDE_INT) 0)
3544 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3545 return FALSE;
3547 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3549 /* Fast return for 0 and small values. We must do this for zero, since
3550 the code below can't handle that one case. */
3551 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3552 return TRUE;
3554 /* Get the number of trailing zeros. */
3555 lowbit = ffs((int) i) - 1;
3557 /* Only even shifts are allowed in ARM mode so round down to the
3558 nearest even number. */
3559 if (TARGET_ARM)
3560 lowbit &= ~1;
3562 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3563 return TRUE;
3565 if (TARGET_ARM)
3567 /* Allow rotated constants in ARM mode. */
3568 if (lowbit <= 4
3569 && ((i & ~0xc000003f) == 0
3570 || (i & ~0xf000000f) == 0
3571 || (i & ~0xfc000003) == 0))
3572 return TRUE;
3574 else
3576 HOST_WIDE_INT v;
3578 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3579 v = i & 0xff;
3580 v |= v << 16;
3581 if (i == v || i == (v | (v << 8)))
3582 return TRUE;
3584 /* Allow repeated pattern 0xXY00XY00. */
3585 v = i & 0xff00;
3586 v |= v << 16;
3587 if (i == v)
3588 return TRUE;
3591 return FALSE;
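/* A minimal standalone sketch of the ARM-mode rule tested above: a value is
   a valid immediate if it is an 8-bit value rotated right by an even amount.
   The helper below is illustrative only (a hypothetical function, kept under
   "#if 0" so it plays no part in the build).  */
#if 0
static int
arm_rotated_imm8_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT bits, undoing a rotate-right encoding.  */
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if ((v & ~0xffu) == 0)
        return 1;   /* E.g. 0xff, 0xff000000 and 0x000ff000 all pass.  */
    }
  return 0;         /* E.g. 0x101 and 0x00ffff00 do not.  */
}
#endif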
3594 /* Return true if I is a valid constant for the operation CODE. */
3595 int
3596 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3598 if (const_ok_for_arm (i))
3599 return 1;
3601 switch (code)
3603 case SET:
3604 /* See if we can use movw. */
3605 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3606 return 1;
3607 else
3608 /* Otherwise, try mvn. */
3609 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3611 case PLUS:
3612 /* See if we can use addw or subw. */
3613 if (TARGET_THUMB2
3614 && ((i & 0xfffff000) == 0
3615 || ((-i) & 0xfffff000) == 0))
3616 return 1;
3617 /* else fall through. */
3619 case COMPARE:
3620 case EQ:
3621 case NE:
3622 case GT:
3623 case LE:
3624 case LT:
3625 case GE:
3626 case GEU:
3627 case LTU:
3628 case GTU:
3629 case LEU:
3630 case UNORDERED:
3631 case ORDERED:
3632 case UNEQ:
3633 case UNGE:
3634 case UNLT:
3635 case UNGT:
3636 case UNLE:
3637 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3639 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3640 case XOR:
3641 return 0;
3643 case IOR:
3644 if (TARGET_THUMB2)
3645 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3646 return 0;
3648 case AND:
3649 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3651 default:
3652 gcc_unreachable ();
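/* Worked examples of the rules above (illustrative only):
     SET  with i = 0x1234     : not an 8-bit rotated value, but a target
                                with movw/movt (arm_arch_thumb2) loads it
                                with a single movw.
     SET  with i = 0xffffff00 : ~i = 0xff is valid, so one mvn suffices.
     PLUS with i = 0xfff      : fits the 12-bit addw/subw range on Thumb-2.
     COMPARE with i = -200    : -i = 200 is valid, so cmp becomes cmn #200.
     AND  with i = 0xffffff00 : ~i = 0xff is valid, so bic #0xff is used.  */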
3656 /* Return true if I is a valid di mode constant for the operation CODE. */
3657 int
3658 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3660 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3661 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3662 rtx hi = GEN_INT (hi_val);
3663 rtx lo = GEN_INT (lo_val);
3665 if (TARGET_THUMB1)
3666 return 0;
3668 switch (code)
3670 case AND:
3671 case IOR:
3672 case XOR:
3673 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3674 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3675 case PLUS:
3676 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3678 default:
3679 return 0;
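/* Worked DImode example (illustrative only): for IOR with the constant
   0x00000000FFFFFFFF, hi_val is 0 and lo_val is 0xFFFFFFFF.  Both halves
   are acceptable (an all-ones word is special-cased, since ORing with all
   ones just sets that word to -1), so the 64-bit operation can be split
   into two 32-bit operations without first building the constant in a
   register pair.  */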
3683 /* Emit a sequence of insns to handle a large constant.
3684 CODE is the code of the operation required, it can be any of SET, PLUS,
3685 IOR, AND, XOR, MINUS;
3686 MODE is the mode in which the operation is being performed;
3687 VAL is the integer to operate on;
3688 SOURCE is the other operand (a register, or a null-pointer for SET);
3689 SUBTARGETS means it is safe to create scratch registers if that will
3690 either produce a simpler sequence, or we will want to cse the values.
3691 Return value is the number of insns emitted. */
3693 /* ??? Tweak this for thumb2. */
3694 int
3695 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3696 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3698 rtx cond;
3700 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3701 cond = COND_EXEC_TEST (PATTERN (insn));
3702 else
3703 cond = NULL_RTX;
3705 if (subtargets || code == SET
3706 || (REG_P (target) && REG_P (source)
3707 && REGNO (target) != REGNO (source)))
3709 /* After arm_reorg has been called, we can't fix up expensive
3710 constants by pushing them into memory so we must synthesize
3711 them in-line, regardless of the cost. This is only likely to
3712 be more costly on chips that have load delay slots and we are
3713 compiling without running the scheduler (so no splitting
3714 occurred before the final instruction emission).
3716 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3718 if (!cfun->machine->after_arm_reorg
3719 && !cond
3720 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3721 1, 0)
3722 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3723 + (code != SET))))
3725 if (code == SET)
3727 /* Currently SET is the only monadic value for CODE; all
3728 the rest are dyadic. */
3729 if (TARGET_USE_MOVT)
3730 arm_emit_movpair (target, GEN_INT (val));
3731 else
3732 emit_set_insn (target, GEN_INT (val));
3734 return 1;
3736 else
3738 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3740 if (TARGET_USE_MOVT)
3741 arm_emit_movpair (temp, GEN_INT (val));
3742 else
3743 emit_set_insn (temp, GEN_INT (val));
3745 /* For MINUS, the value is subtracted from, since we never
3746 have subtraction of a constant. */
3747 if (code == MINUS)
3748 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3749 else
3750 emit_set_insn (target,
3751 gen_rtx_fmt_ee (code, mode, source, temp));
3752 return 2;
3757 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3761 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3762 ARM/THUMB2 immediates, and add up to VAL.
3763 The function return value gives the number of insns required. */
3764 static int
3765 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3766 struct four_ints *return_sequence)
3768 int best_consecutive_zeros = 0;
3769 int i;
3770 int best_start = 0;
3771 int insns1, insns2;
3772 struct four_ints tmp_sequence;
3774 /* If we aren't targeting ARM, the best place to start is always at
3775 the bottom, otherwise look more closely. */
3776 if (TARGET_ARM)
3778 for (i = 0; i < 32; i += 2)
3780 int consecutive_zeros = 0;
3782 if (!(val & (3 << i)))
3784 while ((i < 32) && !(val & (3 << i)))
3786 consecutive_zeros += 2;
3787 i += 2;
3789 if (consecutive_zeros > best_consecutive_zeros)
3791 best_consecutive_zeros = consecutive_zeros;
3792 best_start = i - consecutive_zeros;
3794 i -= 2;
3799 /* So long as it won't require any more insns to do so, it's
3800 desirable to emit a small constant (in bits 0...9) in the last
3801 insn. This way there is more chance that it can be combined with
3802 a later addressing insn to form a pre-indexed load or store
3803 operation. Consider:
3805 *((volatile int *)0xe0000100) = 1;
3806 *((volatile int *)0xe0000110) = 2;
3808 We want this to wind up as:
3810 mov rA, #0xe0000000
3811 mov rB, #1
3812 str rB, [rA, #0x100]
3813 mov rB, #2
3814 str rB, [rA, #0x110]
3816 rather than having to synthesize both large constants from scratch.
3818 Therefore, we calculate how many insns would be required to emit
3819 the constant starting from `best_start', and also starting from
3820 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3821 yield a shorter sequence, we may as well use zero. */
3822 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3823 if (best_start != 0
3824 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3826 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3827 if (insns2 <= insns1)
3829 *return_sequence = tmp_sequence;
3830 insns1 = insns2;
3834 return insns1;
3837 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3838 static int
3839 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3840 struct four_ints *return_sequence, int i)
3842 int remainder = val & 0xffffffff;
3843 int insns = 0;
3845 /* Try and find a way of doing the job in either two or three
3846 instructions.
3848 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3849 location. We start at position I. This may be the MSB, or
3850 optimal_immediate_sequence may have positioned it at the largest block
3851 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3852 wrapping around to the top of the word when we drop off the bottom.
3853 In the worst case this code should produce no more than four insns.
3855 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3856 constants, shifted to any arbitrary location. We should always start
3857 at the MSB. */
3860 int end;
3861 unsigned int b1, b2, b3, b4;
3862 unsigned HOST_WIDE_INT result;
3863 int loc;
3865 gcc_assert (insns < 4);
3867 if (i <= 0)
3868 i += 32;
3870 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3871 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3873 loc = i;
3874 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3875 /* We can use addw/subw for the last 12 bits. */
3876 result = remainder;
3877 else
3879 /* Use an 8-bit shifted/rotated immediate. */
3880 end = i - 8;
3881 if (end < 0)
3882 end += 32;
3883 result = remainder & ((0x0ff << end)
3884 | ((i < end) ? (0xff >> (32 - end))
3885 : 0));
3886 i -= 8;
3889 else
3891 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3892 arbitrary shifts. */
3893 i -= TARGET_ARM ? 2 : 1;
3894 continue;
3897 /* Next, see if we can do a better job with a thumb2 replicated
3898 constant.
3900 We do it this way around to catch cases like 0x01F001E0 where
3901 two 8-bit immediates would work, but a replicated constant would
3902 make it worse.
3904 TODO: 16-bit constants that don't clear all the bits, but still win.
3905 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3906 if (TARGET_THUMB2)
3908 b1 = (remainder & 0xff000000) >> 24;
3909 b2 = (remainder & 0x00ff0000) >> 16;
3910 b3 = (remainder & 0x0000ff00) >> 8;
3911 b4 = remainder & 0xff;
3913 if (loc > 24)
3915 /* The 8-bit immediate already found clears b1 (and maybe b2),
3916 but must leave b3 and b4 alone. */
3918 /* First try to find a 32-bit replicated constant that clears
3919 almost everything. We can assume that we can't do it in one,
3920 or else we wouldn't be here. */
3921 unsigned int tmp = b1 & b2 & b3 & b4;
3922 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3923 + (tmp << 24);
3924 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3925 + (tmp == b3) + (tmp == b4);
3926 if (tmp
3927 && (matching_bytes >= 3
3928 || (matching_bytes == 2
3929 && const_ok_for_op (remainder & ~tmp2, code))))
3931 /* At least 3 of the bytes match, and the fourth has at
3932 least as many bits set, or two of the bytes match
3933 and it will only require one more insn to finish. */
3934 result = tmp2;
3935 i = tmp != b1 ? 32
3936 : tmp != b2 ? 24
3937 : tmp != b3 ? 16
3938 : 8;
3941 /* Second, try to find a 16-bit replicated constant that can
3942 leave three of the bytes clear. If b2 or b4 is already
3943 zero, then we can. If the 8-bit from above would not
3944 clear b2 anyway, then we still win. */
3945 else if (b1 == b3 && (!b2 || !b4
3946 || (remainder & 0x00ff0000 & ~result)))
3948 result = remainder & 0xff00ff00;
3949 i = 24;
3952 else if (loc > 16)
3954 /* The 8-bit immediate already found clears b2 (and maybe b3)
3955 and we don't get here unless b1 is already clear, but it will
3956 leave b4 unchanged. */
3958 /* If we can clear b2 and b4 at once, then we win, since the
3959 8-bits couldn't possibly reach that far. */
3960 if (b2 == b4)
3962 result = remainder & 0x00ff00ff;
3963 i = 16;
3968 return_sequence->i[insns++] = result;
3969 remainder &= ~result;
3971 if (code == SET || code == MINUS)
3972 code = PLUS;
3974 while (remainder);
3976 return insns;
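/* Worked example (illustrative only): synthesizing 0x00000fff in ARM mode.
   No single 8-bit rotated immediate covers twelve set bits, so the loop
   above peels off two chunks, e.g. 0xff0 and 0x00f, giving two-insn
   sequences such as

       mov rN, #0xff0          for SET:  followed by  add rN, rN, #0xf
       orr rN, rM, #0xff0      for IOR:  followed by  orr rN, rN, #0xf

   On Thumb-2 a replicated-byte value such as 0x00ff00ff needs no splitting
   at all, since it is itself a valid modified immediate.  */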
3979 /* Emit an instruction with the indicated PATTERN. If COND is
3980 non-NULL, conditionalize the execution of the instruction on COND
3981 being true. */
3983 static void
3984 emit_constant_insn (rtx cond, rtx pattern)
3986 if (cond)
3987 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3988 emit_insn (pattern);
3991 /* As above, but extra parameter GENERATE which, if clear, suppresses
3992 RTL generation. */
3994 static int
3995 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3996 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3997 int generate)
3999 int can_invert = 0;
4000 int can_negate = 0;
4001 int final_invert = 0;
4002 int i;
4003 int set_sign_bit_copies = 0;
4004 int clear_sign_bit_copies = 0;
4005 int clear_zero_bit_copies = 0;
4006 int set_zero_bit_copies = 0;
4007 int insns = 0, neg_insns, inv_insns;
4008 unsigned HOST_WIDE_INT temp1, temp2;
4009 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4010 struct four_ints *immediates;
4011 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4013 /* Find out which operations are safe for a given CODE. Also do a quick
4014 check for degenerate cases; these can occur when DImode operations
4015 are split. */
4016 switch (code)
4018 case SET:
4019 can_invert = 1;
4020 break;
4022 case PLUS:
4023 can_negate = 1;
4024 break;
4026 case IOR:
4027 if (remainder == 0xffffffff)
4029 if (generate)
4030 emit_constant_insn (cond,
4031 gen_rtx_SET (VOIDmode, target,
4032 GEN_INT (ARM_SIGN_EXTEND (val))));
4033 return 1;
4036 if (remainder == 0)
4038 if (reload_completed && rtx_equal_p (target, source))
4039 return 0;
4041 if (generate)
4042 emit_constant_insn (cond,
4043 gen_rtx_SET (VOIDmode, target, source));
4044 return 1;
4046 break;
4048 case AND:
4049 if (remainder == 0)
4051 if (generate)
4052 emit_constant_insn (cond,
4053 gen_rtx_SET (VOIDmode, target, const0_rtx));
4054 return 1;
4056 if (remainder == 0xffffffff)
4058 if (reload_completed && rtx_equal_p (target, source))
4059 return 0;
4060 if (generate)
4061 emit_constant_insn (cond,
4062 gen_rtx_SET (VOIDmode, target, source));
4063 return 1;
4065 can_invert = 1;
4066 break;
4068 case XOR:
4069 if (remainder == 0)
4071 if (reload_completed && rtx_equal_p (target, source))
4072 return 0;
4073 if (generate)
4074 emit_constant_insn (cond,
4075 gen_rtx_SET (VOIDmode, target, source));
4076 return 1;
4079 if (remainder == 0xffffffff)
4081 if (generate)
4082 emit_constant_insn (cond,
4083 gen_rtx_SET (VOIDmode, target,
4084 gen_rtx_NOT (mode, source)));
4085 return 1;
4087 final_invert = 1;
4088 break;
4090 case MINUS:
4091 /* We treat MINUS as (val - source), since (source - val) is always
4092 passed as (source + (-val)). */
4093 if (remainder == 0)
4095 if (generate)
4096 emit_constant_insn (cond,
4097 gen_rtx_SET (VOIDmode, target,
4098 gen_rtx_NEG (mode, source)));
4099 return 1;
4101 if (const_ok_for_arm (val))
4103 if (generate)
4104 emit_constant_insn (cond,
4105 gen_rtx_SET (VOIDmode, target,
4106 gen_rtx_MINUS (mode, GEN_INT (val),
4107 source)));
4108 return 1;
4111 break;
4113 default:
4114 gcc_unreachable ();
4117 /* If we can do it in one insn get out quickly. */
4118 if (const_ok_for_op (val, code))
4120 if (generate)
4121 emit_constant_insn (cond,
4122 gen_rtx_SET (VOIDmode, target,
4123 (source
4124 ? gen_rtx_fmt_ee (code, mode, source,
4125 GEN_INT (val))
4126 : GEN_INT (val))));
4127 return 1;
4130 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4131 insn. */
4132 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4133 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4135 if (generate)
4137 if (mode == SImode && i == 16)
4138 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4139 smaller insn. */
4140 emit_constant_insn (cond,
4141 gen_zero_extendhisi2
4142 (target, gen_lowpart (HImode, source)));
4143 else
4144 /* The extzv pattern only supports SImode, but we can coerce
4145 the operands into that mode. */
4146 emit_constant_insn (cond,
4147 gen_extzv_t2 (gen_lowpart (SImode, target),
4148 gen_lowpart (SImode, source),
4149 GEN_INT (i), const0_rtx));
4152 return 1;
4155 /* Calculate a few attributes that may be useful for specific
4156 optimizations. */
4157 /* Count number of leading zeros. */
4158 for (i = 31; i >= 0; i--)
4160 if ((remainder & (1 << i)) == 0)
4161 clear_sign_bit_copies++;
4162 else
4163 break;
4166 /* Count number of leading 1's. */
4167 for (i = 31; i >= 0; i--)
4169 if ((remainder & (1 << i)) != 0)
4170 set_sign_bit_copies++;
4171 else
4172 break;
4175 /* Count number of trailing zero's. */
4176 for (i = 0; i <= 31; i++)
4178 if ((remainder & (1 << i)) == 0)
4179 clear_zero_bit_copies++;
4180 else
4181 break;
4184 /* Count number of trailing 1's. */
4185 for (i = 0; i <= 31; i++)
4187 if ((remainder & (1 << i)) != 0)
4188 set_zero_bit_copies++;
4189 else
4190 break;
4193 switch (code)
4195 case SET:
4196 /* See if we can do this by sign_extending a constant that is known
4197 to be negative. This is a good way of doing it, since the shift
4198 may well merge into a subsequent insn. */
4199 if (set_sign_bit_copies > 1)
4201 if (const_ok_for_arm
4202 (temp1 = ARM_SIGN_EXTEND (remainder
4203 << (set_sign_bit_copies - 1))))
4205 if (generate)
4207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (VOIDmode, new_src,
4210 GEN_INT (temp1)));
4211 emit_constant_insn (cond,
4212 gen_ashrsi3 (target, new_src,
4213 GEN_INT (set_sign_bit_copies - 1)));
4215 return 2;
4217 /* For an inverted constant, we will need to set the low bits,
4218 these will be shifted out of harm's way. */
4219 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4220 if (const_ok_for_arm (~temp1))
4222 if (generate)
4224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (VOIDmode, new_src,
4227 GEN_INT (temp1)));
4228 emit_constant_insn (cond,
4229 gen_ashrsi3 (target, new_src,
4230 GEN_INT (set_sign_bit_copies - 1)));
4232 return 2;
4236 /* See if we can calculate the value as the difference between two
4237 valid immediates. */
4238 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4240 int topshift = clear_sign_bit_copies & ~1;
4242 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4243 & (0xff000000 >> topshift));
4245 /* If temp1 is zero, then that means the 9 most significant
4246 bits of remainder were 1 and we've caused it to overflow.
4247 When topshift is 0 we don't need to do anything since we
4248 can borrow from 'bit 32'. */
4249 if (temp1 == 0 && topshift != 0)
4250 temp1 = 0x80000000 >> (topshift - 1);
4252 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4254 if (const_ok_for_arm (temp2))
4256 if (generate)
4258 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4259 emit_constant_insn (cond,
4260 gen_rtx_SET (VOIDmode, new_src,
4261 GEN_INT (temp1)));
4262 emit_constant_insn (cond,
4263 gen_addsi3 (target, new_src,
4264 GEN_INT (-temp2)));
4267 return 2;
4271 /* See if we can generate this by setting the bottom (or the top)
4272 16 bits, and then shifting these into the other half of the
4273 word. We only look for the simplest cases; to do more would cost
4274 too much. Be careful, however, not to generate this when the
4275 alternative would take fewer insns. */
4276 if (val & 0xffff0000)
4278 temp1 = remainder & 0xffff0000;
4279 temp2 = remainder & 0x0000ffff;
4281 /* Overlaps outside this range are best done using other methods. */
4282 for (i = 9; i < 24; i++)
4284 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4285 && !const_ok_for_arm (temp2))
4287 rtx new_src = (subtargets
4288 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4289 : target);
4290 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4291 source, subtargets, generate);
4292 source = new_src;
4293 if (generate)
4294 emit_constant_insn
4295 (cond,
4296 gen_rtx_SET
4297 (VOIDmode, target,
4298 gen_rtx_IOR (mode,
4299 gen_rtx_ASHIFT (mode, source,
4300 GEN_INT (i)),
4301 source)));
4302 return insns + 1;
4306 /* Don't duplicate cases already considered. */
4307 for (i = 17; i < 24; i++)
4309 if (((temp1 | (temp1 >> i)) == remainder)
4310 && !const_ok_for_arm (temp1))
4312 rtx new_src = (subtargets
4313 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4314 : target);
4315 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4316 source, subtargets, generate);
4317 source = new_src;
4318 if (generate)
4319 emit_constant_insn
4320 (cond,
4321 gen_rtx_SET (VOIDmode, target,
4322 gen_rtx_IOR
4323 (mode,
4324 gen_rtx_LSHIFTRT (mode, source,
4325 GEN_INT (i)),
4326 source)));
4327 return insns + 1;
4331 break;
4333 case IOR:
4334 case XOR:
4335 /* If we have IOR or XOR, and the constant can be loaded in a
4336 single instruction, and we can find a temporary to put it in,
4337 then this can be done in two instructions instead of 3-4. */
4338 if (subtargets
4339 /* TARGET can't be NULL if SUBTARGETS is 0 */
4340 || (reload_completed && !reg_mentioned_p (target, source)))
4342 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4344 if (generate)
4346 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4348 emit_constant_insn (cond,
4349 gen_rtx_SET (VOIDmode, sub,
4350 GEN_INT (val)));
4351 emit_constant_insn (cond,
4352 gen_rtx_SET (VOIDmode, target,
4353 gen_rtx_fmt_ee (code, mode,
4354 source, sub)));
4356 return 2;
4360 if (code == XOR)
4361 break;
4363 /* Convert.
4364 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4365 and the remainder 0s for e.g. 0xfff00000)
4366 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4368 This can be done in 2 instructions by using shifts with mov or mvn.
4369 e.g. for
4370 x = x | 0xfff00000;
4371 we generate.
4372 mvn r0, r0, asl #12
4373 mvn r0, r0, lsr #12 */
4374 if (set_sign_bit_copies > 8
4375 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4377 if (generate)
4379 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4380 rtx shift = GEN_INT (set_sign_bit_copies);
4382 emit_constant_insn
4383 (cond,
4384 gen_rtx_SET (VOIDmode, sub,
4385 gen_rtx_NOT (mode,
4386 gen_rtx_ASHIFT (mode,
4387 source,
4388 shift))));
4389 emit_constant_insn
4390 (cond,
4391 gen_rtx_SET (VOIDmode, target,
4392 gen_rtx_NOT (mode,
4393 gen_rtx_LSHIFTRT (mode, sub,
4394 shift))));
4396 return 2;
4399 /* Convert
4400 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4402 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4404 E.g. for r0 = r0 | 0xfff
4405 mvn r0, r0, lsr #12
4406 mvn r0, r0, asl #12
4409 if (set_zero_bit_copies > 8
4410 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4412 if (generate)
4414 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4415 rtx shift = GEN_INT (set_zero_bit_copies);
4417 emit_constant_insn
4418 (cond,
4419 gen_rtx_SET (VOIDmode, sub,
4420 gen_rtx_NOT (mode,
4421 gen_rtx_LSHIFTRT (mode,
4422 source,
4423 shift))));
4424 emit_constant_insn
4425 (cond,
4426 gen_rtx_SET (VOIDmode, target,
4427 gen_rtx_NOT (mode,
4428 gen_rtx_ASHIFT (mode, sub,
4429 shift))));
4431 return 2;
4434 /* This will never be reached for Thumb2 because orn is a valid
4435 instruction. This is for Thumb1 and the 32-bit ARM cases.
4437 x = y | constant (such that ~constant is a valid constant)
4438 Transform this to
4439 x = ~(~y & ~constant).
4441 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4443 if (generate)
4445 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4446 emit_constant_insn (cond,
4447 gen_rtx_SET (VOIDmode, sub,
4448 gen_rtx_NOT (mode, source)));
4449 source = sub;
4450 if (subtargets)
4451 sub = gen_reg_rtx (mode);
4452 emit_constant_insn (cond,
4453 gen_rtx_SET (VOIDmode, sub,
4454 gen_rtx_AND (mode, source,
4455 GEN_INT (temp1))));
4456 emit_constant_insn (cond,
4457 gen_rtx_SET (VOIDmode, target,
4458 gen_rtx_NOT (mode, sub)));
4460 return 3;
4462 break;
4464 case AND:
4465 /* See if two shifts will do two or more insns' worth of work. */
4466 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4468 HOST_WIDE_INT shift_mask = ((0xffffffff
4469 << (32 - clear_sign_bit_copies))
4470 & 0xffffffff);
4472 if ((remainder | shift_mask) != 0xffffffff)
4474 if (generate)
4476 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4477 insns = arm_gen_constant (AND, mode, cond,
4478 remainder | shift_mask,
4479 new_src, source, subtargets, 1);
4480 source = new_src;
4482 else
4484 rtx targ = subtargets ? NULL_RTX : target;
4485 insns = arm_gen_constant (AND, mode, cond,
4486 remainder | shift_mask,
4487 targ, source, subtargets, 0);
4491 if (generate)
4493 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4494 rtx shift = GEN_INT (clear_sign_bit_copies);
4496 emit_insn (gen_ashlsi3 (new_src, source, shift));
4497 emit_insn (gen_lshrsi3 (target, new_src, shift));
4500 return insns + 2;
4503 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4505 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4507 if ((remainder | shift_mask) != 0xffffffff)
4509 if (generate)
4511 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4513 insns = arm_gen_constant (AND, mode, cond,
4514 remainder | shift_mask,
4515 new_src, source, subtargets, 1);
4516 source = new_src;
4518 else
4520 rtx targ = subtargets ? NULL_RTX : target;
4522 insns = arm_gen_constant (AND, mode, cond,
4523 remainder | shift_mask,
4524 targ, source, subtargets, 0);
4528 if (generate)
4530 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4531 rtx shift = GEN_INT (clear_zero_bit_copies);
4533 emit_insn (gen_lshrsi3 (new_src, source, shift));
4534 emit_insn (gen_ashlsi3 (target, new_src, shift));
4537 return insns + 2;
4540 break;
4542 default:
4543 break;
4546 /* Calculate what the instruction sequences would be if we generated it
4547 normally, negated, or inverted. */
4548 if (code == AND)
4549 /* AND cannot be split into multiple insns, so invert and use BIC. */
4550 insns = 99;
4551 else
4552 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4554 if (can_negate)
4555 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4556 &neg_immediates);
4557 else
4558 neg_insns = 99;
4560 if (can_invert || final_invert)
4561 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4562 &inv_immediates);
4563 else
4564 inv_insns = 99;
4566 immediates = &pos_immediates;
4568 /* Is the negated immediate sequence more efficient? */
4569 if (neg_insns < insns && neg_insns <= inv_insns)
4571 insns = neg_insns;
4572 immediates = &neg_immediates;
4574 else
4575 can_negate = 0;
4577 /* Is the inverted immediate sequence more efficient?
4578 We must allow for an extra NOT instruction for XOR operations, although
4579 there is some chance that the final 'mvn' will get optimized later. */
4580 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4582 insns = inv_insns;
4583 immediates = &inv_immediates;
4585 else
4587 can_invert = 0;
4588 final_invert = 0;
4591 /* Now output the chosen sequence as instructions. */
4592 if (generate)
4594 for (i = 0; i < insns; i++)
4596 rtx new_src, temp1_rtx;
4598 temp1 = immediates->i[i];
4600 if (code == SET || code == MINUS)
4601 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4602 else if ((final_invert || i < (insns - 1)) && subtargets)
4603 new_src = gen_reg_rtx (mode);
4604 else
4605 new_src = target;
4607 if (can_invert)
4608 temp1 = ~temp1;
4609 else if (can_negate)
4610 temp1 = -temp1;
4612 temp1 = trunc_int_for_mode (temp1, mode);
4613 temp1_rtx = GEN_INT (temp1);
4615 if (code == SET)
4617 else if (code == MINUS)
4618 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4619 else
4620 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (VOIDmode, new_src,
4624 temp1_rtx));
4625 source = new_src;
4627 if (code == SET)
4629 can_negate = can_invert;
4630 can_invert = 0;
4631 code = PLUS;
4633 else if (code == MINUS)
4634 code = PLUS;
4638 if (final_invert)
4640 if (generate)
4641 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4642 gen_rtx_NOT (mode, source)));
4643 insns++;
4646 return insns;
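/* Worked example of the AND shift trick above (illustrative only): for
   x &= 0x0000ffff on a core without the uxth/ubfx patterns handled
   earlier, the mask has sixteen leading zeros (clear_sign_bit_copies ==
   16), so rather than building the constant the function emits

       mov rN, rM, asl #16
       mov rN, rN, lsr #16

   which clears the top sixteen bits in two insns, mirroring the mvn/shift
   sequences already shown for the IOR cases.  */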
4649 /* Canonicalize a comparison so that we are more likely to recognize it.
4650 This can be done for a few constant compares, where we can make the
4651 immediate value easier to load. */
4653 static void
4654 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4655 bool op0_preserve_value)
4657 machine_mode mode;
4658 unsigned HOST_WIDE_INT i, maxval;
4660 mode = GET_MODE (*op0);
4661 if (mode == VOIDmode)
4662 mode = GET_MODE (*op1);
4664 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4666 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4667 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4668 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4669 for GTU/LEU in Thumb mode. */
4670 if (mode == DImode)
4672 rtx tem;
4674 if (*code == GT || *code == LE
4675 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4677 /* Missing comparison. First try to use an available
4678 comparison. */
4679 if (CONST_INT_P (*op1))
4681 i = INTVAL (*op1);
4682 switch (*code)
4684 case GT:
4685 case LE:
4686 if (i != maxval
4687 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4689 *op1 = GEN_INT (i + 1);
4690 *code = *code == GT ? GE : LT;
4691 return;
4693 break;
4694 case GTU:
4695 case LEU:
4696 if (i != ~((unsigned HOST_WIDE_INT) 0)
4697 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4699 *op1 = GEN_INT (i + 1);
4700 *code = *code == GTU ? GEU : LTU;
4701 return;
4703 break;
4704 default:
4705 gcc_unreachable ();
4709 /* If that did not work, reverse the condition. */
4710 if (!op0_preserve_value)
4712 tem = *op0;
4713 *op0 = *op1;
4714 *op1 = tem;
4715 *code = (int)swap_condition ((enum rtx_code)*code);
4718 return;
4721 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4722 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4723 to facilitate possible combining with a cmp into 'ands'. */
4724 if (mode == SImode
4725 && GET_CODE (*op0) == ZERO_EXTEND
4726 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4727 && GET_MODE (XEXP (*op0, 0)) == QImode
4728 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4729 && subreg_lowpart_p (XEXP (*op0, 0))
4730 && *op1 == const0_rtx)
4731 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4732 GEN_INT (255));
4734 /* Comparisons smaller than DImode. Only adjust comparisons against
4735 an out-of-range constant. */
4736 if (!CONST_INT_P (*op1)
4737 || const_ok_for_arm (INTVAL (*op1))
4738 || const_ok_for_arm (- INTVAL (*op1)))
4739 return;
4741 i = INTVAL (*op1);
4743 switch (*code)
4745 case EQ:
4746 case NE:
4747 return;
4749 case GT:
4750 case LE:
4751 if (i != maxval
4752 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4754 *op1 = GEN_INT (i + 1);
4755 *code = *code == GT ? GE : LT;
4756 return;
4758 break;
4760 case GE:
4761 case LT:
4762 if (i != ~maxval
4763 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4765 *op1 = GEN_INT (i - 1);
4766 *code = *code == GE ? GT : LE;
4767 return;
4769 break;
4771 case GTU:
4772 case LEU:
4773 if (i != ~((unsigned HOST_WIDE_INT) 0)
4774 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4776 *op1 = GEN_INT (i + 1);
4777 *code = *code == GTU ? GEU : LTU;
4778 return;
4780 break;
4782 case GEU:
4783 case LTU:
4784 if (i != 0
4785 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4787 *op1 = GEN_INT (i - 1);
4788 *code = *code == GEU ? GTU : LEU;
4789 return;
4791 break;
4793 default:
4794 gcc_unreachable ();
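/* Worked example (illustrative only): for an SImode test "x > 0xfffff",
   neither 0xfffff nor its negation is a valid immediate, so the code above
   rewrites the comparison as "x >= 0x100000" (GT becomes GE with i + 1);
   0x100000 is a single rotated 8-bit immediate, so the compare needs no
   extra constant-building insns.  */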
4799 /* Define how to find the value returned by a function. */
4801 static rtx
4802 arm_function_value(const_tree type, const_tree func,
4803 bool outgoing ATTRIBUTE_UNUSED)
4805 machine_mode mode;
4806 int unsignedp ATTRIBUTE_UNUSED;
4807 rtx r ATTRIBUTE_UNUSED;
4809 mode = TYPE_MODE (type);
4811 if (TARGET_AAPCS_BASED)
4812 return aapcs_allocate_return_reg (mode, type, func);
4814 /* Promote integer types. */
4815 if (INTEGRAL_TYPE_P (type))
4816 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4818 /* Promote small structs returned in a register to full-word size
4819 for big-endian AAPCS. */
4820 if (arm_return_in_msb (type))
4822 HOST_WIDE_INT size = int_size_in_bytes (type);
4823 if (size % UNITS_PER_WORD != 0)
4825 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4826 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4830 return arm_libcall_value_1 (mode);
4833 /* libcall hashtable helpers. */
4835 struct libcall_hasher : typed_noop_remove <rtx_def>
4837 typedef rtx_def value_type;
4838 typedef rtx_def compare_type;
4839 static inline hashval_t hash (const value_type *);
4840 static inline bool equal (const value_type *, const compare_type *);
4841 static inline void remove (value_type *);
4844 inline bool
4845 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4847 return rtx_equal_p (p1, p2);
4850 inline hashval_t
4851 libcall_hasher::hash (const value_type *p1)
4853 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4856 typedef hash_table<libcall_hasher> libcall_table_type;
4858 static void
4859 add_libcall (libcall_table_type *htab, rtx libcall)
4861 *htab->find_slot (libcall, INSERT) = libcall;
4864 static bool
4865 arm_libcall_uses_aapcs_base (const_rtx libcall)
4867 static bool init_done = false;
4868 static libcall_table_type *libcall_htab = NULL;
4870 if (!init_done)
4872 init_done = true;
4874 libcall_htab = new libcall_table_type (31);
4875 add_libcall (libcall_htab,
4876 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4877 add_libcall (libcall_htab,
4878 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4879 add_libcall (libcall_htab,
4880 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4881 add_libcall (libcall_htab,
4882 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4884 add_libcall (libcall_htab,
4885 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4886 add_libcall (libcall_htab,
4887 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4888 add_libcall (libcall_htab,
4889 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4890 add_libcall (libcall_htab,
4891 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4893 add_libcall (libcall_htab,
4894 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4895 add_libcall (libcall_htab,
4896 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4897 add_libcall (libcall_htab,
4898 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4899 add_libcall (libcall_htab,
4900 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4901 add_libcall (libcall_htab,
4902 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4903 add_libcall (libcall_htab,
4904 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4905 add_libcall (libcall_htab,
4906 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4907 add_libcall (libcall_htab,
4908 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4910 /* Values from double-precision helper functions are returned in core
4911 registers if the selected core only supports single-precision
4912 arithmetic, even if we are using the hard-float ABI. The same is
4913 true for single-precision helpers, but we will never be using the
4914 hard-float ABI on a CPU which doesn't support single-precision
4915 operations in hardware. */
4916 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4917 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4918 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4919 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4920 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4921 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4922 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4923 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4924 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4925 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4926 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4927 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4928 SFmode));
4929 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4930 DFmode));
4933 return libcall && libcall_htab->find (libcall) != NULL;
4936 static rtx
4937 arm_libcall_value_1 (machine_mode mode)
4939 if (TARGET_AAPCS_BASED)
4940 return aapcs_libcall_value (mode);
4941 else if (TARGET_IWMMXT_ABI
4942 && arm_vector_mode_supported_p (mode))
4943 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4944 else
4945 return gen_rtx_REG (mode, ARG_REGISTER (1));
4948 /* Define how to find the value returned by a library function
4949 assuming the value has mode MODE. */
4951 static rtx
4952 arm_libcall_value (machine_mode mode, const_rtx libcall)
4954 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4955 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4957 /* The following libcalls return their result in integer registers,
4958 even though they return a floating point value. */
4959 if (arm_libcall_uses_aapcs_base (libcall))
4960 return gen_rtx_REG (mode, ARG_REGISTER(1));
4964 return arm_libcall_value_1 (mode);
4967 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4969 static bool
4970 arm_function_value_regno_p (const unsigned int regno)
4972 if (regno == ARG_REGISTER (1)
4973 || (TARGET_32BIT
4974 && TARGET_AAPCS_BASED
4975 && TARGET_VFP
4976 && TARGET_HARD_FLOAT
4977 && regno == FIRST_VFP_REGNUM)
4978 || (TARGET_IWMMXT_ABI
4979 && regno == FIRST_IWMMXT_REGNUM))
4980 return true;
4982 return false;
4985 /* Determine the amount of memory needed to store the possible return
4986 registers of an untyped call. */
4987 int
4988 arm_apply_result_size (void)
4990 int size = 16;
4992 if (TARGET_32BIT)
4994 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4995 size += 32;
4996 if (TARGET_IWMMXT_ABI)
4997 size += 8;
5000 return size;
5003 /* Decide whether TYPE should be returned in memory (true)
5004 or in a register (false). FNTYPE is the type of the function making
5005 the call. */
5006 static bool
5007 arm_return_in_memory (const_tree type, const_tree fntype)
5009 HOST_WIDE_INT size;
5011 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5013 if (TARGET_AAPCS_BASED)
5015 /* Simple, non-aggregate types (i.e. not including vector and
5016 complex types) are always returned in a register (or registers).
5017 We don't care about which register here, so we can short-cut
5018 some of the detail. */
5019 if (!AGGREGATE_TYPE_P (type)
5020 && TREE_CODE (type) != VECTOR_TYPE
5021 && TREE_CODE (type) != COMPLEX_TYPE)
5022 return false;
5024 /* Any return value that is no larger than one word can be
5025 returned in r0. */
5026 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5027 return false;
5029 /* Check any available co-processors to see if they accept the
5030 type as a register candidate (VFP, for example, can return
5031 some aggregates in consecutive registers). These aren't
5032 available if the call is variadic. */
5033 if (aapcs_select_return_coproc (type, fntype) >= 0)
5034 return false;
5036 /* Vector values should be returned using ARM registers, not
5037 memory (unless they're over 16 bytes, which will break since
5038 we only have four call-clobbered registers to play with). */
5039 if (TREE_CODE (type) == VECTOR_TYPE)
5040 return (size < 0 || size > (4 * UNITS_PER_WORD));
5042 /* The rest go in memory. */
5043 return true;
5046 if (TREE_CODE (type) == VECTOR_TYPE)
5047 return (size < 0 || size > (4 * UNITS_PER_WORD));
5049 if (!AGGREGATE_TYPE_P (type) &&
5050 (TREE_CODE (type) != VECTOR_TYPE))
5051 /* All simple types are returned in registers. */
5052 return false;
5054 if (arm_abi != ARM_ABI_APCS)
5056 /* ATPCS and later return aggregate types in memory only if they are
5057 larger than a word (or are variable size). */
5058 return (size < 0 || size > UNITS_PER_WORD);
5061 /* For the arm-wince targets we choose to be compatible with Microsoft's
5062 ARM and Thumb compilers, which always return aggregates in memory. */
5063 #ifndef ARM_WINCE
5064 /* All structures/unions bigger than one word are returned in memory.
5065 Also catch the case where int_size_in_bytes returns -1. In this case
5066 the aggregate is either huge or of variable size, and in either case
5067 we will want to return it via memory and not in a register. */
5068 if (size < 0 || size > UNITS_PER_WORD)
5069 return true;
5071 if (TREE_CODE (type) == RECORD_TYPE)
5073 tree field;
5075 /* For a struct the APCS says that we only return in a register
5076 if the type is 'integer like' and every addressable element
5077 has an offset of zero. For practical purposes this means
5078 that the structure can have at most one non bit-field element
5079 and that this element must be the first one in the structure. */
5081 /* Find the first field, ignoring non FIELD_DECL things which will
5082 have been created by C++. */
5083 for (field = TYPE_FIELDS (type);
5084 field && TREE_CODE (field) != FIELD_DECL;
5085 field = DECL_CHAIN (field))
5086 continue;
5088 if (field == NULL)
5089 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5091 /* Check that the first field is valid for returning in a register. */
5093 /* ... Floats are not allowed */
5094 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5095 return true;
5097 /* ... Aggregates that are not themselves valid for returning in
5098 a register are not allowed. */
5099 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5100 return true;
5102 /* Now check the remaining fields, if any. Only bitfields are allowed,
5103 since they are not addressable. */
5104 for (field = DECL_CHAIN (field);
5105 field;
5106 field = DECL_CHAIN (field))
5108 if (TREE_CODE (field) != FIELD_DECL)
5109 continue;
5111 if (!DECL_BIT_FIELD_TYPE (field))
5112 return true;
5115 return false;
5118 if (TREE_CODE (type) == UNION_TYPE)
5120 tree field;
5122 /* Unions can be returned in registers if every element is
5123 integral, or can be returned in an integer register. */
5124 for (field = TYPE_FIELDS (type);
5125 field;
5126 field = DECL_CHAIN (field))
5128 if (TREE_CODE (field) != FIELD_DECL)
5129 continue;
5131 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5132 return true;
5134 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5135 return true;
5138 return false;
5140 #endif /* not ARM_WINCE */
5142 /* Return all other types in memory. */
5143 return true;
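/* Illustrative examples of the rules above (assumptions, not exhaustive):
   under AAPCS a "struct { char c; }" fits in one word and comes back in r0,
   while "struct { int a; int b; }" is larger than a word and goes to
   memory; "struct { double d[2]; }" can instead come back in VFP registers
   when the hard-float variant accepts it as a homogeneous aggregate.  Under
   the old APCS rules "struct { int a; }" is "integer like" and is returned
   in a register, whereas "struct { float f; }" is returned in memory
   because its first field is a float.  */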
5146 const struct pcs_attribute_arg
5148 const char *arg;
5149 enum arm_pcs value;
5150 } pcs_attribute_args[] =
5152 {"aapcs", ARM_PCS_AAPCS},
5153 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5154 #if 0
5155 /* We could recognize these, but changes would be needed elsewhere
5156 * to implement them. */
5157 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5158 {"atpcs", ARM_PCS_ATPCS},
5159 {"apcs", ARM_PCS_APCS},
5160 #endif
5161 {NULL, ARM_PCS_UNKNOWN}
5164 static enum arm_pcs
5165 arm_pcs_from_attribute (tree attr)
5167 const struct pcs_attribute_arg *ptr;
5168 const char *arg;
5170 /* Get the value of the argument. */
5171 if (TREE_VALUE (attr) == NULL_TREE
5172 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5173 return ARM_PCS_UNKNOWN;
5175 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5177 /* Check it against the list of known arguments. */
5178 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5179 if (streq (arg, ptr->arg))
5180 return ptr->value;
5182 /* An unrecognized PCS variant. */
5183 return ARM_PCS_UNKNOWN;
5186 /* Get the PCS variant to use for this call. TYPE is the function's type
5187 specification, DECL is the specific declaration. DECL may be null if
5188 the call could be indirect or if this is a library call. */
5189 static enum arm_pcs
5190 arm_get_pcs_model (const_tree type, const_tree decl)
5192 bool user_convention = false;
5193 enum arm_pcs user_pcs = arm_pcs_default;
5194 tree attr;
5196 gcc_assert (type);
5198 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5199 if (attr)
5201 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5202 user_convention = true;
5205 if (TARGET_AAPCS_BASED)
5207 /* Detect varargs functions. These always use the base rules
5208 (no argument is ever a candidate for a co-processor
5209 register). */
5210 bool base_rules = stdarg_p (type);
5212 if (user_convention)
5214 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5215 sorry ("non-AAPCS derived PCS variant");
5216 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5217 error ("variadic functions must use the base AAPCS variant");
5220 if (base_rules)
5221 return ARM_PCS_AAPCS;
5222 else if (user_convention)
5223 return user_pcs;
5224 else if (decl && flag_unit_at_a_time)
5226 /* Local functions never leak outside this compilation unit,
5227 so we are free to use whatever conventions are
5228 appropriate. */
5229 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5230 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5231 if (i && i->local)
5232 return ARM_PCS_AAPCS_LOCAL;
5235 else if (user_convention && user_pcs != arm_pcs_default)
5236 sorry ("PCS variant");
5238 /* For everything else we use the target's default. */
5239 return arm_pcs_default;
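/* Example of the "pcs" attribute that the code above interprets
   (illustrative only):

       double f (double) __attribute__ ((pcs ("aapcs")));

   forces the base-variant (core-register) argument and return passing for
   calls to f, even in a translation unit otherwise compiled for the VFP
   variant.  */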
5243 static void
5244 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5245 const_tree fntype ATTRIBUTE_UNUSED,
5246 rtx libcall ATTRIBUTE_UNUSED,
5247 const_tree fndecl ATTRIBUTE_UNUSED)
5249 /* Record the unallocated VFP registers. */
5250 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5251 pcum->aapcs_vfp_reg_alloc = 0;
5254 /* Walk down the type tree of TYPE counting consecutive base elements.
5255 If *MODEP is VOIDmode, then set it to the first valid floating point
5256 type. If a non-floating point type is found, or if a floating point
5257 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5258 otherwise return the count in the sub-tree. */
5259 static int
5260 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5262 machine_mode mode;
5263 HOST_WIDE_INT size;
5265 switch (TREE_CODE (type))
5267 case REAL_TYPE:
5268 mode = TYPE_MODE (type);
5269 if (mode != DFmode && mode != SFmode)
5270 return -1;
5272 if (*modep == VOIDmode)
5273 *modep = mode;
5275 if (*modep == mode)
5276 return 1;
5278 break;
5280 case COMPLEX_TYPE:
5281 mode = TYPE_MODE (TREE_TYPE (type));
5282 if (mode != DFmode && mode != SFmode)
5283 return -1;
5285 if (*modep == VOIDmode)
5286 *modep = mode;
5288 if (*modep == mode)
5289 return 2;
5291 break;
5293 case VECTOR_TYPE:
5294 /* Use V2SImode and V4SImode as representatives of all 64-bit
5295 and 128-bit vector types, whether or not those modes are
5296 supported with the present options. */
5297 size = int_size_in_bytes (type);
5298 switch (size)
5300 case 8:
5301 mode = V2SImode;
5302 break;
5303 case 16:
5304 mode = V4SImode;
5305 break;
5306 default:
5307 return -1;
5310 if (*modep == VOIDmode)
5311 *modep = mode;
5313 /* Vector modes are considered to be opaque: two vectors are
5314 equivalent for the purposes of being homogeneous aggregates
5315 if they are the same size. */
5316 if (*modep == mode)
5317 return 1;
5319 break;
5321 case ARRAY_TYPE:
5323 int count;
5324 tree index = TYPE_DOMAIN (type);
5326 /* Can't handle incomplete types nor sizes that are not
5327 fixed. */
5328 if (!COMPLETE_TYPE_P (type)
5329 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5330 return -1;
5332 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5333 if (count == -1
5334 || !index
5335 || !TYPE_MAX_VALUE (index)
5336 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5337 || !TYPE_MIN_VALUE (index)
5338 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5339 || count < 0)
5340 return -1;
5342 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5343 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5345 /* There must be no padding. */
5346 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5347 return -1;
5349 return count;
5352 case RECORD_TYPE:
5354 int count = 0;
5355 int sub_count;
5356 tree field;
5358 /* Can't handle incomplete types nor sizes that are not
5359 fixed. */
5360 if (!COMPLETE_TYPE_P (type)
5361 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5362 return -1;
5364 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5366 if (TREE_CODE (field) != FIELD_DECL)
5367 continue;
5369 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5370 if (sub_count < 0)
5371 return -1;
5372 count += sub_count;
5375 /* There must be no padding. */
5376 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5377 return -1;
5379 return count;
5382 case UNION_TYPE:
5383 case QUAL_UNION_TYPE:
5385 /* These aren't very interesting except in a degenerate case. */
5386 int count = 0;
5387 int sub_count;
5388 tree field;
5390 /* Can't handle incomplete types nor sizes that are not
5391 fixed. */
5392 if (!COMPLETE_TYPE_P (type)
5393 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5394 return -1;
5396 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5398 if (TREE_CODE (field) != FIELD_DECL)
5399 continue;
5401 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5402 if (sub_count < 0)
5403 return -1;
5404 count = count > sub_count ? count : sub_count;
5407 /* There must be no padding. */
5408 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5409 return -1;
5411 return count;
5414 default:
5415 break;
5418 return -1;
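/* Worked examples of the walk above (illustrative only):

     struct { float x, y, z; }      -> *MODEP = SFmode, count = 3
     struct { double re, im; }      -> *MODEP = DFmode, count = 2
     _Complex double                -> *MODEP = DFmode, count = 2
     struct { double d; float f; }  -> -1 (mixed base types)

   A count of 1 to 4 with a single base mode is what later makes a type a
   homogeneous-aggregate candidate for the VFP PCS.  */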
5421 /* Return true if PCS_VARIANT should use VFP registers. */
5422 static bool
5423 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5425 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5427 static bool seen_thumb1_vfp = false;
5429 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5431 sorry ("Thumb-1 hard-float VFP ABI");
5432 /* sorry() is not immediately fatal, so only display this once. */
5433 seen_thumb1_vfp = true;
5436 return true;
5439 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5440 return false;
5442 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5443 (TARGET_VFP_DOUBLE || !is_double));
5446 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5447 suitable for passing or returning in VFP registers for the PCS
5448 variant selected. If it is, then *BASE_MODE is updated to contain
5449 a machine mode describing each element of the argument's type and
5450 *COUNT to hold the number of such elements. */
5451 static bool
5452 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5453 machine_mode mode, const_tree type,
5454 machine_mode *base_mode, int *count)
5456 machine_mode new_mode = VOIDmode;
5458 /* If we have the type information, prefer that to working things
5459 out from the mode. */
5460 if (type)
5462 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5464 if (ag_count > 0 && ag_count <= 4)
5465 *count = ag_count;
5466 else
5467 return false;
5469 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5470 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5471 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5473 *count = 1;
5474 new_mode = mode;
5476 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5478 *count = 2;
5479 new_mode = (mode == DCmode ? DFmode : SFmode);
5481 else
5482 return false;
5485 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5486 return false;
5488 *base_mode = new_mode;
5489 return true;
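/* As an illustrative sketch of the candidate check above: a
   homogeneous floating-point aggregate such as

       struct rgb { float r, g, b; };

   decomposes into three SFmode elements, so *base_mode is set to
   SFmode and *count to 3; an aggregate with more than four such
   elements is rejected and falls back to the core-register rules.  */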
5492 static bool
5493 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5494 machine_mode mode, const_tree type)
5496 int count ATTRIBUTE_UNUSED;
5497 machine_mode ag_mode ATTRIBUTE_UNUSED;
5499 if (!use_vfp_abi (pcs_variant, false))
5500 return false;
5501 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5502 &ag_mode, &count);
5505 static bool
5506 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5507 const_tree type)
5509 if (!use_vfp_abi (pcum->pcs_variant, false))
5510 return false;
5512 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5513 &pcum->aapcs_vfp_rmode,
5514 &pcum->aapcs_vfp_rcount);
5517 static bool
5518 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5519 const_tree type ATTRIBUTE_UNUSED)
5521 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5522 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5523 int regno;
5525 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5526 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5528 pcum->aapcs_vfp_reg_alloc = mask << regno;
5529 if (mode == BLKmode
5530 || (mode == TImode && ! TARGET_NEON)
5531 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5533 int i;
5534 int rcount = pcum->aapcs_vfp_rcount;
5535 int rshift = shift;
5536 machine_mode rmode = pcum->aapcs_vfp_rmode;
5537 rtx par;
5538 if (!TARGET_NEON)
5540 /* Avoid using unsupported vector modes. */
5541 if (rmode == V2SImode)
5542 rmode = DImode;
5543 else if (rmode == V4SImode)
5545 rmode = DImode;
5546 rcount *= 2;
5547 rshift /= 2;
5550 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5551 for (i = 0; i < rcount; i++)
5553 rtx tmp = gen_rtx_REG (rmode,
5554 FIRST_VFP_REGNUM + regno + i * rshift);
5555 tmp = gen_rtx_EXPR_LIST
5556 (VOIDmode, tmp,
5557 GEN_INT (i * GET_MODE_SIZE (rmode)));
5558 XVECEXP (par, 0, i) = tmp;
5561 pcum->aapcs_reg = par;
5563 else
5564 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5565 return true;
5567 return false;
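/* A worked illustration of the allocation above: for a single DFmode
   argument, shift is 2 (one D register spans two S registers) and
   mask is 0x3, so the loop scans s0/s1, s2/s3, ... for the first free
   even-numbered pair and records it in aapcs_vfp_reg_alloc.  */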
5570 static rtx
5571 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5572 machine_mode mode,
5573 const_tree type ATTRIBUTE_UNUSED)
5575 if (!use_vfp_abi (pcs_variant, false))
5576 return NULL;
5578 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5580 int count;
5581 machine_mode ag_mode;
5582 int i;
5583 rtx par;
5584 int shift;
5586 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5587 &ag_mode, &count);
5589 if (!TARGET_NEON)
5591 if (ag_mode == V2SImode)
5592 ag_mode = DImode;
5593 else if (ag_mode == V4SImode)
5595 ag_mode = DImode;
5596 count *= 2;
5599 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5601 for (i = 0; i < count; i++)
5603 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5605 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5606 XVECEXP (par, 0, i) = tmp;
5609 return par;
5612 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5615 static void
5616 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5617 machine_mode mode ATTRIBUTE_UNUSED,
5618 const_tree type ATTRIBUTE_UNUSED)
5620 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5621 pcum->aapcs_vfp_reg_alloc = 0;
5622 return;
5625 #define AAPCS_CP(X) \
5627 aapcs_ ## X ## _cum_init, \
5628 aapcs_ ## X ## _is_call_candidate, \
5629 aapcs_ ## X ## _allocate, \
5630 aapcs_ ## X ## _is_return_candidate, \
5631 aapcs_ ## X ## _allocate_return_reg, \
5632 aapcs_ ## X ## _advance \
5635 /* Table of co-processors that can be used to pass arguments in
5636 registers.  Ideally no argument should be a candidate for more than
5637 one co-processor table entry, but the table is processed in order
5638 and stops after the first match. If that entry then fails to put
5639 the argument into a co-processor register, the argument will go on
5640 the stack. */
5641 static struct
5643 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5644 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5646 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5647 BLKmode) is a candidate for this co-processor's registers; this
5648 function should ignore any position-dependent state in
5649 CUMULATIVE_ARGS and only use call-type dependent information. */
5650 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5652 /* Return true if the argument does get a co-processor register; it
5653 should set aapcs_reg to an RTX of the register allocated as is
5654 required for a return from FUNCTION_ARG. */
5655 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5657 /* Return true if a result of mode MODE (or type TYPE if MODE is
5658 BLKmode) can be returned in this co-processor's registers. */
5659 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5661 /* Allocate and return an RTX element to hold the return type of a
5662 call; this routine must not fail and will only be called if
5663 is_return_candidate returned true with the same parameters. */
5664 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5666 /* Finish processing this argument and prepare to start processing
5667 the next one. */
5668 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5669 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5671 AAPCS_CP(vfp)
5674 #undef AAPCS_CP
5676 static int
5677 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5678 const_tree type)
5680 int i;
5682 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5683 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5684 return i;
5686 return -1;
5689 static int
5690 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5692 /* We aren't passed a decl, so we can't check that a call is local.
5693 However, it isn't clear that that would be a win anyway, since it
5694 might limit some tail-calling opportunities. */
5695 enum arm_pcs pcs_variant;
5697 if (fntype)
5699 const_tree fndecl = NULL_TREE;
5701 if (TREE_CODE (fntype) == FUNCTION_DECL)
5703 fndecl = fntype;
5704 fntype = TREE_TYPE (fntype);
5707 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5709 else
5710 pcs_variant = arm_pcs_default;
5712 if (pcs_variant != ARM_PCS_AAPCS)
5714 int i;
5716 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5717 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5718 TYPE_MODE (type),
5719 type))
5720 return i;
5722 return -1;
5725 static rtx
5726 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5727 const_tree fntype)
5729 /* We aren't passed a decl, so we can't check that a call is local.
5730 However, it isn't clear that that would be a win anyway, since it
5731 might limit some tail-calling opportunities. */
5732 enum arm_pcs pcs_variant;
5733 int unsignedp ATTRIBUTE_UNUSED;
5735 if (fntype)
5737 const_tree fndecl = NULL_TREE;
5739 if (TREE_CODE (fntype) == FUNCTION_DECL)
5741 fndecl = fntype;
5742 fntype = TREE_TYPE (fntype);
5745 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5747 else
5748 pcs_variant = arm_pcs_default;
5750 /* Promote integer types. */
5751 if (type && INTEGRAL_TYPE_P (type))
5752 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5754 if (pcs_variant != ARM_PCS_AAPCS)
5756 int i;
5758 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5759 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5760 type))
5761 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5762 mode, type);
5765 /* Promotes small structs returned in a register to full-word size
5766 for big-endian AAPCS. */
5767 if (type && arm_return_in_msb (type))
5769 HOST_WIDE_INT size = int_size_in_bytes (type);
5770 if (size % UNITS_PER_WORD != 0)
5772 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5773 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5777 return gen_rtx_REG (mode, R0_REGNUM);
5780 static rtx
5781 aapcs_libcall_value (machine_mode mode)
5783 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5784 && GET_MODE_SIZE (mode) <= 4)
5785 mode = SImode;
5787 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5790 /* Lay out a function argument using the AAPCS rules. The rule
5791 numbers referred to here are those in the AAPCS. */
5792 static void
5793 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5794 const_tree type, bool named)
5796 int nregs, nregs2;
5797 int ncrn;
5799 /* We only need to do this once per argument. */
5800 if (pcum->aapcs_arg_processed)
5801 return;
5803 pcum->aapcs_arg_processed = true;
5805 /* Special case: if named is false then we are handling an incoming
5806 anonymous argument which is on the stack. */
5807 if (!named)
5808 return;
5810 /* Is this a potential co-processor register candidate? */
5811 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5813 int slot = aapcs_select_call_coproc (pcum, mode, type);
5814 pcum->aapcs_cprc_slot = slot;
5816 /* We don't have to apply any of the rules from part B of the
5817 preparation phase; these are handled elsewhere in the
5818 compiler. */
5820 if (slot >= 0)
5822 /* A Co-processor register candidate goes either in its own
5823 class of registers or on the stack. */
5824 if (!pcum->aapcs_cprc_failed[slot])
5826 /* C1.cp - Try to allocate the argument to co-processor
5827 registers. */
5828 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5829 return;
5831 /* C2.cp - Put the argument on the stack and note that we
5832 can't assign any more candidates in this slot. We also
5833 need to note that we have allocated stack space, so that
5834 we won't later try to split a non-cprc candidate between
5835 core registers and the stack. */
5836 pcum->aapcs_cprc_failed[slot] = true;
5837 pcum->can_split = false;
5840 /* We didn't get a register, so this argument goes on the
5841 stack. */
5842 gcc_assert (pcum->can_split == false);
5843 return;
5847 /* C3 - For double-word aligned arguments, round the NCRN up to the
5848 next even number. */
5849 ncrn = pcum->aapcs_ncrn;
5850 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5851 ncrn++;
5853 nregs = ARM_NUM_REGS2(mode, type);
5855 /* Sigh, this test should really assert that nregs > 0, but a GCC
5856 extension allows empty structs and then gives them zero size; it
5857 then allows such a structure to be passed by value. For some of
5858 the code below we have to pretend that such an argument has
5859 non-zero size so that we 'locate' it correctly either in
5860 registers or on the stack. */
5861 gcc_assert (nregs >= 0);
5863 nregs2 = nregs ? nregs : 1;
5865 /* C4 - Argument fits entirely in core registers. */
5866 if (ncrn + nregs2 <= NUM_ARG_REGS)
5868 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5869 pcum->aapcs_next_ncrn = ncrn + nregs;
5870 return;
5873 /* C5 - Some core registers left and there are no arguments already
5874 on the stack: split this argument between the remaining core
5875 registers and the stack. */
5876 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5878 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5879 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5880 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5881 return;
5884 /* C6 - NCRN is set to 4. */
5885 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5887 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
5888 return;
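/* A worked example of rules C3-C5 above (illustrative only): for a
   call such as f (int a, double b) under the base AAPCS with software
   floating point, 'a' is allocated r0 and the NCRN becomes 1; 'b'
   needs doubleword alignment, so C3 rounds the NCRN up to 2 and 'b'
   occupies r2/r3, with nothing left over for the stack.  */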
5891 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5892 for a call to a function whose data type is FNTYPE.
5893 For a library call, FNTYPE is NULL. */
5894 void
5895 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5896 rtx libname,
5897 tree fndecl ATTRIBUTE_UNUSED)
5899 /* Long call handling. */
5900 if (fntype)
5901 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5902 else
5903 pcum->pcs_variant = arm_pcs_default;
5905 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5907 if (arm_libcall_uses_aapcs_base (libname))
5908 pcum->pcs_variant = ARM_PCS_AAPCS;
5910 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5911 pcum->aapcs_reg = NULL_RTX;
5912 pcum->aapcs_partial = 0;
5913 pcum->aapcs_arg_processed = false;
5914 pcum->aapcs_cprc_slot = -1;
5915 pcum->can_split = true;
5917 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5919 int i;
5921 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5923 pcum->aapcs_cprc_failed[i] = false;
5924 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5927 return;
5930 /* Legacy ABIs */
5932 /* On the ARM, the offset starts at 0. */
5933 pcum->nregs = 0;
5934 pcum->iwmmxt_nregs = 0;
5935 pcum->can_split = true;
5937 /* Varargs vectors are treated the same as long long.
5938 named_count avoids having to change the way arm handles 'named'.  */
5939 pcum->named_count = 0;
5940 pcum->nargs = 0;
5942 if (TARGET_REALLY_IWMMXT && fntype)
5944 tree fn_arg;
5946 for (fn_arg = TYPE_ARG_TYPES (fntype);
5947 fn_arg;
5948 fn_arg = TREE_CHAIN (fn_arg))
5949 pcum->named_count += 1;
5951 if (! pcum->named_count)
5952 pcum->named_count = INT_MAX;
5956 /* Return true if we use LRA instead of reload pass. */
5957 static bool
5958 arm_lra_p (void)
5960 return arm_lra_flag;
5963 /* Return true if mode/type need doubleword alignment. */
5964 static bool
5965 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5967 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5968 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
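/* For example, under the AAPCS types such as long long and double
   carry 64-bit alignment, which exceeds PARM_BOUNDARY (32 bits), so
   they are reported as needing doubleword alignment and are placed in
   an even-numbered core register pair by the callers of this
   function.  */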
5972 /* Determine where to put an argument to a function.
5973 Value is zero to push the argument on the stack,
5974 or a hard register in which to store the argument.
5976 MODE is the argument's machine mode.
5977 TYPE is the data type of the argument (as a tree).
5978 This is null for libcalls where that information may
5979 not be available.
5980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5981 the preceding args and about the function being called.
5982 NAMED is nonzero if this argument is a named parameter
5983 (otherwise it is an extra parameter matching an ellipsis).
5985 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5986 other arguments are passed on the stack. If (NAMED == 0) (which happens
5987 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5988 defined), say it is passed on the stack (function_prologue will
5989 indeed make it be passed on the stack if necessary). */
5991 static rtx
5992 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5993 const_tree type, bool named)
5995 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5996 int nregs;
5998 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5999 a call insn (op3 of a call_value insn). */
6000 if (mode == VOIDmode)
6001 return const0_rtx;
6003 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6005 aapcs_layout_arg (pcum, mode, type, named);
6006 return pcum->aapcs_reg;
6009 /* Varargs vectors are treated the same as long long.
6010 named_count avoids having to change the way arm handles 'named'.  */
6011 if (TARGET_IWMMXT_ABI
6012 && arm_vector_mode_supported_p (mode)
6013 && pcum->named_count > pcum->nargs + 1)
6015 if (pcum->iwmmxt_nregs <= 9)
6016 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6017 else
6019 pcum->can_split = false;
6020 return NULL_RTX;
6024 /* Put doubleword aligned quantities in even register pairs. */
6025 if (pcum->nregs & 1
6026 && ARM_DOUBLEWORD_ALIGN
6027 && arm_needs_doubleword_align (mode, type))
6028 pcum->nregs++;
6030 /* Only allow splitting an arg between regs and memory if all preceding
6031 args were allocated to regs. For args passed by reference we only count
6032 the reference pointer. */
6033 if (pcum->can_split)
6034 nregs = 1;
6035 else
6036 nregs = ARM_NUM_REGS2 (mode, type);
6038 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6039 return NULL_RTX;
6041 return gen_rtx_REG (mode, pcum->nregs);
6044 static unsigned int
6045 arm_function_arg_boundary (machine_mode mode, const_tree type)
6047 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6048 ? DOUBLEWORD_ALIGNMENT
6049 : PARM_BOUNDARY);
6052 static int
6053 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6054 tree type, bool named)
6056 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6057 int nregs = pcum->nregs;
6059 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6061 aapcs_layout_arg (pcum, mode, type, named);
6062 return pcum->aapcs_partial;
6065 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6066 return 0;
6068 if (NUM_ARG_REGS > nregs
6069 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6070 && pcum->can_split)
6071 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6073 return 0;
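/* As a sketch of the legacy-ABI path above: a 16-byte structure whose
   first word would land in r2 has ARM_NUM_REGS2 equal to 4, so the
   test succeeds and (NUM_ARG_REGS - nregs) * UNITS_PER_WORD reports 8
   bytes passed in r2/r3, with the remaining 8 bytes on the stack.  */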
6076 /* Update the data in PCUM to advance over an argument
6077 of mode MODE and data type TYPE.
6078 (TYPE is null for libcalls where that information may not be available.) */
6080 static void
6081 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6082 const_tree type, bool named)
6084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6086 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6088 aapcs_layout_arg (pcum, mode, type, named);
6090 if (pcum->aapcs_cprc_slot >= 0)
6092 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6093 type);
6094 pcum->aapcs_cprc_slot = -1;
6097 /* Generic stuff. */
6098 pcum->aapcs_arg_processed = false;
6099 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6100 pcum->aapcs_reg = NULL_RTX;
6101 pcum->aapcs_partial = 0;
6103 else
6105 pcum->nargs += 1;
6106 if (arm_vector_mode_supported_p (mode)
6107 && pcum->named_count > pcum->nargs
6108 && TARGET_IWMMXT_ABI)
6109 pcum->iwmmxt_nregs += 1;
6110 else
6111 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6115 /* Variable sized types are passed by reference. This is a GCC
6116 extension to the ARM ABI. */
6118 static bool
6119 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6120 machine_mode mode ATTRIBUTE_UNUSED,
6121 const_tree type, bool named ATTRIBUTE_UNUSED)
6123 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6126 /* Encode the current state of the #pragma [no_]long_calls. */
6127 typedef enum
6129 OFF, /* No #pragma [no_]long_calls is in effect. */
6130 LONG, /* #pragma long_calls is in effect. */
6131 SHORT /* #pragma no_long_calls is in effect. */
6132 } arm_pragma_enum;
6134 static arm_pragma_enum arm_pragma_long_calls = OFF;
6136 void
6137 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6139 arm_pragma_long_calls = LONG;
6142 void
6143 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6145 arm_pragma_long_calls = SHORT;
6148 void
6149 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6151 arm_pragma_long_calls = OFF;
6154 /* Handle an attribute requiring a FUNCTION_DECL;
6155 arguments as in struct attribute_spec.handler. */
6156 static tree
6157 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6158 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6160 if (TREE_CODE (*node) != FUNCTION_DECL)
6162 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6163 name);
6164 *no_add_attrs = true;
6167 return NULL_TREE;
6170 /* Handle an "interrupt" or "isr" attribute;
6171 arguments as in struct attribute_spec.handler. */
6172 static tree
6173 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6174 bool *no_add_attrs)
6176 if (DECL_P (*node))
6178 if (TREE_CODE (*node) != FUNCTION_DECL)
6180 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6181 name);
6182 *no_add_attrs = true;
6184 /* FIXME: the argument if any is checked for type attributes;
6185 should it be checked for decl ones? */
6187 else
6189 if (TREE_CODE (*node) == FUNCTION_TYPE
6190 || TREE_CODE (*node) == METHOD_TYPE)
6192 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6194 warning (OPT_Wattributes, "%qE attribute ignored",
6195 name);
6196 *no_add_attrs = true;
6199 else if (TREE_CODE (*node) == POINTER_TYPE
6200 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6201 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6202 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6204 *node = build_variant_type_copy (*node);
6205 TREE_TYPE (*node) = build_type_attribute_variant
6206 (TREE_TYPE (*node),
6207 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6208 *no_add_attrs = true;
6210 else
6212 /* Possibly pass this attribute on from the type to a decl. */
6213 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6214 | (int) ATTR_FLAG_FUNCTION_NEXT
6215 | (int) ATTR_FLAG_ARRAY_NEXT))
6217 *no_add_attrs = true;
6218 return tree_cons (name, args, NULL_TREE);
6220 else
6222 warning (OPT_Wattributes, "%qE attribute ignored",
6223 name);
6228 return NULL_TREE;
6231 /* Handle a "pcs" attribute; arguments as in struct
6232 attribute_spec.handler. */
6233 static tree
6234 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6235 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6237 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6239 warning (OPT_Wattributes, "%qE attribute ignored", name);
6240 *no_add_attrs = true;
6242 return NULL_TREE;
6245 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6246 /* Handle the "notshared" attribute. This attribute is another way of
6247 requesting hidden visibility. ARM's compiler supports
6248 "__declspec(notshared)"; we support the same thing via an
6249 attribute. */
6251 static tree
6252 arm_handle_notshared_attribute (tree *node,
6253 tree name ATTRIBUTE_UNUSED,
6254 tree args ATTRIBUTE_UNUSED,
6255 int flags ATTRIBUTE_UNUSED,
6256 bool *no_add_attrs)
6258 tree decl = TYPE_NAME (*node);
6260 if (decl)
6262 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6263 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6264 *no_add_attrs = false;
6266 return NULL_TREE;
6268 #endif
6270 /* Return 0 if the attributes for two types are incompatible, 1 if they
6271 are compatible, and 2 if they are nearly compatible (which causes a
6272 warning to be generated). */
6273 static int
6274 arm_comp_type_attributes (const_tree type1, const_tree type2)
6276 int l1, l2, s1, s2;
6278 /* Check for mismatch of non-default calling convention. */
6279 if (TREE_CODE (type1) != FUNCTION_TYPE)
6280 return 1;
6282 /* Check for mismatched call attributes. */
6283 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6284 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6285 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6286 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6288 /* Only bother to check if an attribute is defined. */
6289 if (l1 | l2 | s1 | s2)
6291 /* If one type has an attribute, the other must have the same attribute. */
6292 if ((l1 != l2) || (s1 != s2))
6293 return 0;
6295 /* Disallow mixed attributes. */
6296 if ((l1 & s2) || (l2 & s1))
6297 return 0;
6300 /* Check for mismatched ISR attribute. */
6301 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6302 if (! l1)
6303 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6304 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6305 if (! l2)
6306 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6307 if (l1 != l2)
6308 return 0;
6310 return 1;
6313 /* Assigns default attributes to a newly defined type.  This is used to
6314 set short_call/long_call attributes for function types of
6315 functions defined inside corresponding #pragma scopes. */
6316 static void
6317 arm_set_default_type_attributes (tree type)
6319 /* Add __attribute__ ((long_call)) to all functions when inside
6320 #pragma long_calls, or __attribute__ ((short_call)) when inside
6321 #pragma no_long_calls. */
6322 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6324 tree type_attr_list, attr_name;
6325 type_attr_list = TYPE_ATTRIBUTES (type);
6327 if (arm_pragma_long_calls == LONG)
6328 attr_name = get_identifier ("long_call");
6329 else if (arm_pragma_long_calls == SHORT)
6330 attr_name = get_identifier ("short_call");
6331 else
6332 return;
6334 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6335 TYPE_ATTRIBUTES (type) = type_attr_list;
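/* Illustrative use of the pragmas handled above:

       #pragma long_calls
       void far_away (void);   -- its type gets the long_call attribute
       #pragma long_calls_off

   Function types declared inside a #pragma long_calls region receive
   the long_call attribute, while #pragma no_long_calls gives them
   short_call instead.  */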
6339 /* Return true if DECL is known to be linked into section SECTION. */
6341 static bool
6342 arm_function_in_section_p (tree decl, section *section)
6344 /* We can only be certain about functions defined in the same
6345 compilation unit. */
6346 if (!TREE_STATIC (decl))
6347 return false;
6349 /* Make sure that SYMBOL always binds to the definition in this
6350 compilation unit. */
6351 if (!targetm.binds_local_p (decl))
6352 return false;
6354 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6355 if (!DECL_SECTION_NAME (decl))
6357 /* Make sure that we will not create a unique section for DECL. */
6358 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6359 return false;
6362 return function_section (decl) == section;
6365 /* Return nonzero if a 32-bit "long_call" should be generated for
6366 a call from the current function to DECL. We generate a long_call
6367 if the function:
6369 a. has an __attribute__ ((long_call))
6370 or b. is within the scope of a #pragma long_calls
6371 or c. the -mlong-calls command line switch has been specified
6373 However we do not generate a long call if the function:
6375 d. has an __attribute__ ((short_call))
6376 or e. is inside the scope of a #pragma no_long_calls
6377 or f. is defined in the same section as the current function. */
6379 bool
6380 arm_is_long_call_p (tree decl)
6382 tree attrs;
6384 if (!decl)
6385 return TARGET_LONG_CALLS;
6387 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6388 if (lookup_attribute ("short_call", attrs))
6389 return false;
6391 /* For "f", be conservative, and only cater for cases in which the
6392 whole of the current function is placed in the same section. */
6393 if (!flag_reorder_blocks_and_partition
6394 && TREE_CODE (decl) == FUNCTION_DECL
6395 && arm_function_in_section_p (decl, current_function_section ()))
6396 return false;
6398 if (lookup_attribute ("long_call", attrs))
6399 return true;
6401 return TARGET_LONG_CALLS;
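/* A typical use of the attribute tested above (illustrative only):

       extern void flash_handler (void) __attribute__ ((long_call));

   Calls to flash_handler are then emitted as a longer sequence that
   loads the full address and branches via a register, rather than a
   single BL, so they are not limited to the BL instruction's +/-32MB
   range.  */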
6404 /* Return nonzero if it is ok to make a tail-call to DECL. */
6405 static bool
6406 arm_function_ok_for_sibcall (tree decl, tree exp)
6408 unsigned long func_type;
6410 if (cfun->machine->sibcall_blocked)
6411 return false;
6413 /* Never tailcall something if we are generating code for Thumb-1. */
6414 if (TARGET_THUMB1)
6415 return false;
6417 /* The PIC register is live on entry to VxWorks PLT entries, so we
6418 must make the call before restoring the PIC register. */
6419 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6420 return false;
6422 /* If we are interworking and the function is not declared static
6423 then we can't tail-call it unless we know that it exists in this
6424 compilation unit (since it might be a Thumb routine). */
6425 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6426 && !TREE_ASM_WRITTEN (decl))
6427 return false;
6429 func_type = arm_current_func_type ();
6430 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6431 if (IS_INTERRUPT (func_type))
6432 return false;
6434 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6436 /* Check that the return value locations are the same. For
6437 example that we aren't returning a value from the sibling in
6438 a VFP register but then need to transfer it to a core
6439 register. */
6440 rtx a, b;
6442 a = arm_function_value (TREE_TYPE (exp), decl, false);
6443 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6444 cfun->decl, false);
6445 if (!rtx_equal_p (a, b))
6446 return false;
6449 /* Never tailcall if function may be called with a misaligned SP. */
6450 if (IS_STACKALIGN (func_type))
6451 return false;
6453 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6454 references should become a NOP. Don't convert such calls into
6455 sibling calls. */
6456 if (TARGET_AAPCS_BASED
6457 && arm_abi == ARM_ABI_AAPCS
6458 && decl
6459 && DECL_WEAK (decl))
6460 return false;
6462 /* Everything else is ok. */
6463 return true;
6467 /* Addressing mode support functions. */
6469 /* Return nonzero if X is a legitimate immediate operand when compiling
6470 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6471 int
6472 legitimate_pic_operand_p (rtx x)
6474 if (GET_CODE (x) == SYMBOL_REF
6475 || (GET_CODE (x) == CONST
6476 && GET_CODE (XEXP (x, 0)) == PLUS
6477 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6478 return 0;
6480 return 1;
6483 /* Record that the current function needs a PIC register. Initialize
6484 cfun->machine->pic_reg if we have not already done so. */
6486 static void
6487 require_pic_register (void)
6489 /* A lot of the logic here is made obscure by the fact that this
6490 routine gets called as part of the rtx cost estimation process.
6491 We don't want those calls to affect any assumptions about the real
6492 function; and further, we can't call entry_of_function() until we
6493 start the real expansion process. */
6494 if (!crtl->uses_pic_offset_table)
6496 gcc_assert (can_create_pseudo_p ());
6497 if (arm_pic_register != INVALID_REGNUM
6498 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6500 if (!cfun->machine->pic_reg)
6501 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6503 /* Play games to avoid marking the function as needing pic
6504 if we are being called as part of the cost-estimation
6505 process. */
6506 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6507 crtl->uses_pic_offset_table = 1;
6509 else
6511 rtx_insn *seq, *insn;
6513 if (!cfun->machine->pic_reg)
6514 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6516 /* Play games to avoid marking the function as needing pic
6517 if we are being called as part of the cost-estimation
6518 process. */
6519 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6521 crtl->uses_pic_offset_table = 1;
6522 start_sequence ();
6524 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6525 && arm_pic_register > LAST_LO_REGNUM)
6526 emit_move_insn (cfun->machine->pic_reg,
6527 gen_rtx_REG (Pmode, arm_pic_register));
6528 else
6529 arm_load_pic_register (0UL);
6531 seq = get_insns ();
6532 end_sequence ();
6534 for (insn = seq; insn; insn = NEXT_INSN (insn))
6535 if (INSN_P (insn))
6536 INSN_LOCATION (insn) = prologue_location;
6538 /* We can be called during expansion of PHI nodes, where
6539 we can't yet emit instructions directly in the final
6540 insn stream. Queue the insns on the entry edge, they will
6541 be committed after everything else is expanded. */
6542 insert_insn_on_edge (seq,
6543 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6550 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6552 if (GET_CODE (orig) == SYMBOL_REF
6553 || GET_CODE (orig) == LABEL_REF)
6555 rtx insn;
6557 if (reg == 0)
6559 gcc_assert (can_create_pseudo_p ());
6560 reg = gen_reg_rtx (Pmode);
6563 /* VxWorks does not impose a fixed gap between segments; the run-time
6564 gap can be different from the object-file gap. We therefore can't
6565 use GOTOFF unless we are absolutely sure that the symbol is in the
6566 same segment as the GOT. Unfortunately, the flexibility of linker
6567 scripts means that we can't be sure of that in general, so assume
6568 that GOTOFF is never valid on VxWorks. */
6569 if ((GET_CODE (orig) == LABEL_REF
6570 || (GET_CODE (orig) == SYMBOL_REF &&
6571 SYMBOL_REF_LOCAL_P (orig)))
6572 && NEED_GOT_RELOC
6573 && arm_pic_data_is_text_relative)
6574 insn = arm_pic_static_addr (orig, reg);
6575 else
6577 rtx pat;
6578 rtx mem;
6580 /* If this function doesn't have a pic register, create one now. */
6581 require_pic_register ();
6583 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6585 /* Make the MEM as close to a constant as possible. */
6586 mem = SET_SRC (pat);
6587 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6588 MEM_READONLY_P (mem) = 1;
6589 MEM_NOTRAP_P (mem) = 1;
6591 insn = emit_insn (pat);
6594 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6595 by loop. */
6596 set_unique_reg_note (insn, REG_EQUAL, orig);
6598 return reg;
6600 else if (GET_CODE (orig) == CONST)
6602 rtx base, offset;
6604 if (GET_CODE (XEXP (orig, 0)) == PLUS
6605 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6606 return orig;
6608 /* Handle the case where we have: const (UNSPEC_TLS). */
6609 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6610 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6611 return orig;
6613 /* Handle the case where we have:
6614 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6615 CONST_INT. */
6616 if (GET_CODE (XEXP (orig, 0)) == PLUS
6617 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6618 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6620 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6621 return orig;
6624 if (reg == 0)
6626 gcc_assert (can_create_pseudo_p ());
6627 reg = gen_reg_rtx (Pmode);
6630 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6632 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6633 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6634 base == reg ? 0 : reg);
6636 if (CONST_INT_P (offset))
6638 /* The base register doesn't really matter, we only want to
6639 test the index for the appropriate mode. */
6640 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6642 gcc_assert (can_create_pseudo_p ());
6643 offset = force_reg (Pmode, offset);
6646 if (CONST_INT_P (offset))
6647 return plus_constant (Pmode, base, INTVAL (offset));
6650 if (GET_MODE_SIZE (mode) > 4
6651 && (GET_MODE_CLASS (mode) == MODE_INT
6652 || TARGET_SOFT_FLOAT))
6654 emit_insn (gen_addsi3 (reg, base, offset));
6655 return reg;
6658 return gen_rtx_PLUS (Pmode, base, offset);
6661 return orig;
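/* In outline, the effect of legitimize_pic_address on a global symbol
   is a load through the PIC register, conceptually

       (set (reg Rn) (mem (plus pic_reg <GOT offset of the symbol>)))

   (see calculate_pic_address in arm.md), while local symbols and
   labels take the cheaper PC-relative path via arm_pic_static_addr.  */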
6665 /* Find a spare register to use during the prolog of a function. */
6667 static int
6668 thumb_find_work_register (unsigned long pushed_regs_mask)
6670 int reg;
6672 /* Check the argument registers first as these are call-used. The
6673 register allocation order means that sometimes r3 might be used
6674 but earlier argument registers might not, so check them all. */
6675 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6676 if (!df_regs_ever_live_p (reg))
6677 return reg;
6679 /* Before going on to check the call-saved registers we can try a couple
6680 more ways of deducing that r3 is available. The first is when we are
6681 pushing anonymous arguments onto the stack and we have less than 4
6682 registers worth of fixed arguments(*). In this case r3 will be part of
6683 the variable argument list and so we can be sure that it will be
6684 pushed right at the start of the function. Hence it will be available
6685 for the rest of the prologue.
6686 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6687 if (cfun->machine->uses_anonymous_args
6688 && crtl->args.pretend_args_size > 0)
6689 return LAST_ARG_REGNUM;
6691 /* The other case is when we have fixed arguments but less than 4 registers
6692 worth. In this case r3 might be used in the body of the function, but
6693 it is not being used to convey an argument into the function. In theory
6694 we could just check crtl->args.size to see how many bytes are
6695 being passed in argument registers, but it seems that it is unreliable.
6696 Sometimes it will have the value 0 when in fact arguments are being
6697 passed. (See testcase execute/20021111-1.c for an example). So we also
6698 check the args_info.nregs field as well. The problem with this field is
6699 that it makes no allowances for arguments that are passed to the
6700 function but which are not used. Hence we could miss an opportunity
6701 when a function has an unused argument in r3. But it is better to be
6702 safe than to be sorry. */
6703 if (! cfun->machine->uses_anonymous_args
6704 && crtl->args.size >= 0
6705 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6706 && (TARGET_AAPCS_BASED
6707 ? crtl->args.info.aapcs_ncrn < 4
6708 : crtl->args.info.nregs < 4))
6709 return LAST_ARG_REGNUM;
6711 /* Otherwise look for a call-saved register that is going to be pushed. */
6712 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6713 if (pushed_regs_mask & (1 << reg))
6714 return reg;
6716 if (TARGET_THUMB2)
6718 /* Thumb-2 can use high regs. */
6719 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6720 if (pushed_regs_mask & (1 << reg))
6721 return reg;
6723 /* Something went wrong - thumb_compute_save_reg_mask()
6724 should have arranged for a suitable register to be pushed. */
6725 gcc_unreachable ();
6728 static GTY(()) int pic_labelno;
6730 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6731 low register. */
6733 void
6734 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6736 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6738 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6739 return;
6741 gcc_assert (flag_pic);
6743 pic_reg = cfun->machine->pic_reg;
6744 if (TARGET_VXWORKS_RTP)
6746 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6747 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6748 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6750 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6752 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6753 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6755 else
6757 /* We use an UNSPEC rather than a LABEL_REF because this label
6758 never appears in the code stream. */
6760 labelno = GEN_INT (pic_labelno++);
6761 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6762 l1 = gen_rtx_CONST (VOIDmode, l1);
6764 /* On the ARM the PC register contains 'dot + 8' at the time of the
6765 addition; on the Thumb it is 'dot + 4'. */
6766 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6767 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6768 UNSPEC_GOTSYM_OFF);
6769 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6771 if (TARGET_32BIT)
6773 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6775 else /* TARGET_THUMB1 */
6777 if (arm_pic_register != INVALID_REGNUM
6778 && REGNO (pic_reg) > LAST_LO_REGNUM)
6780 /* We will have pushed the pic register, so we should always be
6781 able to find a work register. */
6782 pic_tmp = gen_rtx_REG (SImode,
6783 thumb_find_work_register (saved_regs));
6784 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6785 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6786 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6788 else if (arm_pic_register != INVALID_REGNUM
6789 && arm_pic_register > LAST_LO_REGNUM
6790 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6792 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6793 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6794 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6796 else
6797 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6801 /* Need to emit this whether or not we obey regdecls,
6802 since setjmp/longjmp can cause life info to screw up. */
6803 emit_use (pic_reg);
6806 /* Generate code to load the address of a static var when flag_pic is set. */
6807 static rtx
6808 arm_pic_static_addr (rtx orig, rtx reg)
6810 rtx l1, labelno, offset_rtx, insn;
6812 gcc_assert (flag_pic);
6814 /* We use an UNSPEC rather than a LABEL_REF because this label
6815 never appears in the code stream. */
6816 labelno = GEN_INT (pic_labelno++);
6817 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6818 l1 = gen_rtx_CONST (VOIDmode, l1);
6820 /* On the ARM the PC register contains 'dot + 8' at the time of the
6821 addition; on the Thumb it is 'dot + 4'. */
6822 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6823 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6824 UNSPEC_SYMBOL_OFFSET);
6825 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6827 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6828 return insn;
6831 /* Return nonzero if X is valid as an ARM state addressing register. */
6832 static int
6833 arm_address_register_rtx_p (rtx x, int strict_p)
6835 int regno;
6837 if (!REG_P (x))
6838 return 0;
6840 regno = REGNO (x);
6842 if (strict_p)
6843 return ARM_REGNO_OK_FOR_BASE_P (regno);
6845 return (regno <= LAST_ARM_REGNUM
6846 || regno >= FIRST_PSEUDO_REGISTER
6847 || regno == FRAME_POINTER_REGNUM
6848 || regno == ARG_POINTER_REGNUM);
6851 /* Return TRUE if this rtx is the difference of a symbol and a label,
6852 and will reduce to a PC-relative relocation in the object file.
6853 Expressions like this can be left alone when generating PIC, rather
6854 than forced through the GOT. */
6855 static int
6856 pcrel_constant_p (rtx x)
6858 if (GET_CODE (x) == MINUS)
6859 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6861 return FALSE;
6864 /* Return true if X will surely end up in an index register after next
6865 splitting pass. */
6866 static bool
6867 will_be_in_index_register (const_rtx x)
6869 /* arm.md: calculate_pic_address will split this into a register. */
6870 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6873 /* Return nonzero if X is a valid ARM state address operand. */
6874 int
6875 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6876 int strict_p)
6878 bool use_ldrd;
6879 enum rtx_code code = GET_CODE (x);
6881 if (arm_address_register_rtx_p (x, strict_p))
6882 return 1;
6884 use_ldrd = (TARGET_LDRD
6885 && (mode == DImode
6886 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6888 if (code == POST_INC || code == PRE_DEC
6889 || ((code == PRE_INC || code == POST_DEC)
6890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6895 && GET_CODE (XEXP (x, 1)) == PLUS
6896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6898 rtx addend = XEXP (XEXP (x, 1), 1);
6900 /* Don't allow ldrd post-increment by register because it's hard
6901 to fix up invalid register choices. */
6902 if (use_ldrd
6903 && GET_CODE (x) == POST_MODIFY
6904 && REG_P (addend))
6905 return 0;
6907 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6908 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6911 /* After reload constants split into minipools will have addresses
6912 from a LABEL_REF. */
6913 else if (reload_completed
6914 && (code == LABEL_REF
6915 || (code == CONST
6916 && GET_CODE (XEXP (x, 0)) == PLUS
6917 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6918 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6919 return 1;
6921 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6922 return 0;
6924 else if (code == PLUS)
6926 rtx xop0 = XEXP (x, 0);
6927 rtx xop1 = XEXP (x, 1);
6929 return ((arm_address_register_rtx_p (xop0, strict_p)
6930 && ((CONST_INT_P (xop1)
6931 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6932 || (!strict_p && will_be_in_index_register (xop1))))
6933 || (arm_address_register_rtx_p (xop1, strict_p)
6934 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6937 #if 0
6938 /* Reload currently can't handle MINUS, so disable this for now */
6939 else if (GET_CODE (x) == MINUS)
6941 rtx xop0 = XEXP (x, 0);
6942 rtx xop1 = XEXP (x, 1);
6944 return (arm_address_register_rtx_p (xop0, strict_p)
6945 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6947 #endif
6949 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6950 && code == SYMBOL_REF
6951 && CONSTANT_POOL_ADDRESS_P (x)
6952 && ! (flag_pic
6953 && symbol_mentioned_p (get_pool_constant (x))
6954 && ! pcrel_constant_p (get_pool_constant (x))))
6955 return 1;
6957 return 0;
6960 /* Return nonzero if X is a valid Thumb-2 address operand. */
6961 static int
6962 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6964 bool use_ldrd;
6965 enum rtx_code code = GET_CODE (x);
6967 if (arm_address_register_rtx_p (x, strict_p))
6968 return 1;
6970 use_ldrd = (TARGET_LDRD
6971 && (mode == DImode
6972 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6974 if (code == POST_INC || code == PRE_DEC
6975 || ((code == PRE_INC || code == POST_DEC)
6976 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6977 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6979 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6980 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6981 && GET_CODE (XEXP (x, 1)) == PLUS
6982 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6984 /* Thumb-2 only has autoincrement by constant. */
6985 rtx addend = XEXP (XEXP (x, 1), 1);
6986 HOST_WIDE_INT offset;
6988 if (!CONST_INT_P (addend))
6989 return 0;
6991 offset = INTVAL(addend);
6992 if (GET_MODE_SIZE (mode) <= 4)
6993 return (offset > -256 && offset < 256);
6995 return (use_ldrd && offset > -1024 && offset < 1024
6996 && (offset & 3) == 0);
6999 /* After reload constants split into minipools will have addresses
7000 from a LABEL_REF. */
7001 else if (reload_completed
7002 && (code == LABEL_REF
7003 || (code == CONST
7004 && GET_CODE (XEXP (x, 0)) == PLUS
7005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7006 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7007 return 1;
7009 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7010 return 0;
7012 else if (code == PLUS)
7014 rtx xop0 = XEXP (x, 0);
7015 rtx xop1 = XEXP (x, 1);
7017 return ((arm_address_register_rtx_p (xop0, strict_p)
7018 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7019 || (!strict_p && will_be_in_index_register (xop1))))
7020 || (arm_address_register_rtx_p (xop1, strict_p)
7021 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7024 /* Normally we can assign constant values to target registers without
7025 the help of the constant pool.  But there are cases where we have to
7026 use the constant pool, for example:
7027 1) assigning a label to a register.
7028 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7030 A constant pool access in the format:
7031 (set (reg r0) (mem (symbol_ref (".LC0"))))
7032 will cause the use of the literal pool (later in function arm_reorg).
7033 So here we mark such a format as invalid; the compiler will then
7034 adjust it into:
7035 (set (reg r0) (symbol_ref (".LC0")))
7036 (set (reg r0) (mem (reg r0))).
7037 No extra register is required, and (mem (reg r0)) won't cause the use
7038 of literal pools. */
7039 else if (arm_disable_literal_pool && code == SYMBOL_REF
7040 && CONSTANT_POOL_ADDRESS_P (x))
7041 return 0;
7043 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7044 && code == SYMBOL_REF
7045 && CONSTANT_POOL_ADDRESS_P (x)
7046 && ! (flag_pic
7047 && symbol_mentioned_p (get_pool_constant (x))
7048 && ! pcrel_constant_p (get_pool_constant (x))))
7049 return 1;
7051 return 0;
7054 /* Return nonzero if INDEX is valid for an address index operand in
7055 ARM state. */
7056 static int
7057 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7058 int strict_p)
7060 HOST_WIDE_INT range;
7061 enum rtx_code code = GET_CODE (index);
7063 /* Standard coprocessor addressing modes. */
7064 if (TARGET_HARD_FLOAT
7065 && TARGET_VFP
7066 && (mode == SFmode || mode == DFmode))
7067 return (code == CONST_INT && INTVAL (index) < 1024
7068 && INTVAL (index) > -1024
7069 && (INTVAL (index) & 3) == 0);
7071 /* For quad modes, we restrict the constant offset to be slightly less
7072 than what the instruction format permits. We do this because for
7073 quad mode moves, we will actually decompose them into two separate
7074 double-mode reads or writes. INDEX must therefore be a valid
7075 (double-mode) offset and so should INDEX+8. */
7076 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7077 return (code == CONST_INT
7078 && INTVAL (index) < 1016
7079 && INTVAL (index) > -1024
7080 && (INTVAL (index) & 3) == 0);
7082 /* We have no such constraint on double mode offsets, so we permit the
7083 full range of the instruction format. */
7084 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7085 return (code == CONST_INT
7086 && INTVAL (index) < 1024
7087 && INTVAL (index) > -1024
7088 && (INTVAL (index) & 3) == 0);
7090 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7091 return (code == CONST_INT
7092 && INTVAL (index) < 1024
7093 && INTVAL (index) > -1024
7094 && (INTVAL (index) & 3) == 0);
7096 if (arm_address_register_rtx_p (index, strict_p)
7097 && (GET_MODE_SIZE (mode) <= 4))
7098 return 1;
7100 if (mode == DImode || mode == DFmode)
7102 if (code == CONST_INT)
7104 HOST_WIDE_INT val = INTVAL (index);
7106 if (TARGET_LDRD)
7107 return val > -256 && val < 256;
7108 else
7109 return val > -4096 && val < 4092;
7112 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7115 if (GET_MODE_SIZE (mode) <= 4
7116 && ! (arm_arch4
7117 && (mode == HImode
7118 || mode == HFmode
7119 || (mode == QImode && outer == SIGN_EXTEND))))
7121 if (code == MULT)
7123 rtx xiop0 = XEXP (index, 0);
7124 rtx xiop1 = XEXP (index, 1);
7126 return ((arm_address_register_rtx_p (xiop0, strict_p)
7127 && power_of_two_operand (xiop1, SImode))
7128 || (arm_address_register_rtx_p (xiop1, strict_p)
7129 && power_of_two_operand (xiop0, SImode)));
7131 else if (code == LSHIFTRT || code == ASHIFTRT
7132 || code == ASHIFT || code == ROTATERT)
7134 rtx op = XEXP (index, 1);
7136 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7137 && CONST_INT_P (op)
7138 && INTVAL (op) > 0
7139 && INTVAL (op) <= 31);
7143 /* For ARM v4 we may be doing a sign-extend operation during the
7144 load. */
7145 if (arm_arch4)
7147 if (mode == HImode
7148 || mode == HFmode
7149 || (outer == SIGN_EXTEND && mode == QImode))
7150 range = 256;
7151 else
7152 range = 4096;
7154 else
7155 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7157 return (code == CONST_INT
7158 && INTVAL (index) < range
7159 && INTVAL (index) > -range);
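/* Examples of index forms accepted above (illustrative): a scaled
   register index such as the one in

       ldr r0, [r1, r2, lsl #2]

   matches the MULT/shift cases via power_of_two_operand, while an
   SImode constant offset is accepted anywhere in the open range
   (-4096, 4096).  */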
7162 /* Return true if OP is a valid index scaling factor for a Thumb-2
7163 address index operand, i.e. 1, 2, 4 or 8. */
7164 static bool
7165 thumb2_index_mul_operand (rtx op)
7167 HOST_WIDE_INT val;
7169 if (!CONST_INT_P (op))
7170 return false;
7172 val = INTVAL(op);
7173 return (val == 1 || val == 2 || val == 4 || val == 8);
7176 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7177 static int
7178 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7180 enum rtx_code code = GET_CODE (index);
7182 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7183 /* Standard coprocessor addressing modes. */
7184 if (TARGET_HARD_FLOAT
7185 && TARGET_VFP
7186 && (mode == SFmode || mode == DFmode))
7187 return (code == CONST_INT && INTVAL (index) < 1024
7188 /* Thumb-2 allows only > -256 index range for its core register
7189 load/stores. Since we allow SF/DF in core registers, we have
7190 to use the intersection between -256~4096 (core) and -1024~1024
7191 (coprocessor). */
7192 && INTVAL (index) > -256
7193 && (INTVAL (index) & 3) == 0);
7195 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7197 /* For DImode assume values will usually live in core regs
7198 and only allow LDRD addressing modes. */
7199 if (!TARGET_LDRD || mode != DImode)
7200 return (code == CONST_INT
7201 && INTVAL (index) < 1024
7202 && INTVAL (index) > -1024
7203 && (INTVAL (index) & 3) == 0);
7206 /* For quad modes, we restrict the constant offset to be slightly less
7207 than what the instruction format permits. We do this because for
7208 quad mode moves, we will actually decompose them into two separate
7209 double-mode reads or writes. INDEX must therefore be a valid
7210 (double-mode) offset and so should INDEX+8. */
7211 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7212 return (code == CONST_INT
7213 && INTVAL (index) < 1016
7214 && INTVAL (index) > -1024
7215 && (INTVAL (index) & 3) == 0);
7217 /* We have no such constraint on double mode offsets, so we permit the
7218 full range of the instruction format. */
7219 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7220 return (code == CONST_INT
7221 && INTVAL (index) < 1024
7222 && INTVAL (index) > -1024
7223 && (INTVAL (index) & 3) == 0);
7225 if (arm_address_register_rtx_p (index, strict_p)
7226 && (GET_MODE_SIZE (mode) <= 4))
7227 return 1;
7229 if (mode == DImode || mode == DFmode)
7231 if (code == CONST_INT)
7233 HOST_WIDE_INT val = INTVAL (index);
7234 /* ??? Can we assume ldrd for thumb2? */
7235 /* Thumb-2 ldrd only has reg+const addressing modes. */
7236 /* ldrd supports offsets of +-1020.
7237 However the ldr fallback does not. */
7238 return val > -256 && val < 256 && (val & 3) == 0;
7240 else
7241 return 0;
7244 if (code == MULT)
7246 rtx xiop0 = XEXP (index, 0);
7247 rtx xiop1 = XEXP (index, 1);
7249 return ((arm_address_register_rtx_p (xiop0, strict_p)
7250 && thumb2_index_mul_operand (xiop1))
7251 || (arm_address_register_rtx_p (xiop1, strict_p)
7252 && thumb2_index_mul_operand (xiop0)));
7254 else if (code == ASHIFT)
7256 rtx op = XEXP (index, 1);
7258 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7259 && CONST_INT_P (op)
7260 && INTVAL (op) > 0
7261 && INTVAL (op) <= 3);
7264 return (code == CONST_INT
7265 && INTVAL (index) < 4096
7266 && INTVAL (index) > -256);
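/* By way of illustration: Thumb-2 allows a scaled register index only
   up to a shift of three, so

       ldr r0, [r1, r2, lsl #3]

   is accepted by the MULT/ASHIFT checks above, whereas a constant
   offset must lie in the asymmetric open range (-256, 4096).  */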
7269 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7270 static int
7271 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7273 int regno;
7275 if (!REG_P (x))
7276 return 0;
7278 regno = REGNO (x);
7280 if (strict_p)
7281 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7283 return (regno <= LAST_LO_REGNUM
7284 || regno > LAST_VIRTUAL_REGISTER
7285 || regno == FRAME_POINTER_REGNUM
7286 || (GET_MODE_SIZE (mode) >= 4
7287 && (regno == STACK_POINTER_REGNUM
7288 || regno >= FIRST_PSEUDO_REGISTER
7289 || x == hard_frame_pointer_rtx
7290 || x == arg_pointer_rtx)));
7293 /* Return nonzero if x is a legitimate index register. This is the case
7294 for any base register that can access a QImode object. */
7295 inline static int
7296 thumb1_index_register_rtx_p (rtx x, int strict_p)
7298 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7301 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7303 The AP may be eliminated to either the SP or the FP, so we use the
7304 least common denominator, e.g. SImode, and offsets from 0 to 64.
7306 ??? Verify whether the above is the right approach.
7308 ??? Also, the FP may be eliminated to the SP, so perhaps that
7309 needs special handling also.
7311 ??? Look at how the mips16 port solves this problem. It probably uses
7312 better ways to solve some of these problems.
7314 Although it is not incorrect, we don't accept QImode and HImode
7315 addresses based on the frame pointer or arg pointer until the
7316 reload pass starts. This is so that eliminating such addresses
7317 into stack based ones won't produce impossible code. */
7318 int
7319 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7321 /* ??? Not clear if this is right. Experiment. */
7322 if (GET_MODE_SIZE (mode) < 4
7323 && !(reload_in_progress || reload_completed)
7324 && (reg_mentioned_p (frame_pointer_rtx, x)
7325 || reg_mentioned_p (arg_pointer_rtx, x)
7326 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7327 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7328 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7329 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7330 return 0;
7332 /* Accept any base register. SP only in SImode or larger. */
7333 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7334 return 1;
7336 /* This is PC relative data before arm_reorg runs. */
7337 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7338 && GET_CODE (x) == SYMBOL_REF
7339 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7340 return 1;
7342 /* This is PC relative data after arm_reorg runs. */
7343 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7344 && reload_completed
7345 && (GET_CODE (x) == LABEL_REF
7346 || (GET_CODE (x) == CONST
7347 && GET_CODE (XEXP (x, 0)) == PLUS
7348 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7349 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7350 return 1;
7352 /* Post-inc indexing only supported for SImode and larger. */
7353 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7354 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7355 return 1;
7357 else if (GET_CODE (x) == PLUS)
7359 /* REG+REG address can be any two index registers. */
7360 /* We disallow FRAME+REG addressing since we know that FRAME
7361 will be replaced with STACK, and SP relative addressing only
7362 permits SP+OFFSET. */
7363 if (GET_MODE_SIZE (mode) <= 4
7364 && XEXP (x, 0) != frame_pointer_rtx
7365 && XEXP (x, 1) != frame_pointer_rtx
7366 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7367 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7368 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7369 return 1;
7371 /* REG+const has 5-7 bit offset for non-SP registers. */
7372 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7373 || XEXP (x, 0) == arg_pointer_rtx)
7374 && CONST_INT_P (XEXP (x, 1))
7375 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7376 return 1;
7378 /* REG+const has 10-bit offset for SP, but only SImode and
7379 larger is supported. */
7380 /* ??? Should probably check for DI/DFmode overflow here
7381 just like GO_IF_LEGITIMATE_OFFSET does. */
7382 else if (REG_P (XEXP (x, 0))
7383 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7384 && GET_MODE_SIZE (mode) >= 4
7385 && CONST_INT_P (XEXP (x, 1))
7386 && INTVAL (XEXP (x, 1)) >= 0
7387 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7388 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7389 return 1;
7391 else if (REG_P (XEXP (x, 0))
7392 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7393 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7394 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7395 && REGNO (XEXP (x, 0))
7396 <= LAST_VIRTUAL_POINTER_REGISTER))
7397 && GET_MODE_SIZE (mode) >= 4
7398 && CONST_INT_P (XEXP (x, 1))
7399 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7400 return 1;
7403 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7404 && GET_MODE_SIZE (mode) == 4
7405 && GET_CODE (x) == SYMBOL_REF
7406 && CONSTANT_POOL_ADDRESS_P (x)
7407 && ! (flag_pic
7408 && symbol_mentioned_p (get_pool_constant (x))
7409 && ! pcrel_constant_p (get_pool_constant (x))))
7410 return 1;
7412 return 0;
7415 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7416 instruction of mode MODE. */
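/* In summary, the switch below accepts byte offsets 0-31, halfword
   offsets 0-62 (even values only), and for word and larger modes any
   multiple of four for which the whole access stays within 128 bytes
   (0-124 for SImode).  */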
7418 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7420 switch (GET_MODE_SIZE (mode))
7422 case 1:
7423 return val >= 0 && val < 32;
7425 case 2:
7426 return val >= 0 && val < 64 && (val & 1) == 0;
7428 default:
7429 return (val >= 0
7430 && (val + GET_MODE_SIZE (mode)) <= 128
7431 && (val & 3) == 0);
7435 bool
7436 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7438 if (TARGET_ARM)
7439 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7440 else if (TARGET_THUMB2)
7441 return thumb2_legitimate_address_p (mode, x, strict_p);
7442 else /* if (TARGET_THUMB1) */
7443 return thumb1_legitimate_address_p (mode, x, strict_p);
7446 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7448 Given an rtx X being reloaded into a reg required to be
7449 in class CLASS, return the class of reg to actually use.
7450 In general this is just CLASS, but for the Thumb core registers and
7451 immediate constants we prefer a LO_REGS class or a subset. */
7453 static reg_class_t
7454 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7456 if (TARGET_32BIT)
7457 return rclass;
7458 else
7460 if (rclass == GENERAL_REGS)
7461 return LO_REGS;
7462 else
7463 return rclass;
7467 /* Build the SYMBOL_REF for __tls_get_addr. */
7469 static GTY(()) rtx tls_get_addr_libfunc;
7471 static rtx
7472 get_tls_get_addr (void)
7474 if (!tls_get_addr_libfunc)
7475 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7476 return tls_get_addr_libfunc;
7480 arm_load_tp (rtx target)
7482 if (!target)
7483 target = gen_reg_rtx (SImode);
7485 if (TARGET_HARD_TP)
7487 /* Can return in any reg. */
7488 emit_insn (gen_load_tp_hard (target));
7490 else
7492 /* Always returned in r0. Immediately copy the result into a pseudo,
7493 otherwise other uses of r0 (e.g. setting up function arguments) may
7494 clobber the value. */
7496 rtx tmp;
7498 emit_insn (gen_load_tp_soft ());
7500 tmp = gen_rtx_REG (SImode, 0);
7501 emit_move_insn (target, tmp);
7503 return target;
7506 static rtx
7507 load_tls_operand (rtx x, rtx reg)
7509 rtx tmp;
7511 if (reg == NULL_RTX)
7512 reg = gen_reg_rtx (SImode);
7514 tmp = gen_rtx_CONST (SImode, x);
7516 emit_move_insn (reg, tmp);
7518 return reg;
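/* Emit the insn sequence that computes the address of TLS symbol X by
   calling __tls_get_addr with relocation RELOC (TLS_GD32 or TLS_LDM32,
   never TLS_DESCSEQ).  The rtx holding the call's return value is
   stored in *VALUEP, and the emitted sequence itself is returned.  */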
7521 static rtx
7522 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7524 rtx insns, label, labelno, sum;
7526 gcc_assert (reloc != TLS_DESCSEQ);
7527 start_sequence ();
7529 labelno = GEN_INT (pic_labelno++);
7530 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7531 label = gen_rtx_CONST (VOIDmode, label);
7533 sum = gen_rtx_UNSPEC (Pmode,
7534 gen_rtvec (4, x, GEN_INT (reloc), label,
7535 GEN_INT (TARGET_ARM ? 8 : 4)),
7536 UNSPEC_TLS);
7537 reg = load_tls_operand (sum, reg);
7539 if (TARGET_ARM)
7540 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7541 else
7542 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7544 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7545 LCT_PURE, /* LCT_CONST? */
7546 Pmode, 1, reg, Pmode);
7548 insns = get_insns ();
7549 end_sequence ();
7551 return insns;
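/* Emit a GNU2 TLS descriptor sequence for symbol X.  The tlscall
   pattern leaves its result in r0, so the value is copied into REG
   (or into a fresh pseudo if REG is NULL) and that register is
   returned.  */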
7554 static rtx
7555 arm_tls_descseq_addr (rtx x, rtx reg)
7557 rtx labelno = GEN_INT (pic_labelno++);
7558 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7559 rtx sum = gen_rtx_UNSPEC (Pmode,
7560 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7561 gen_rtx_CONST (VOIDmode, label),
7562 GEN_INT (!TARGET_ARM)),
7563 UNSPEC_TLS);
7564 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7566 emit_insn (gen_tlscall (x, labelno));
7567 if (!reg)
7568 reg = gen_reg_rtx (SImode);
7569 else
7570 gcc_assert (REGNO (reg) != 0);
7572 emit_move_insn (reg, reg0);
7574 return reg;
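/* Legitimize a reference to the TLS symbol X, using REG as a scratch
   register when one is supplied.  The global-dynamic, local-dynamic,
   initial-exec and local-exec models are handled below; for the two
   dynamic models, TARGET_GNU2_TLS selects the TLS descriptor sequence
   instead of a call to __tls_get_addr.  */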
7578 legitimize_tls_address (rtx x, rtx reg)
7580 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7581 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7583 switch (model)
7585 case TLS_MODEL_GLOBAL_DYNAMIC:
7586 if (TARGET_GNU2_TLS)
7588 reg = arm_tls_descseq_addr (x, reg);
7590 tp = arm_load_tp (NULL_RTX);
7592 dest = gen_rtx_PLUS (Pmode, tp, reg);
7594 else
7596 /* Original scheme */
7597 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7598 dest = gen_reg_rtx (Pmode);
7599 emit_libcall_block (insns, dest, ret, x);
7601 return dest;
7603 case TLS_MODEL_LOCAL_DYNAMIC:
7604 if (TARGET_GNU2_TLS)
7606 reg = arm_tls_descseq_addr (x, reg);
7608 tp = arm_load_tp (NULL_RTX);
7610 dest = gen_rtx_PLUS (Pmode, tp, reg);
7612 else
7614 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7616 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7617 share the LDM result with other LD model accesses. */
7618 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7619 UNSPEC_TLS);
7620 dest = gen_reg_rtx (Pmode);
7621 emit_libcall_block (insns, dest, ret, eqv);
7623 /* Load the addend. */
7624 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7625 GEN_INT (TLS_LDO32)),
7626 UNSPEC_TLS);
7627 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7628 dest = gen_rtx_PLUS (Pmode, dest, addend);
7630 return dest;
7632 case TLS_MODEL_INITIAL_EXEC:
7633 labelno = GEN_INT (pic_labelno++);
7634 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7635 label = gen_rtx_CONST (VOIDmode, label);
7636 sum = gen_rtx_UNSPEC (Pmode,
7637 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7638 GEN_INT (TARGET_ARM ? 8 : 4)),
7639 UNSPEC_TLS);
7640 reg = load_tls_operand (sum, reg);
7642 if (TARGET_ARM)
7643 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7644 else if (TARGET_THUMB2)
7645 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7646 else
7648 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7649 emit_move_insn (reg, gen_const_mem (SImode, reg));
7652 tp = arm_load_tp (NULL_RTX);
7654 return gen_rtx_PLUS (Pmode, tp, reg);
7656 case TLS_MODEL_LOCAL_EXEC:
7657 tp = arm_load_tp (NULL_RTX);
7659 reg = gen_rtx_UNSPEC (Pmode,
7660 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7661 UNSPEC_TLS);
7662 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7664 return gen_rtx_PLUS (Pmode, tp, reg);
7666 default:
7667 abort ();
7671 /* Try machine-dependent ways of modifying an illegitimate address
7672 to be legitimate. If we find one, return the new, valid address. */
7674 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7676 if (arm_tls_referenced_p (x))
7678 rtx addend = NULL;
7680 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7682 addend = XEXP (XEXP (x, 0), 1);
7683 x = XEXP (XEXP (x, 0), 0);
7686 if (GET_CODE (x) != SYMBOL_REF)
7687 return x;
7689 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7691 x = legitimize_tls_address (x, NULL_RTX);
7693 if (addend)
7695 x = gen_rtx_PLUS (SImode, x, addend);
7696 orig_x = x;
7698 else
7699 return x;
7702 if (!TARGET_ARM)
7704 /* TODO: legitimize_address for Thumb2. */
7705 if (TARGET_THUMB2)
7706 return x;
7707 return thumb_legitimize_address (x, orig_x, mode);
7710 if (GET_CODE (x) == PLUS)
7712 rtx xop0 = XEXP (x, 0);
7713 rtx xop1 = XEXP (x, 1);
7715 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7716 xop0 = force_reg (SImode, xop0);
7718 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7719 && !symbol_mentioned_p (xop1))
7720 xop1 = force_reg (SImode, xop1);
7722 if (ARM_BASE_REGISTER_RTX_P (xop0)
7723 && CONST_INT_P (xop1))
7725 HOST_WIDE_INT n, low_n;
7726 rtx base_reg, val;
7727 n = INTVAL (xop1);
7729 /* VFP addressing modes actually allow greater offsets, but for
7730 now we just stick with the lowest common denominator. */
7731 if (mode == DImode
7732 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7734 low_n = n & 0x0f;
7735 n &= ~0x0f;
7736 if (low_n > 4)
7738 n += 16;
7739 low_n -= 16;
7742 else
7744 low_n = ((mode) == TImode ? 0
7745 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7746 n -= low_n;
7749 base_reg = gen_reg_rtx (SImode);
7750 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7751 emit_move_insn (base_reg, val);
7752 x = plus_constant (Pmode, base_reg, low_n);
7754 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7755 x = gen_rtx_PLUS (SImode, xop0, xop1);
7758 /* XXX We don't allow MINUS any more -- see comment in
7759 arm_legitimate_address_outer_p (). */
7760 else if (GET_CODE (x) == MINUS)
7762 rtx xop0 = XEXP (x, 0);
7763 rtx xop1 = XEXP (x, 1);
7765 if (CONSTANT_P (xop0))
7766 xop0 = force_reg (SImode, xop0);
7768 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7769 xop1 = force_reg (SImode, xop1);
7771 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7772 x = gen_rtx_MINUS (SImode, xop0, xop1);
7775 /* Make sure to take full advantage of the pre-indexed addressing mode
7776 with absolute addresses which often allows for the base register to
7777 be factorized for multiple adjacent memory references, and it might
7778 even allow for the mini pool to be avoided entirely. */
7779 else if (CONST_INT_P (x) && optimize > 0)
7781 unsigned int bits;
7782 HOST_WIDE_INT mask, base, index;
7783 rtx base_reg;
7785 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7786 use an 8-bit index. So let's use a 12-bit index for SImode only and
7787 hope that arm_gen_constant will enable ldrb to use more bits. */
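/* As an illustration of the splitting below: an SImode access to the
   constant address 0x1234 uses bits = 12, giving base = 0x1000 and
   index = 0x234; the base has only one bit set, so it is kept as-is
   and the final address becomes (base_reg + 0x234).  */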
7788 bits = (mode == SImode) ? 12 : 8;
7789 mask = (1 << bits) - 1;
7790 base = INTVAL (x) & ~mask;
7791 index = INTVAL (x) & mask;
7792 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7794 /* It'll most probably be more efficient to generate the base
7795 with more bits set and use a negative index instead. */
7796 base |= mask;
7797 index -= mask;
7799 base_reg = force_reg (SImode, GEN_INT (base));
7800 x = plus_constant (Pmode, base_reg, index);
7803 if (flag_pic)
7805 /* We need to find and carefully transform any SYMBOL and LABEL
7806 references; so go back to the original address expression. */
7807 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7809 if (new_x != orig_x)
7810 x = new_x;
7813 return x;
7817 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7818 to be legitimate. If we find one, return the new, valid address. */
7820 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7822 if (GET_CODE (x) == PLUS
7823 && CONST_INT_P (XEXP (x, 1))
7824 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7825 || INTVAL (XEXP (x, 1)) < 0))
7827 rtx xop0 = XEXP (x, 0);
7828 rtx xop1 = XEXP (x, 1);
7829 HOST_WIDE_INT offset = INTVAL (xop1);
7831 /* Try and fold the offset into a biasing of the base register and
7832 then offsetting that. Don't do this when optimizing for space
7833 since it can cause too many CSEs. */
7834 if (optimize_size && offset >= 0
7835 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7837 HOST_WIDE_INT delta;
7839 if (offset >= 256)
7840 delta = offset - (256 - GET_MODE_SIZE (mode));
7841 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7842 delta = 31 * GET_MODE_SIZE (mode);
7843 else
7844 delta = offset & (~31 * GET_MODE_SIZE (mode));
7846 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7847 NULL_RTX);
7848 x = plus_constant (Pmode, xop0, delta);
7850 else if (offset < 0 && offset > -256)
7851 /* Small negative offsets are best done with a subtract before the
7852 dereference; forcing these into a register normally takes two
7853 instructions. */
7854 x = force_operand (x, NULL_RTX);
7855 else
7857 /* For the remaining cases, force the constant into a register. */
7858 xop1 = force_reg (SImode, xop1);
7859 x = gen_rtx_PLUS (SImode, xop0, xop1);
7862 else if (GET_CODE (x) == PLUS
7863 && s_register_operand (XEXP (x, 1), SImode)
7864 && !s_register_operand (XEXP (x, 0), SImode))
7866 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7868 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7871 if (flag_pic)
7873 /* We need to find and carefully transform any SYMBOL and LABEL
7874 references; so go back to the original address expression. */
7875 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7877 if (new_x != orig_x)
7878 x = new_x;
7881 return x;
7884 bool
7885 arm_legitimize_reload_address (rtx *p,
7886 machine_mode mode,
7887 int opnum, int type,
7888 int ind_levels ATTRIBUTE_UNUSED)
7890 /* We must recognize output that we have already generated ourselves. */
7891 if (GET_CODE (*p) == PLUS
7892 && GET_CODE (XEXP (*p, 0)) == PLUS
7893 && REG_P (XEXP (XEXP (*p, 0), 0))
7894 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7895 && CONST_INT_P (XEXP (*p, 1)))
7897 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7898 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7899 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7900 return true;
7903 if (GET_CODE (*p) == PLUS
7904 && REG_P (XEXP (*p, 0))
7905 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7906 /* If the base register is equivalent to a constant, let the generic
7907 code handle it. Otherwise we will run into problems if a future
7908 reload pass decides to rematerialize the constant. */
7909 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7910 && CONST_INT_P (XEXP (*p, 1)))
7912 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7913 HOST_WIDE_INT low, high;
7915 /* Detect coprocessor load/stores. */
7916 bool coproc_p = ((TARGET_HARD_FLOAT
7917 && TARGET_VFP
7918 && (mode == SFmode || mode == DFmode))
7919 || (TARGET_REALLY_IWMMXT
7920 && VALID_IWMMXT_REG_MODE (mode))
7921 || (TARGET_NEON
7922 && (VALID_NEON_DREG_MODE (mode)
7923 || VALID_NEON_QREG_MODE (mode))));
7925 /* For some cases, bail out when the lower two bits of the offset are set, i.e. the offset is unaligned. */
7926 if ((val & 0x3) != 0
7927 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7928 && (coproc_p
7929 /* For DI, and DF under soft-float: */
7930 || ((mode == DImode || mode == DFmode)
7931 /* Without ldrd, we use stm/ldm, which does not
7932 fare well with unaligned bits. */
7933 && (! TARGET_LDRD
7934 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7935 || TARGET_THUMB2))))
7936 return false;
7938 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7939 of which the (reg+high) gets turned into a reload add insn,
7940 we try to decompose the index into high/low values that can often
7941 also lead to better reload CSE.
7942 For example:
7943 ldr r0, [r2, #4100] // Offset too large
7944 ldr r1, [r2, #4104] // Offset too large
7946 is best reloaded as:
7947 add t1, r2, #4096
7948 ldr r0, [t1, #4]
7949 add t2, r2, #4096
7950 ldr r1, [t2, #8]
7952 which post-reload CSE can simplify in most cases to eliminate the
7953 second add instruction:
7954 add t1, r2, #4096
7955 ldr r0, [t1, #4]
7956 ldr r1, [t1, #8]
7958 The idea here is that we want to split out the bits of the constant
7959 as a mask, rather than by subtracting the maximum offset that the
7960 respective type of load/store used can handle.
7962 A negative offset can still be utilized even if
7963 the overall offset is positive; sometimes this may lead to an immediate
7964 that can be constructed with fewer instructions.
7965 For example:
7966 ldr r0, [r2, #0x3FFFFC]
7968 This is best reloaded as:
7969 add t1, r2, #0x400000
7970 ldr r0, [t1, #-4]
7972 The trick for spotting this for a load insn with N bits of offset
7973 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7974 negative offset that is going to make bit N and all the bits below
7975 it become zero in the remainder part.
7977 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7978 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7979 used in most cases of ARM load/store instructions. */
7981 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7982 (((VAL) & ((1 << (N)) - 1)) \
7983 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7984 : 0)
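/* For the 0x3FFFFC example above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
   yields -4, leaving a high part of 0x400000, which is a valid
   single-instruction immediate.  */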
7986 if (coproc_p)
7988 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7990 /* NEON quad-word load/stores are made of two double-word accesses,
7991 so the valid index range is reduced by 8. Treat as 9-bit range if
7992 we go over it. */
7993 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7994 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7996 else if (GET_MODE_SIZE (mode) == 8)
7998 if (TARGET_LDRD)
7999 low = (TARGET_THUMB2
8000 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8001 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8002 else
8003 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8004 to access doublewords. The supported load/store offsets are
8005 -8, -4, and 4, which we try to produce here. */
8006 low = ((val & 0xf) ^ 0x8) - 0x8;
8008 else if (GET_MODE_SIZE (mode) < 8)
8010 /* NEON element load/stores do not have an offset. */
8011 if (TARGET_NEON_FP16 && mode == HFmode)
8012 return false;
8014 if (TARGET_THUMB2)
8016 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8017 Try the wider 12-bit range first, and re-try if the result
8018 is out of range. */
8019 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8020 if (low < -255)
8021 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8023 else
8025 if (mode == HImode || mode == HFmode)
8027 if (arm_arch4)
8028 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8029 else
8031 /* The storehi/movhi_bytes fallbacks can use only
8032 [-4094,+4094] of the full ldrb/strb index range. */
8033 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8034 if (low == 4095 || low == -4095)
8035 return false;
8038 else
8039 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8042 else
8043 return false;
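/* Sign-extend (val - low) from 32 bits so that, on a 64-bit host,
   high + low still reconstructs val exactly.  */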
8045 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8046 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8047 - (unsigned HOST_WIDE_INT) 0x80000000);
8048 /* Check for overflow or zero.  */
8049 if (low == 0 || high == 0 || (high + low != val))
8050 return false;
8052 /* Reload the high part into a base reg; leave the low part
8053 in the mem.
8054 Note that replacing this gen_rtx_PLUS with plus_constant is
8055 wrong in this case because we rely on the
8056 (plus (plus reg c1) c2) structure being preserved so that
8057 XEXP (*p, 0) in push_reload below uses the correct term. */
8058 *p = gen_rtx_PLUS (GET_MODE (*p),
8059 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8060 GEN_INT (high)),
8061 GEN_INT (low));
8062 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8063 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8064 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8065 return true;
8068 return false;
8072 thumb_legitimize_reload_address (rtx *x_p,
8073 machine_mode mode,
8074 int opnum, int type,
8075 int ind_levels ATTRIBUTE_UNUSED)
8077 rtx x = *x_p;
8079 if (GET_CODE (x) == PLUS
8080 && GET_MODE_SIZE (mode) < 4
8081 && REG_P (XEXP (x, 0))
8082 && XEXP (x, 0) == stack_pointer_rtx
8083 && CONST_INT_P (XEXP (x, 1))
8084 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8086 rtx orig_x = x;
8088 x = copy_rtx (x);
8089 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8090 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8091 return x;
8094 /* If both registers are hi-regs, then it's better to reload the
8095 entire expression rather than each register individually. That
8096 only requires one reload register rather than two. */
8097 if (GET_CODE (x) == PLUS
8098 && REG_P (XEXP (x, 0))
8099 && REG_P (XEXP (x, 1))
8100 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8101 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8103 rtx orig_x = x;
8105 x = copy_rtx (x);
8106 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8107 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8108 return x;
8111 return NULL;
8114 /* Return TRUE if X contains any TLS symbol references. */
8116 bool
8117 arm_tls_referenced_p (rtx x)
8119 if (! TARGET_HAVE_TLS)
8120 return false;
8122 subrtx_iterator::array_type array;
8123 FOR_EACH_SUBRTX (iter, array, x, ALL)
8125 const_rtx x = *iter;
8126 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8127 return true;
8129 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8130 TLS offsets, not real symbol references. */
8131 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8132 iter.skip_subrtxes ();
8134 return false;
8137 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8139 On the ARM, allow any integer (invalid ones are removed later by insn
8140 patterns), nice doubles and symbol_refs which refer to the function's
8141 constant pool XXX.
8143 When generating pic allow anything. */
8145 static bool
8146 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8148 /* At present, we have no support for Neon structure constants, so forbid
8149 them here. It might be possible to handle simple cases like 0 and -1
8150 in future. */
8151 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8152 return false;
8154 return flag_pic || !label_mentioned_p (x);
8157 static bool
8158 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8160 return (CONST_INT_P (x)
8161 || CONST_DOUBLE_P (x)
8162 || CONSTANT_ADDRESS_P (x)
8163 || flag_pic);
8166 static bool
8167 arm_legitimate_constant_p (machine_mode mode, rtx x)
8169 return (!arm_cannot_force_const_mem (mode, x)
8170 && (TARGET_32BIT
8171 ? arm_legitimate_constant_p_1 (mode, x)
8172 : thumb_legitimate_constant_p (mode, x)));
8175 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8177 static bool
8178 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8180 rtx base, offset;
8182 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8184 split_const (x, &base, &offset);
8185 if (GET_CODE (base) == SYMBOL_REF
8186 && !offset_within_block_p (base, INTVAL (offset)))
8187 return true;
8189 return arm_tls_referenced_p (x);
8192 #define REG_OR_SUBREG_REG(X) \
8193 (REG_P (X) \
8194 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8196 #define REG_OR_SUBREG_RTX(X) \
8197 (REG_P (X) ? (X) : SUBREG_REG (X))
8199 static inline int
8200 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8202 machine_mode mode = GET_MODE (x);
8203 int total, words;
8205 switch (code)
8207 case ASHIFT:
8208 case ASHIFTRT:
8209 case LSHIFTRT:
8210 case ROTATERT:
8211 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8213 case PLUS:
8214 case MINUS:
8215 case COMPARE:
8216 case NEG:
8217 case NOT:
8218 return COSTS_N_INSNS (1);
8220 case MULT:
8221 if (CONST_INT_P (XEXP (x, 1)))
8223 int cycles = 0;
8224 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
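/* Estimate roughly one extra cycle for every two significant bits of
   the constant multiplier.  */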
8226 while (i)
8228 i >>= 2;
8229 cycles++;
8231 return COSTS_N_INSNS (2) + cycles;
8233 return COSTS_N_INSNS (1) + 16;
8235 case SET:
8236 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8237 the mode. */
8238 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8239 return (COSTS_N_INSNS (words)
8240 + 4 * ((MEM_P (SET_SRC (x)))
8241 + MEM_P (SET_DEST (x))));
8243 case CONST_INT:
8244 if (outer == SET)
8246 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8247 return 0;
8248 if (thumb_shiftable_const (INTVAL (x)))
8249 return COSTS_N_INSNS (2);
8250 return COSTS_N_INSNS (3);
8252 else if ((outer == PLUS || outer == COMPARE)
8253 && INTVAL (x) < 256 && INTVAL (x) > -256)
8254 return 0;
8255 else if ((outer == IOR || outer == XOR || outer == AND)
8256 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8257 return COSTS_N_INSNS (1);
8258 else if (outer == AND)
8260 int i;
8261 /* This duplicates the tests in the andsi3 expander. */
8262 for (i = 9; i <= 31; i++)
8263 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8264 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8265 return COSTS_N_INSNS (2);
8267 else if (outer == ASHIFT || outer == ASHIFTRT
8268 || outer == LSHIFTRT)
8269 return 0;
8270 return COSTS_N_INSNS (2);
8272 case CONST:
8273 case CONST_DOUBLE:
8274 case LABEL_REF:
8275 case SYMBOL_REF:
8276 return COSTS_N_INSNS (3);
8278 case UDIV:
8279 case UMOD:
8280 case DIV:
8281 case MOD:
8282 return 100;
8284 case TRUNCATE:
8285 return 99;
8287 case AND:
8288 case XOR:
8289 case IOR:
8290 /* XXX guess. */
8291 return 8;
8293 case MEM:
8294 /* XXX another guess. */
8295 /* Memory costs quite a lot for the first word, but subsequent words
8296 load at the equivalent of a single insn each. */
8297 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8298 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8299 ? 4 : 0));
8301 case IF_THEN_ELSE:
8302 /* XXX a guess. */
8303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8304 return 14;
8305 return 2;
8307 case SIGN_EXTEND:
8308 case ZERO_EXTEND:
8309 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8310 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8312 if (mode == SImode)
8313 return total;
8315 if (arm_arch6)
8316 return total + COSTS_N_INSNS (1);
8318 /* Assume a two-shift sequence. Increase the cost slightly so
8319 we prefer actual shifts over an extend operation. */
8320 return total + 1 + COSTS_N_INSNS (2);
8322 default:
8323 return 99;
8327 static inline bool
8328 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8330 machine_mode mode = GET_MODE (x);
8331 enum rtx_code subcode;
8332 rtx operand;
8333 enum rtx_code code = GET_CODE (x);
8334 *total = 0;
8336 switch (code)
8338 case MEM:
8339 /* Memory costs quite a lot for the first word, but subsequent words
8340 load at the equivalent of a single insn each. */
8341 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8342 return true;
8344 case DIV:
8345 case MOD:
8346 case UDIV:
8347 case UMOD:
8348 if (TARGET_HARD_FLOAT && mode == SFmode)
8349 *total = COSTS_N_INSNS (2);
8350 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8351 *total = COSTS_N_INSNS (4);
8352 else
8353 *total = COSTS_N_INSNS (20);
8354 return false;
8356 case ROTATE:
8357 if (REG_P (XEXP (x, 1)))
8358 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8359 else if (!CONST_INT_P (XEXP (x, 1)))
8360 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8362 /* Fall through */
8363 case ROTATERT:
8364 if (mode != SImode)
8366 *total += COSTS_N_INSNS (4);
8367 return true;
8370 /* Fall through */
8371 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8372 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8373 if (mode == DImode)
8375 *total += COSTS_N_INSNS (3);
8376 return true;
8379 *total += COSTS_N_INSNS (1);
8380 /* Increase the cost of complex shifts because they aren't any faster,
8381 and reduce dual issue opportunities. */
8382 if (arm_tune_cortex_a9
8383 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8384 ++*total;
8386 return true;
8388 case MINUS:
8389 if (mode == DImode)
8391 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8392 if (CONST_INT_P (XEXP (x, 0))
8393 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8395 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8396 return true;
8399 if (CONST_INT_P (XEXP (x, 1))
8400 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8402 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8403 return true;
8406 return false;
8409 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8411 if (TARGET_HARD_FLOAT
8412 && (mode == SFmode
8413 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8415 *total = COSTS_N_INSNS (1);
8416 if (CONST_DOUBLE_P (XEXP (x, 0))
8417 && arm_const_double_rtx (XEXP (x, 0)))
8419 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8420 return true;
8423 if (CONST_DOUBLE_P (XEXP (x, 1))
8424 && arm_const_double_rtx (XEXP (x, 1)))
8426 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8427 return true;
8430 return false;
8432 *total = COSTS_N_INSNS (20);
8433 return false;
8436 *total = COSTS_N_INSNS (1);
8437 if (CONST_INT_P (XEXP (x, 0))
8438 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8440 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8441 return true;
8444 subcode = GET_CODE (XEXP (x, 1));
8445 if (subcode == ASHIFT || subcode == ASHIFTRT
8446 || subcode == LSHIFTRT
8447 || subcode == ROTATE || subcode == ROTATERT)
8449 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8450 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8451 return true;
8454 /* A shift as a part of RSB costs no more than RSB itself. */
8455 if (GET_CODE (XEXP (x, 0)) == MULT
8456 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8458 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8459 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8460 return true;
8463 if (subcode == MULT
8464 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8466 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8467 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8468 return true;
8471 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8472 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8474 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8475 if (REG_P (XEXP (XEXP (x, 1), 0))
8476 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8477 *total += COSTS_N_INSNS (1);
8479 return true;
8482 /* Fall through */
8484 case PLUS:
8485 if (code == PLUS && arm_arch6 && mode == SImode
8486 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8487 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8489 *total = COSTS_N_INSNS (1);
8490 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8491 0, speed);
8492 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8493 return true;
8496 /* MLA: All arguments must be registers. We filter out
8497 multiplication by a power of two, so that we fall through to
8498 the code below. */
8499 if (GET_CODE (XEXP (x, 0)) == MULT
8500 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8502 /* The cost comes from the cost of the multiply. */
8503 return false;
8506 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8508 if (TARGET_HARD_FLOAT
8509 && (mode == SFmode
8510 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8512 *total = COSTS_N_INSNS (1);
8513 if (CONST_DOUBLE_P (XEXP (x, 1))
8514 && arm_const_double_rtx (XEXP (x, 1)))
8516 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8517 return true;
8520 return false;
8523 *total = COSTS_N_INSNS (20);
8524 return false;
8527 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8528 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8530 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8531 if (REG_P (XEXP (XEXP (x, 0), 0))
8532 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8533 *total += COSTS_N_INSNS (1);
8534 return true;
8537 /* Fall through */
8539 case AND: case XOR: case IOR:
8541 /* Normally the frame registers will be spilt into reg+const during
8542 reload, so it is a bad idea to combine them with other instructions,
8543 since then they might not be moved outside of loops. As a compromise
8544 we allow integration with ops that have a constant as their second
8545 operand. */
8546 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8547 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8548 && !CONST_INT_P (XEXP (x, 1)))
8549 *total = COSTS_N_INSNS (1);
8551 if (mode == DImode)
8553 *total += COSTS_N_INSNS (2);
8554 if (CONST_INT_P (XEXP (x, 1))
8555 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8557 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8558 return true;
8561 return false;
8564 *total += COSTS_N_INSNS (1);
8565 if (CONST_INT_P (XEXP (x, 1))
8566 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8568 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8569 return true;
8571 subcode = GET_CODE (XEXP (x, 0));
8572 if (subcode == ASHIFT || subcode == ASHIFTRT
8573 || subcode == LSHIFTRT
8574 || subcode == ROTATE || subcode == ROTATERT)
8576 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8577 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8578 return true;
8581 if (subcode == MULT
8582 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8584 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8586 return true;
8589 if (subcode == UMIN || subcode == UMAX
8590 || subcode == SMIN || subcode == SMAX)
8592 *total = COSTS_N_INSNS (3);
8593 return true;
8596 return false;
8598 case MULT:
8599 /* This should have been handled by the CPU specific routines. */
8600 gcc_unreachable ();
8602 case TRUNCATE:
8603 if (arm_arch3m && mode == SImode
8604 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8605 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8606 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8607 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8608 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8609 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8611 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8612 return true;
8614 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8615 return false;
8617 case NEG:
8618 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8620 if (TARGET_HARD_FLOAT
8621 && (mode == SFmode
8622 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8624 *total = COSTS_N_INSNS (1);
8625 return false;
8627 *total = COSTS_N_INSNS (2);
8628 return false;
8631 /* Fall through */
8632 case NOT:
8633 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8634 if (mode == SImode && code == NOT)
8636 subcode = GET_CODE (XEXP (x, 0));
8637 if (subcode == ASHIFT || subcode == ASHIFTRT
8638 || subcode == LSHIFTRT
8639 || subcode == ROTATE || subcode == ROTATERT
8640 || (subcode == MULT
8641 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8644 /* Register shifts cost an extra cycle. */
8645 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8646 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8647 subcode, 1, speed);
8648 return true;
8652 return false;
8654 case IF_THEN_ELSE:
8655 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8657 *total = COSTS_N_INSNS (4);
8658 return true;
8661 operand = XEXP (x, 0);
8663 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8664 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8665 && REG_P (XEXP (operand, 0))
8666 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8667 *total += COSTS_N_INSNS (1);
8668 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8669 + rtx_cost (XEXP (x, 2), code, 2, speed));
8670 return true;
8672 case NE:
8673 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8675 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8676 return true;
8678 goto scc_insn;
8680 case GE:
8681 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8682 && mode == SImode && XEXP (x, 1) == const0_rtx)
8684 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8685 return true;
8687 goto scc_insn;
8689 case LT:
8690 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8691 && mode == SImode && XEXP (x, 1) == const0_rtx)
8693 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8694 return true;
8696 goto scc_insn;
8698 case EQ:
8699 case GT:
8700 case LE:
8701 case GEU:
8702 case LTU:
8703 case GTU:
8704 case LEU:
8705 case UNORDERED:
8706 case ORDERED:
8707 case UNEQ:
8708 case UNGE:
8709 case UNLT:
8710 case UNGT:
8711 case UNLE:
8712 scc_insn:
8713 /* SCC insns. If the comparison has already been performed,
8714 they cost 2 instructions. Otherwise they need
8715 an additional comparison before them. */
8716 *total = COSTS_N_INSNS (2);
8717 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8719 return true;
8722 /* Fall through */
8723 case COMPARE:
8724 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8726 *total = 0;
8727 return true;
8730 *total += COSTS_N_INSNS (1);
8731 if (CONST_INT_P (XEXP (x, 1))
8732 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8734 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8735 return true;
8738 subcode = GET_CODE (XEXP (x, 0));
8739 if (subcode == ASHIFT || subcode == ASHIFTRT
8740 || subcode == LSHIFTRT
8741 || subcode == ROTATE || subcode == ROTATERT)
8743 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8744 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8745 return true;
8748 if (subcode == MULT
8749 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8751 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8752 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8753 return true;
8756 return false;
8758 case UMIN:
8759 case UMAX:
8760 case SMIN:
8761 case SMAX:
8762 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8763 if (!CONST_INT_P (XEXP (x, 1))
8764 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8765 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8766 return true;
8768 case ABS:
8769 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8771 if (TARGET_HARD_FLOAT
8772 && (mode == SFmode
8773 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8775 *total = COSTS_N_INSNS (1);
8776 return false;
8778 *total = COSTS_N_INSNS (20);
8779 return false;
8781 *total = COSTS_N_INSNS (1);
8782 if (mode == DImode)
8783 *total += COSTS_N_INSNS (3);
8784 return false;
8786 case SIGN_EXTEND:
8787 case ZERO_EXTEND:
8788 *total = 0;
8789 if (GET_MODE_CLASS (mode) == MODE_INT)
8791 rtx op = XEXP (x, 0);
8792 machine_mode opmode = GET_MODE (op);
8794 if (mode == DImode)
8795 *total += COSTS_N_INSNS (1);
8797 if (opmode != SImode)
8799 if (MEM_P (op))
8801 /* If !arm_arch4, we use one of the extendhisi2_mem
8802 or movhi_bytes patterns for HImode. For a QImode
8803 sign extension, we first zero-extend from memory
8804 and then perform a shift sequence. */
8805 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8806 *total += COSTS_N_INSNS (2);
8808 else if (arm_arch6)
8809 *total += COSTS_N_INSNS (1);
8811 /* We don't have the necessary insn, so we need to perform some
8812 other operation. */
8813 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8814 /* An and with constant 255. */
8815 *total += COSTS_N_INSNS (1);
8816 else
8817 /* A shift sequence. Increase costs slightly to avoid
8818 combining two shifts into an extend operation. */
8819 *total += COSTS_N_INSNS (2) + 1;
8822 return false;
8825 switch (GET_MODE (XEXP (x, 0)))
8827 case V8QImode:
8828 case V4HImode:
8829 case V2SImode:
8830 case V4QImode:
8831 case V2HImode:
8832 *total = COSTS_N_INSNS (1);
8833 return false;
8835 default:
8836 gcc_unreachable ();
8838 gcc_unreachable ();
8840 case ZERO_EXTRACT:
8841 case SIGN_EXTRACT:
8842 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8843 return true;
8845 case CONST_INT:
8846 if (const_ok_for_arm (INTVAL (x))
8847 || const_ok_for_arm (~INTVAL (x)))
8848 *total = COSTS_N_INSNS (1);
8849 else
8850 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8851 INTVAL (x), NULL_RTX,
8852 NULL_RTX, 0, 0));
8853 return true;
8855 case CONST:
8856 case LABEL_REF:
8857 case SYMBOL_REF:
8858 *total = COSTS_N_INSNS (3);
8859 return true;
8861 case HIGH:
8862 *total = COSTS_N_INSNS (1);
8863 return true;
8865 case LO_SUM:
8866 *total = COSTS_N_INSNS (1);
8867 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8868 return true;
8870 case CONST_DOUBLE:
8871 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8872 && (mode == SFmode || !TARGET_VFP_SINGLE))
8873 *total = COSTS_N_INSNS (1);
8874 else
8875 *total = COSTS_N_INSNS (4);
8876 return true;
8878 case SET:
8879 /* The vec_extract patterns accept memory operands that require an
8880 address reload. Account for the cost of that reload to give the
8881 auto-inc-dec pass an incentive to try to replace them. */
8882 if (TARGET_NEON && MEM_P (SET_DEST (x))
8883 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8885 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8886 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8887 *total += COSTS_N_INSNS (1);
8888 return true;
8890 /* Likewise for the vec_set patterns. */
8891 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8892 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8893 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8895 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8896 *total = rtx_cost (mem, code, 0, speed);
8897 if (!neon_vector_mem_operand (mem, 2, true))
8898 *total += COSTS_N_INSNS (1);
8899 return true;
8901 return false;
8903 case UNSPEC:
8904 /* We cost this the same as a memory access so that it can
8905 be hoisted out of loops. */
8906 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8908 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8910 return true;
8912 case CONST_VECTOR:
8913 if (TARGET_NEON
8914 && TARGET_HARD_FLOAT
8915 && outer == SET
8916 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8917 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8918 *total = COSTS_N_INSNS (1);
8919 else
8920 *total = COSTS_N_INSNS (4);
8921 return true;
8923 default:
8924 *total = COSTS_N_INSNS (4);
8925 return false;
8929 /* Estimates the size cost of thumb1 instructions.
8930 For now most of the code is copied from thumb1_rtx_costs. We need more
8931 fine-grained tuning when we have more related test cases. */
8932 static inline int
8933 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8935 machine_mode mode = GET_MODE (x);
8936 int words;
8938 switch (code)
8940 case ASHIFT:
8941 case ASHIFTRT:
8942 case LSHIFTRT:
8943 case ROTATERT:
8944 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8946 case PLUS:
8947 case MINUS:
8948 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8949 defined by RTL expansion, especially for the expansion of
8950 multiplication. */
8951 if ((GET_CODE (XEXP (x, 0)) == MULT
8952 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8953 || (GET_CODE (XEXP (x, 1)) == MULT
8954 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8955 return COSTS_N_INSNS (2);
8956 /* Deliberately fall through for a normal RTX. */
8957 case COMPARE:
8958 case NEG:
8959 case NOT:
8960 return COSTS_N_INSNS (1);
8962 case MULT:
8963 if (CONST_INT_P (XEXP (x, 1)))
8965 /* The Thumb1 mul instruction can't operate on a constant; we must load it
8966 into a register first. */
8967 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8968 /* For the targets which have a very small and high-latency multiply
8969 unit, we prefer to synthesize the mult with up to 5 instructions,
8970 giving a good balance between size and performance. */
8971 if (arm_arch6m && arm_m_profile_small_mul)
8972 return COSTS_N_INSNS (5);
8973 else
8974 return COSTS_N_INSNS (1) + const_size;
8976 return COSTS_N_INSNS (1);
8978 case SET:
8979 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8980 the mode. */
8981 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8982 return COSTS_N_INSNS (words)
8983 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8984 || satisfies_constraint_K (SET_SRC (x))
8985 /* thumb1_movdi_insn. */
8986 || ((words > 1) && MEM_P (SET_SRC (x))));
8988 case CONST_INT:
8989 if (outer == SET)
8991 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8992 return COSTS_N_INSNS (1);
8993 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8994 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8995 return COSTS_N_INSNS (2);
8996 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8997 if (thumb_shiftable_const (INTVAL (x)))
8998 return COSTS_N_INSNS (2);
8999 return COSTS_N_INSNS (3);
9001 else if ((outer == PLUS || outer == COMPARE)
9002 && INTVAL (x) < 256 && INTVAL (x) > -256)
9003 return 0;
9004 else if ((outer == IOR || outer == XOR || outer == AND)
9005 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9006 return COSTS_N_INSNS (1);
9007 else if (outer == AND)
9009 int i;
9010 /* This duplicates the tests in the andsi3 expander. */
9011 for (i = 9; i <= 31; i++)
9012 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9013 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9014 return COSTS_N_INSNS (2);
9016 else if (outer == ASHIFT || outer == ASHIFTRT
9017 || outer == LSHIFTRT)
9018 return 0;
9019 return COSTS_N_INSNS (2);
9021 case CONST:
9022 case CONST_DOUBLE:
9023 case LABEL_REF:
9024 case SYMBOL_REF:
9025 return COSTS_N_INSNS (3);
9027 case UDIV:
9028 case UMOD:
9029 case DIV:
9030 case MOD:
9031 return 100;
9033 case TRUNCATE:
9034 return 99;
9036 case AND:
9037 case XOR:
9038 case IOR:
9039 return COSTS_N_INSNS (1);
9041 case MEM:
9042 return (COSTS_N_INSNS (1)
9043 + COSTS_N_INSNS (1)
9044 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9045 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9046 ? COSTS_N_INSNS (1) : 0));
9048 case IF_THEN_ELSE:
9049 /* XXX a guess. */
9050 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9051 return 14;
9052 return 2;
9054 case ZERO_EXTEND:
9055 /* XXX still guessing. */
9056 switch (GET_MODE (XEXP (x, 0)))
9058 case QImode:
9059 return (1 + (mode == DImode ? 4 : 0)
9060 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9062 case HImode:
9063 return (4 + (mode == DImode ? 4 : 0)
9064 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9066 case SImode:
9067 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9069 default:
9070 return 99;
9073 default:
9074 return 99;
9078 /* RTX costs when optimizing for size. */
9079 static bool
9080 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9081 int *total)
9083 machine_mode mode = GET_MODE (x);
9084 if (TARGET_THUMB1)
9086 *total = thumb1_size_rtx_costs (x, code, outer_code);
9087 return true;
9090 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9091 switch (code)
9093 case MEM:
9094 /* A memory access costs 1 insn if the mode is small, or the address is
9095 a single register; otherwise it costs one insn per word. */
9096 if (REG_P (XEXP (x, 0)))
9097 *total = COSTS_N_INSNS (1);
9098 else if (flag_pic
9099 && GET_CODE (XEXP (x, 0)) == PLUS
9100 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9101 /* This will be split into two instructions.
9102 See arm.md:calculate_pic_address. */
9103 *total = COSTS_N_INSNS (2);
9104 else
9105 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9106 return true;
9108 case DIV:
9109 case MOD:
9110 case UDIV:
9111 case UMOD:
9112 /* Needs a libcall, so it costs about this. */
9113 *total = COSTS_N_INSNS (2);
9114 return false;
9116 case ROTATE:
9117 if (mode == SImode && REG_P (XEXP (x, 1)))
9119 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9120 return true;
9122 /* Fall through */
9123 case ROTATERT:
9124 case ASHIFT:
9125 case LSHIFTRT:
9126 case ASHIFTRT:
9127 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9129 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9130 return true;
9132 else if (mode == SImode)
9134 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9135 /* Slightly disparage register shifts, but not by much. */
9136 if (!CONST_INT_P (XEXP (x, 1)))
9137 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9138 return true;
9141 /* Needs a libcall. */
9142 *total = COSTS_N_INSNS (2);
9143 return false;
9145 case MINUS:
9146 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9147 && (mode == SFmode || !TARGET_VFP_SINGLE))
9149 *total = COSTS_N_INSNS (1);
9150 return false;
9153 if (mode == SImode)
9155 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9156 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9158 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9159 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9160 || subcode1 == ROTATE || subcode1 == ROTATERT
9161 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9162 || subcode1 == ASHIFTRT)
9164 /* It's just the cost of the two operands. */
9165 *total = 0;
9166 return false;
9169 *total = COSTS_N_INSNS (1);
9170 return false;
9173 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9174 return false;
9176 case PLUS:
9177 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9178 && (mode == SFmode || !TARGET_VFP_SINGLE))
9180 *total = COSTS_N_INSNS (1);
9181 return false;
9184 /* A shift as a part of ADD costs nothing. */
9185 if (GET_CODE (XEXP (x, 0)) == MULT
9186 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9188 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9189 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9190 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9191 return true;
9194 /* Fall through */
9195 case AND: case XOR: case IOR:
9196 if (mode == SImode)
9198 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9200 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9201 || subcode == LSHIFTRT || subcode == ASHIFTRT
9202 || (code == AND && subcode == NOT))
9204 /* It's just the cost of the two operands. */
9205 *total = 0;
9206 return false;
9210 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9211 return false;
9213 case MULT:
9214 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9215 return false;
9217 case NEG:
9218 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9219 && (mode == SFmode || !TARGET_VFP_SINGLE))
9221 *total = COSTS_N_INSNS (1);
9222 return false;
9225 /* Fall through */
9226 case NOT:
9227 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9229 return false;
9231 case IF_THEN_ELSE:
9232 *total = 0;
9233 return false;
9235 case COMPARE:
9236 if (cc_register (XEXP (x, 0), VOIDmode))
9237 *total = 0;
9238 else
9239 *total = COSTS_N_INSNS (1);
9240 return false;
9242 case ABS:
9243 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9244 && (mode == SFmode || !TARGET_VFP_SINGLE))
9245 *total = COSTS_N_INSNS (1);
9246 else
9247 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9248 return false;
9250 case SIGN_EXTEND:
9251 case ZERO_EXTEND:
9252 return arm_rtx_costs_1 (x, outer_code, total, 0);
9254 case CONST_INT:
9255 if (const_ok_for_arm (INTVAL (x)))
9256 /* A multiplication by a constant requires another instruction
9257 to load the constant to a register. */
9258 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9259 ? 1 : 0);
9260 else if (const_ok_for_arm (~INTVAL (x)))
9261 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9262 else if (const_ok_for_arm (-INTVAL (x)))
9264 if (outer_code == COMPARE || outer_code == PLUS
9265 || outer_code == MINUS)
9266 *total = 0;
9267 else
9268 *total = COSTS_N_INSNS (1);
9270 else
9271 *total = COSTS_N_INSNS (2);
9272 return true;
9274 case CONST:
9275 case LABEL_REF:
9276 case SYMBOL_REF:
9277 *total = COSTS_N_INSNS (2);
9278 return true;
9280 case CONST_DOUBLE:
9281 *total = COSTS_N_INSNS (4);
9282 return true;
9284 case CONST_VECTOR:
9285 if (TARGET_NEON
9286 && TARGET_HARD_FLOAT
9287 && outer_code == SET
9288 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9289 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9290 *total = COSTS_N_INSNS (1);
9291 else
9292 *total = COSTS_N_INSNS (4);
9293 return true;
9295 case HIGH:
9296 case LO_SUM:
9297 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9298 cost of these slightly. */
9299 *total = COSTS_N_INSNS (1) + 1;
9300 return true;
9302 case SET:
9303 return false;
9305 default:
9306 if (mode != VOIDmode)
9307 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9308 else
9309 *total = COSTS_N_INSNS (4); /* Who knows? */
9310 return false;
9314 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9315 operand, then return the operand that is being shifted. If the shift
9316 is not by a constant, then set SHIFT_REG to point to the operand.
9317 Return NULL if OP is not a shifter operand. */
9318 static rtx
9319 shifter_op_p (rtx op, rtx *shift_reg)
9321 enum rtx_code code = GET_CODE (op);
9323 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9324 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9325 return XEXP (op, 0);
9326 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9327 return XEXP (op, 0);
9328 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9329 || code == ASHIFTRT)
9331 if (!CONST_INT_P (XEXP (op, 1)))
9332 *shift_reg = XEXP (op, 1);
9333 return XEXP (op, 0);
9336 return NULL;
9339 static bool
9340 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9342 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9343 gcc_assert (GET_CODE (x) == UNSPEC);
9345 switch (XINT (x, 1))
9347 case UNSPEC_UNALIGNED_LOAD:
9348 /* We can only do unaligned loads into the integer unit, and we can't
9349 use LDM or LDRD. */
9350 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9351 if (speed_p)
9352 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9353 + extra_cost->ldst.load_unaligned);
9355 #ifdef NOT_YET
9356 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9357 ADDR_SPACE_GENERIC, speed_p);
9358 #endif
9359 return true;
9361 case UNSPEC_UNALIGNED_STORE:
9362 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9363 if (speed_p)
9364 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9365 + extra_cost->ldst.store_unaligned);
9367 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9368 #ifdef NOT_YET
9369 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9370 ADDR_SPACE_GENERIC, speed_p);
9371 #endif
9372 return true;
9374 case UNSPEC_VRINTZ:
9375 case UNSPEC_VRINTP:
9376 case UNSPEC_VRINTM:
9377 case UNSPEC_VRINTR:
9378 case UNSPEC_VRINTX:
9379 case UNSPEC_VRINTA:
9380 *cost = COSTS_N_INSNS (1);
9381 if (speed_p)
9382 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9384 return true;
9385 default:
9386 *cost = COSTS_N_INSNS (2);
9387 break;
9389 return false;
9392 /* Cost of a libcall. We assume one insn per argument, an amount for the
9393 call (one insn for -Os) and then one for processing the result. */
9394 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
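/* Helper for the cost calculations below: if operand IDX of X is a left
   shift (or an equivalent power-of-two multiply) feeding the arithmetic
   operation OP, add the combined shift-and-operate cost together with
   the costs of both operands, then return from the calling function.  */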
9396 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9397 do \
9399 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9400 if (shift_op != NULL \
9401 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9403 if (shift_reg) \
9405 if (speed_p) \
9406 *cost += extra_cost->alu.arith_shift_reg; \
9407 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9409 else if (speed_p) \
9410 *cost += extra_cost->alu.arith_shift; \
9412 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9413 + rtx_cost (XEXP (x, 1 - IDX), \
9414 OP, 1, speed_p)); \
9415 return true; \
9418 while (0);
9420 /* RTX costs. Make an estimate of the cost of executing the operation
9421 X, which is contained within an operation with code OUTER_CODE.
9422 SPEED_P indicates whether the cost desired is the performance cost,
9423 or the size cost. The estimate is stored in COST and the return
9424 value is TRUE if the cost calculation is final, or FALSE if the
9425 caller should recurse through the operands of X to add additional
9426 costs.
9428 We currently make no attempt to model the size savings of Thumb-2
9429 16-bit instructions. At the normal points in compilation where
9430 this code is called we have no measure of whether the condition
9431 flags are live or not, and thus no realistic way to determine what
9432 the size will eventually be. */
9433 static bool
9434 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9435 const struct cpu_cost_table *extra_cost,
9436 int *cost, bool speed_p)
9438 machine_mode mode = GET_MODE (x);
9440 if (TARGET_THUMB1)
9442 if (speed_p)
9443 *cost = thumb1_rtx_costs (x, code, outer_code);
9444 else
9445 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9446 return true;
9449 switch (code)
9451 case SET:
9452 *cost = 0;
9453 /* SET RTXs don't have a mode so we get it from the destination. */
9454 mode = GET_MODE (SET_DEST (x));
9456 if (REG_P (SET_SRC (x))
9457 && REG_P (SET_DEST (x)))
9459 /* Assume that most copies can be done with a single insn,
9460 unless we don't have HW FP, in which case everything
9461 larger than word mode will require two insns. */
9462 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9463 && GET_MODE_SIZE (mode) > 4)
9464 || mode == DImode)
9465 ? 2 : 1);
9466 /* Conditional register moves can be encoded
9467 in 16 bits in Thumb mode. */
9468 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9469 *cost >>= 1;
9471 return true;
9474 if (CONST_INT_P (SET_SRC (x)))
9476 /* Handle CONST_INT here, since the value doesn't have a mode
9477 and we would otherwise be unable to work out the true cost. */
9478 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9479 outer_code = SET;
9480 /* Slightly lower the cost of setting a core reg to a constant.
9481 This helps break up chains and allows for better scheduling. */
9482 if (REG_P (SET_DEST (x))
9483 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9484 *cost -= 1;
9485 x = SET_SRC (x);
9486 /* Immediate moves with an immediate in the range [0, 255] can be
9487 encoded in 16 bits in Thumb mode. */
9488 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9489 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9490 *cost >>= 1;
9491 goto const_int_cost;
9494 return false;
9496 case MEM:
9497 /* A memory access costs 1 insn if the mode is small, or the address is
9498 a single register, otherwise it costs one insn per word. */
9499 if (REG_P (XEXP (x, 0)))
9500 *cost = COSTS_N_INSNS (1);
9501 else if (flag_pic
9502 && GET_CODE (XEXP (x, 0)) == PLUS
9503 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9504 /* This will be split into two instructions.
9505 See arm.md:calculate_pic_address. */
9506 *cost = COSTS_N_INSNS (2);
9507 else
9508 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9510 /* For speed optimizations, add the costs of the address and
9511 accessing memory. */
9512 if (speed_p)
9513 #ifdef NOT_YET
9514 *cost += (extra_cost->ldst.load
9515 + arm_address_cost (XEXP (x, 0), mode,
9516 ADDR_SPACE_GENERIC, speed_p));
9517 #else
9518 *cost += extra_cost->ldst.load;
9519 #endif
9520 return true;
9522 case PARALLEL:
9524 /* Calculations of LDM costs are complex. We assume an initial cost
9525 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9526 registers; each additional group of ldm_regs_per_insn_subsequent
9527 registers then costs one more insn. The
9528 formula for N regs is thus:
9530 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9531 + ldm_regs_per_insn_subsequent - 1)
9532 / ldm_regs_per_insn_subsequent).
9534 Additional costs may also be added for addressing. A similar
9535 formula is used for STM. */
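/* Worked example of the formula above, using hypothetical tuning values
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2: loading
   N == 6 registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */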
9537 bool is_ldm = load_multiple_operation (x, SImode);
9538 bool is_stm = store_multiple_operation (x, SImode);
9540 *cost = COSTS_N_INSNS (1);
9542 if (is_ldm || is_stm)
9544 if (speed_p)
9546 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9547 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9548 ? extra_cost->ldst.ldm_regs_per_insn_1st
9549 : extra_cost->ldst.stm_regs_per_insn_1st;
9550 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9551 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9552 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9554 *cost += regs_per_insn_1st
9555 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9556 + regs_per_insn_sub - 1)
9557 / regs_per_insn_sub);
9558 return true;
9562 return false;
9564 case DIV:
9565 case UDIV:
9566 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9567 && (mode == SFmode || !TARGET_VFP_SINGLE))
9568 *cost = COSTS_N_INSNS (speed_p
9569 ? extra_cost->fp[mode != SFmode].div : 1);
9570 else if (mode == SImode && TARGET_IDIV)
9571 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9572 else
9573 *cost = LIBCALL_COST (2);
9574 return false; /* All arguments must be in registers. */
9576 case MOD:
9577 case UMOD:
9578 *cost = LIBCALL_COST (2);
9579 return false; /* All arguments must be in registers. */
9581 case ROTATE:
9582 if (mode == SImode && REG_P (XEXP (x, 1)))
9584 *cost = (COSTS_N_INSNS (2)
9585 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9586 if (speed_p)
9587 *cost += extra_cost->alu.shift_reg;
9588 return true;
9590 /* Fall through */
9591 case ROTATERT:
9592 case ASHIFT:
9593 case LSHIFTRT:
9594 case ASHIFTRT:
9595 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9597 *cost = (COSTS_N_INSNS (3)
9598 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9599 if (speed_p)
9600 *cost += 2 * extra_cost->alu.shift;
9601 return true;
9603 else if (mode == SImode)
9605 *cost = (COSTS_N_INSNS (1)
9606 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9607 /* Slightly disparage register shifts at -Os, but not by much. */
9608 if (!CONST_INT_P (XEXP (x, 1)))
9609 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9610 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9611 return true;
9613 else if (GET_MODE_CLASS (mode) == MODE_INT
9614 && GET_MODE_SIZE (mode) < 4)
9616 if (code == ASHIFT)
9618 *cost = (COSTS_N_INSNS (1)
9619 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9620 /* Slightly disparage register shifts at -Os, but not by
9621 much. */
9622 if (!CONST_INT_P (XEXP (x, 1)))
9623 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9624 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9626 else if (code == LSHIFTRT || code == ASHIFTRT)
9628 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9630 /* Can use SBFX/UBFX. */
9631 *cost = COSTS_N_INSNS (1);
9632 if (speed_p)
9633 *cost += extra_cost->alu.bfx;
9634 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9636 else
9638 *cost = COSTS_N_INSNS (2);
9639 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9640 if (speed_p)
9642 if (CONST_INT_P (XEXP (x, 1)))
9643 *cost += 2 * extra_cost->alu.shift;
9644 else
9645 *cost += (extra_cost->alu.shift
9646 + extra_cost->alu.shift_reg);
9648 else
9649 /* Slightly disparage register shifts. */
9650 *cost += !CONST_INT_P (XEXP (x, 1));
9653 else /* Rotates. */
9655 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9656 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9657 if (speed_p)
9659 if (CONST_INT_P (XEXP (x, 1)))
9660 *cost += (2 * extra_cost->alu.shift
9661 + extra_cost->alu.log_shift);
9662 else
9663 *cost += (extra_cost->alu.shift
9664 + extra_cost->alu.shift_reg
9665 + extra_cost->alu.log_shift_reg);
9668 return true;
9671 *cost = LIBCALL_COST (2);
9672 return false;
9674 case BSWAP:
9675 if (arm_arch6)
9677 if (mode == SImode)
9679 *cost = COSTS_N_INSNS (1);
9680 if (speed_p)
9681 *cost += extra_cost->alu.rev;
9683 return false;
9686 else
9688 /* No rev instruction available. Look at arm_legacy_rev
9689 and thumb_legacy_rev for the form of RTL used then. */
9690 if (TARGET_THUMB)
9692 *cost = COSTS_N_INSNS (10);
9694 if (speed_p)
9696 *cost += 6 * extra_cost->alu.shift;
9697 *cost += 3 * extra_cost->alu.logical;
9700 else
9702 *cost = COSTS_N_INSNS (5);
9704 if (speed_p)
9706 *cost += 2 * extra_cost->alu.shift;
9707 *cost += extra_cost->alu.arith_shift;
9708 *cost += 2 * extra_cost->alu.logical;
9711 return true;
9713 return false;
9715 case MINUS:
9716 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9717 && (mode == SFmode || !TARGET_VFP_SINGLE))
9719 *cost = COSTS_N_INSNS (1);
9720 if (GET_CODE (XEXP (x, 0)) == MULT
9721 || GET_CODE (XEXP (x, 1)) == MULT)
9723 rtx mul_op0, mul_op1, sub_op;
9725 if (speed_p)
9726 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9728 if (GET_CODE (XEXP (x, 0)) == MULT)
9730 mul_op0 = XEXP (XEXP (x, 0), 0);
9731 mul_op1 = XEXP (XEXP (x, 0), 1);
9732 sub_op = XEXP (x, 1);
9734 else
9736 mul_op0 = XEXP (XEXP (x, 1), 0);
9737 mul_op1 = XEXP (XEXP (x, 1), 1);
9738 sub_op = XEXP (x, 0);
9741 /* The first operand of the multiply may be optionally
9742 negated. */
9743 if (GET_CODE (mul_op0) == NEG)
9744 mul_op0 = XEXP (mul_op0, 0);
9746 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9747 + rtx_cost (mul_op1, code, 0, speed_p)
9748 + rtx_cost (sub_op, code, 0, speed_p));
9750 return true;
9753 if (speed_p)
9754 *cost += extra_cost->fp[mode != SFmode].addsub;
9755 return false;
9758 if (mode == SImode)
9760 rtx shift_by_reg = NULL;
9761 rtx shift_op;
9762 rtx non_shift_op;
9764 *cost = COSTS_N_INSNS (1);
9766 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9767 if (shift_op == NULL)
9769 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9770 non_shift_op = XEXP (x, 0);
9772 else
9773 non_shift_op = XEXP (x, 1);
9775 if (shift_op != NULL)
9777 if (shift_by_reg != NULL)
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith_shift_reg;
9781 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9783 else if (speed_p)
9784 *cost += extra_cost->alu.arith_shift;
9786 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9787 + rtx_cost (non_shift_op, code, 0, speed_p));
9788 return true;
9791 if (arm_arch_thumb2
9792 && GET_CODE (XEXP (x, 1)) == MULT)
9794 /* MLS. */
9795 if (speed_p)
9796 *cost += extra_cost->mult[0].add;
9797 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9798 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9799 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9800 return true;
9803 if (CONST_INT_P (XEXP (x, 0)))
9805 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9806 INTVAL (XEXP (x, 0)), NULL_RTX,
9807 NULL_RTX, 1, 0);
9808 *cost = COSTS_N_INSNS (insns);
9809 if (speed_p)
9810 *cost += insns * extra_cost->alu.arith;
9811 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9812 return true;
9815 return false;
9818 if (GET_MODE_CLASS (mode) == MODE_INT
9819 && GET_MODE_SIZE (mode) < 4)
9821 rtx shift_op, shift_reg;
9822 shift_reg = NULL;
9824 /* We check both sides of the MINUS for shifter operands since,
9825 unlike PLUS, it's not commutative. */
9827 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9828 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9830 /* Slightly disparage, as we might need to widen the result. */
9831 *cost = 1 + COSTS_N_INSNS (1);
9832 if (speed_p)
9833 *cost += extra_cost->alu.arith;
9835 if (CONST_INT_P (XEXP (x, 0)))
9837 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9838 return true;
9841 return false;
9844 if (mode == DImode)
9846 *cost = COSTS_N_INSNS (2);
9848 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9850 rtx op1 = XEXP (x, 1);
9852 if (speed_p)
9853 *cost += 2 * extra_cost->alu.arith;
9855 if (GET_CODE (op1) == ZERO_EXTEND)
9856 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9857 else
9858 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9859 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9860 0, speed_p);
9861 return true;
9863 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9865 if (speed_p)
9866 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9867 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9868 0, speed_p)
9869 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9870 return true;
9872 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9873 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9875 if (speed_p)
9876 *cost += (extra_cost->alu.arith
9877 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9878 ? extra_cost->alu.arith
9879 : extra_cost->alu.arith_shift));
9880 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9881 + rtx_cost (XEXP (XEXP (x, 1), 0),
9882 GET_CODE (XEXP (x, 1)), 0, speed_p));
9883 return true;
9886 if (speed_p)
9887 *cost += 2 * extra_cost->alu.arith;
9888 return false;
9891 /* Vector mode? */
9893 *cost = LIBCALL_COST (2);
9894 return false;
9896 case PLUS:
9897 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9898 && (mode == SFmode || !TARGET_VFP_SINGLE))
9900 *cost = COSTS_N_INSNS (1);
9901 if (GET_CODE (XEXP (x, 0)) == MULT)
9903 rtx mul_op0, mul_op1, add_op;
9905 if (speed_p)
9906 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9908 mul_op0 = XEXP (XEXP (x, 0), 0);
9909 mul_op1 = XEXP (XEXP (x, 0), 1);
9910 add_op = XEXP (x, 1);
9912 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9913 + rtx_cost (mul_op1, code, 0, speed_p)
9914 + rtx_cost (add_op, code, 0, speed_p));
9916 return true;
9919 if (speed_p)
9920 *cost += extra_cost->fp[mode != SFmode].addsub;
9921 return false;
9923 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9925 *cost = LIBCALL_COST (2);
9926 return false;
9929 /* Narrow modes can be synthesized in SImode, but the range
9930 of useful sub-operations is limited. Check for shift operations
9931 on one of the operands. Only left shifts can be used in the
9932 narrow modes. */
9933 if (GET_MODE_CLASS (mode) == MODE_INT
9934 && GET_MODE_SIZE (mode) < 4)
9936 rtx shift_op, shift_reg;
9937 shift_reg = NULL;
9939 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9941 if (CONST_INT_P (XEXP (x, 1)))
9943 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9944 INTVAL (XEXP (x, 1)), NULL_RTX,
9945 NULL_RTX, 1, 0);
9946 *cost = COSTS_N_INSNS (insns);
9947 if (speed_p)
9948 *cost += insns * extra_cost->alu.arith;
9949 /* Slightly penalize a narrow operation as the result may
9950 need widening. */
9951 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9952 return true;
9955 /* Slightly penalize a narrow operation as the result may
9956 need widening. */
9957 *cost = 1 + COSTS_N_INSNS (1);
9958 if (speed_p)
9959 *cost += extra_cost->alu.arith;
9961 return false;
9964 if (mode == SImode)
9966 rtx shift_op, shift_reg;
9968 *cost = COSTS_N_INSNS (1);
9969 if (TARGET_INT_SIMD
9970 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9971 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9973 /* UXTA[BH] or SXTA[BH]. */
9974 if (speed_p)
9975 *cost += extra_cost->alu.extend_arith;
9976 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9977 speed_p)
9978 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9979 return true;
9982 shift_reg = NULL;
9983 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9984 if (shift_op != NULL)
9986 if (shift_reg)
9988 if (speed_p)
9989 *cost += extra_cost->alu.arith_shift_reg;
9990 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9992 else if (speed_p)
9993 *cost += extra_cost->alu.arith_shift;
9995 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9996 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9997 return true;
9999 if (GET_CODE (XEXP (x, 0)) == MULT)
10001 rtx mul_op = XEXP (x, 0);
10003 *cost = COSTS_N_INSNS (1);
10005 if (TARGET_DSP_MULTIPLY
10006 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10007 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10008 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10009 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10010 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10011 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10012 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10013 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10014 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10015 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10016 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10017 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10018 == 16))))))
10020 /* SMLA[BT][BT]. */
10021 if (speed_p)
10022 *cost += extra_cost->mult[0].extend_add;
10023 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10024 SIGN_EXTEND, 0, speed_p)
10025 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10026 SIGN_EXTEND, 0, speed_p)
10027 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10028 return true;
10031 if (speed_p)
10032 *cost += extra_cost->mult[0].add;
10033 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10034 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10035 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10036 return true;
10038 if (CONST_INT_P (XEXP (x, 1)))
10040 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10041 INTVAL (XEXP (x, 1)), NULL_RTX,
10042 NULL_RTX, 1, 0);
10043 *cost = COSTS_N_INSNS (insns);
10044 if (speed_p)
10045 *cost += insns * extra_cost->alu.arith;
10046 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10047 return true;
10049 return false;
10052 if (mode == DImode)
10054 if (arm_arch3m
10055 && GET_CODE (XEXP (x, 0)) == MULT
10056 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10058 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10059 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10061 *cost = COSTS_N_INSNS (1);
10062 if (speed_p)
10063 *cost += extra_cost->mult[1].extend_add;
10064 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10065 ZERO_EXTEND, 0, speed_p)
10066 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10067 ZERO_EXTEND, 0, speed_p)
10068 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10069 return true;
10072 *cost = COSTS_N_INSNS (2);
10074 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10075 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10077 if (speed_p)
10078 *cost += (extra_cost->alu.arith
10079 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10080 ? extra_cost->alu.arith
10081 : extra_cost->alu.arith_shift));
10083 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10084 speed_p)
10085 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10086 return true;
10089 if (speed_p)
10090 *cost += 2 * extra_cost->alu.arith;
10091 return false;
10094 /* Vector mode? */
10095 *cost = LIBCALL_COST (2);
10096 return false;
10097 case IOR:
10098 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10100 *cost = COSTS_N_INSNS (1);
10101 if (speed_p)
10102 *cost += extra_cost->alu.rev;
10104 return true;
10106 /* Fall through. */
10107 case AND: case XOR:
10108 if (mode == SImode)
10110 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10111 rtx op0 = XEXP (x, 0);
10112 rtx shift_op, shift_reg;
10114 *cost = COSTS_N_INSNS (1);
10116 if (subcode == NOT
10117 && (code == AND
10118 || (code == IOR && TARGET_THUMB2)))
10119 op0 = XEXP (op0, 0);
10121 shift_reg = NULL;
10122 shift_op = shifter_op_p (op0, &shift_reg);
10123 if (shift_op != NULL)
10125 if (shift_reg)
10127 if (speed_p)
10128 *cost += extra_cost->alu.log_shift_reg;
10129 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10131 else if (speed_p)
10132 *cost += extra_cost->alu.log_shift;
10134 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10135 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10136 return true;
10139 if (CONST_INT_P (XEXP (x, 1)))
10141 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10142 INTVAL (XEXP (x, 1)), NULL_RTX,
10143 NULL_RTX, 1, 0);
10145 *cost = COSTS_N_INSNS (insns);
10146 if (speed_p)
10147 *cost += insns * extra_cost->alu.logical;
10148 *cost += rtx_cost (op0, code, 0, speed_p);
10149 return true;
10152 if (speed_p)
10153 *cost += extra_cost->alu.logical;
10154 *cost += (rtx_cost (op0, code, 0, speed_p)
10155 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10156 return true;
10159 if (mode == DImode)
10161 rtx op0 = XEXP (x, 0);
10162 enum rtx_code subcode = GET_CODE (op0);
10164 *cost = COSTS_N_INSNS (2);
10166 if (subcode == NOT
10167 && (code == AND
10168 || (code == IOR && TARGET_THUMB2)))
10169 op0 = XEXP (op0, 0);
10171 if (GET_CODE (op0) == ZERO_EXTEND)
10173 if (speed_p)
10174 *cost += 2 * extra_cost->alu.logical;
10176 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10177 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10178 return true;
10180 else if (GET_CODE (op0) == SIGN_EXTEND)
10182 if (speed_p)
10183 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10185 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10186 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10187 return true;
10190 if (speed_p)
10191 *cost += 2 * extra_cost->alu.logical;
10193 return true;
10195 /* Vector mode? */
10197 *cost = LIBCALL_COST (2);
10198 return false;
10200 case MULT:
10201 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10202 && (mode == SFmode || !TARGET_VFP_SINGLE))
10204 rtx op0 = XEXP (x, 0);
10206 *cost = COSTS_N_INSNS (1);
10208 if (GET_CODE (op0) == NEG)
10209 op0 = XEXP (op0, 0);
10211 if (speed_p)
10212 *cost += extra_cost->fp[mode != SFmode].mult;
10214 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10215 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10216 return true;
10218 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10220 *cost = LIBCALL_COST (2);
10221 return false;
10224 if (mode == SImode)
10226 *cost = COSTS_N_INSNS (1);
10227 if (TARGET_DSP_MULTIPLY
10228 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10229 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10230 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10231 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10232 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10233 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10234 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10235 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10236 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10237 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10238 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10239 && (INTVAL (XEXP (XEXP (x, 1), 1))
10240 == 16))))))
10242 /* SMUL[TB][TB]. */
10243 if (speed_p)
10244 *cost += extra_cost->mult[0].extend;
10245 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10246 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10247 return true;
10249 if (speed_p)
10250 *cost += extra_cost->mult[0].simple;
10251 return false;
10254 if (mode == DImode)
10256 if (arm_arch3m
10257 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10258 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10259 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10260 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10262 *cost = COSTS_N_INSNS (1);
10263 if (speed_p)
10264 *cost += extra_cost->mult[1].extend;
10265 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10266 ZERO_EXTEND, 0, speed_p)
10267 + rtx_cost (XEXP (XEXP (x, 1), 0),
10268 ZERO_EXTEND, 0, speed_p));
10269 return true;
10272 *cost = LIBCALL_COST (2);
10273 return false;
10276 /* Vector mode? */
10277 *cost = LIBCALL_COST (2);
10278 return false;
10280 case NEG:
10281 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10282 && (mode == SFmode || !TARGET_VFP_SINGLE))
10284 *cost = COSTS_N_INSNS (1);
10285 if (speed_p)
10286 *cost += extra_cost->fp[mode != SFmode].neg;
10288 return false;
10290 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10292 *cost = LIBCALL_COST (1);
10293 return false;
10296 if (mode == SImode)
10298 if (GET_CODE (XEXP (x, 0)) == ABS)
10300 *cost = COSTS_N_INSNS (2);
10301 /* Assume the non-flag-changing variant. */
10302 if (speed_p)
10303 *cost += (extra_cost->alu.log_shift
10304 + extra_cost->alu.arith_shift);
10305 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10306 return true;
10309 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10310 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10312 *cost = COSTS_N_INSNS (2);
10313 /* No extra cost for MOV imm and MVN imm. */
10314 /* If the comparison op is using the flags, there's no further
10315 cost, otherwise we need to add the cost of the comparison. */
10316 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10317 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10318 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10320 *cost += (COSTS_N_INSNS (1)
10321 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10322 speed_p)
10323 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10324 speed_p));
10325 if (speed_p)
10326 *cost += extra_cost->alu.arith;
10328 return true;
10330 *cost = COSTS_N_INSNS (1);
10331 if (speed_p)
10332 *cost += extra_cost->alu.arith;
10333 return false;
10336 if (GET_MODE_CLASS (mode) == MODE_INT
10337 && GET_MODE_SIZE (mode) < 4)
10339 /* Slightly disparage, as we might need an extend operation. */
10340 *cost = 1 + COSTS_N_INSNS (1);
10341 if (speed_p)
10342 *cost += extra_cost->alu.arith;
10343 return false;
10346 if (mode == DImode)
10348 *cost = COSTS_N_INSNS (2);
10349 if (speed_p)
10350 *cost += 2 * extra_cost->alu.arith;
10351 return false;
10354 /* Vector mode? */
10355 *cost = LIBCALL_COST (1);
10356 return false;
10358 case NOT:
10359 if (mode == SImode)
10361 rtx shift_op;
10362 rtx shift_reg = NULL;
10364 *cost = COSTS_N_INSNS (1);
10365 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10367 if (shift_op)
10369 if (shift_reg != NULL)
10371 if (speed_p)
10372 *cost += extra_cost->alu.log_shift_reg;
10373 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10375 else if (speed_p)
10376 *cost += extra_cost->alu.log_shift;
10377 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10378 return true;
10381 if (speed_p)
10382 *cost += extra_cost->alu.logical;
10383 return false;
10385 if (mode == DImode)
10387 *cost = COSTS_N_INSNS (2);
10388 return false;
10391 /* Vector mode? */
10393 *cost += LIBCALL_COST (1);
10394 return false;
10396 case IF_THEN_ELSE:
10398 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10400 *cost = COSTS_N_INSNS (4);
10401 return true;
10403 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10404 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10406 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10407 /* Assume that if one arm of the if_then_else is a register,
10408 that it will be tied with the result and eliminate the
10409 conditional insn. */
10410 if (REG_P (XEXP (x, 1)))
10411 *cost += op2cost;
10412 else if (REG_P (XEXP (x, 2)))
10413 *cost += op1cost;
10414 else
10416 if (speed_p)
10418 if (extra_cost->alu.non_exec_costs_exec)
10419 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10420 else
10421 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10423 else
10424 *cost += op1cost + op2cost;
10427 return true;
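/* Example of the assumption above: for (if_then_else (cond) (reg:SI r1)
   (const_int 5)) only the cost of the condition and of the constant arm is
   accumulated, since the register arm is expected to be tied to the result
   and its conditional move eliminated.  */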
10429 case COMPARE:
10430 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10431 *cost = 0;
10432 else
10434 machine_mode op0mode;
10435 /* We'll mostly assume that the cost of a compare is the cost of the
10436 LHS. However, there are some notable exceptions. */
10438 /* Floating point compares are never done as side-effects. */
10439 op0mode = GET_MODE (XEXP (x, 0));
10440 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10441 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10443 *cost = COSTS_N_INSNS (1);
10444 if (speed_p)
10445 *cost += extra_cost->fp[op0mode != SFmode].compare;
10447 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10449 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10450 return true;
10453 return false;
10455 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10457 *cost = LIBCALL_COST (2);
10458 return false;
10461 /* DImode compares normally take two insns. */
10462 if (op0mode == DImode)
10464 *cost = COSTS_N_INSNS (2);
10465 if (speed_p)
10466 *cost += 2 * extra_cost->alu.arith;
10467 return false;
10470 if (op0mode == SImode)
10472 rtx shift_op;
10473 rtx shift_reg;
10475 if (XEXP (x, 1) == const0_rtx
10476 && !(REG_P (XEXP (x, 0))
10477 || (GET_CODE (XEXP (x, 0)) == SUBREG
10478 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10480 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10482 /* Multiply operations that set the flags are often
10483 significantly more expensive. */
10484 if (speed_p
10485 && GET_CODE (XEXP (x, 0)) == MULT
10486 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10487 *cost += extra_cost->mult[0].flag_setting;
10489 if (speed_p
10490 && GET_CODE (XEXP (x, 0)) == PLUS
10491 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10492 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10493 0), 1), mode))
10494 *cost += extra_cost->mult[0].flag_setting;
10495 return true;
10498 shift_reg = NULL;
10499 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10500 if (shift_op != NULL)
10502 *cost = COSTS_N_INSNS (1);
10503 if (shift_reg != NULL)
10505 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.arith_shift_reg;
10509 else if (speed_p)
10510 *cost += extra_cost->alu.arith_shift;
10511 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10512 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10513 return true;
10516 *cost = COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.arith;
10519 if (CONST_INT_P (XEXP (x, 1))
10520 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10522 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10523 return true;
10525 return false;
10528 /* Vector mode? */
10530 *cost = LIBCALL_COST (2);
10531 return false;
10533 return true;
10535 case EQ:
10536 case NE:
10537 case LT:
10538 case LE:
10539 case GT:
10540 case GE:
10541 case LTU:
10542 case LEU:
10543 case GEU:
10544 case GTU:
10545 case ORDERED:
10546 case UNORDERED:
10547 case UNEQ:
10548 case UNLE:
10549 case UNLT:
10550 case UNGE:
10551 case UNGT:
10552 case LTGT:
10553 if (outer_code == SET)
10555 /* Is it a store-flag operation? */
10556 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10557 && XEXP (x, 1) == const0_rtx)
10559 /* Thumb also needs an IT insn. */
10560 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10561 return true;
10563 if (XEXP (x, 1) == const0_rtx)
10565 switch (code)
10567 case LT:
10568 /* LSR Rd, Rn, #31. */
10569 *cost = COSTS_N_INSNS (1);
10570 if (speed_p)
10571 *cost += extra_cost->alu.shift;
10572 break;
10574 case EQ:
10575 /* RSBS T1, Rn, #0
10576 ADC Rd, Rn, T1. */
10578 case NE:
10579 /* SUBS T1, Rn, #1
10580 SBC Rd, Rn, T1. */
10581 *cost = COSTS_N_INSNS (2);
10582 break;
10584 case LE:
10585 /* RSBS T1, Rn, Rn, LSR #31
10586 ADC Rd, Rn, T1. */
10587 *cost = COSTS_N_INSNS (2);
10588 if (speed_p)
10589 *cost += extra_cost->alu.arith_shift;
10590 break;
10592 case GT:
10593 /* RSB Rd, Rn, Rn, ASR #1
10594 LSR Rd, Rd, #31. */
10595 *cost = COSTS_N_INSNS (2);
10596 if (speed_p)
10597 *cost += (extra_cost->alu.arith_shift
10598 + extra_cost->alu.shift);
10599 break;
10601 case GE:
10602 /* ASR Rd, Rn, #31
10603 ADD Rd, Rn, #1. */
10604 *cost = COSTS_N_INSNS (2);
10605 if (speed_p)
10606 *cost += extra_cost->alu.shift;
10607 break;
10609 default:
10610 /* Remaining cases are either meaningless or would take
10611 three insns anyway. */
10612 *cost = COSTS_N_INSNS (3);
10613 break;
10615 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10616 return true;
10618 else
10620 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10621 if (CONST_INT_P (XEXP (x, 1))
10622 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10624 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10625 return true;
10628 return false;
10631 /* Not directly inside a set. If it involves the condition code
10632 register it must be the condition for a branch, cond_exec or
10633 I_T_E operation. Since the comparison is performed elsewhere
10634 this is just the control part which has no additional
10635 cost. */
10636 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10637 && XEXP (x, 1) == const0_rtx)
10639 *cost = 0;
10640 return true;
10642 return false;
10644 case ABS:
10645 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10646 && (mode == SFmode || !TARGET_VFP_SINGLE))
10648 *cost = COSTS_N_INSNS (1);
10649 if (speed_p)
10650 *cost += extra_cost->fp[mode != SFmode].neg;
10652 return false;
10654 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10656 *cost = LIBCALL_COST (1);
10657 return false;
10660 if (mode == SImode)
10662 *cost = COSTS_N_INSNS (1);
10663 if (speed_p)
10664 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10665 return false;
10667 /* Vector mode? */
10668 *cost = LIBCALL_COST (1);
10669 return false;
10671 case SIGN_EXTEND:
10672 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10673 && MEM_P (XEXP (x, 0)))
10675 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10677 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1);
10680 if (!speed_p)
10681 return true;
10683 if (GET_MODE (XEXP (x, 0)) == SImode)
10684 *cost += extra_cost->ldst.load;
10685 else
10686 *cost += extra_cost->ldst.load_sign_extend;
10688 if (mode == DImode)
10689 *cost += extra_cost->alu.shift;
10691 return true;
10694 /* Widening from less than 32-bits requires an extend operation. */
10695 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10697 /* We have SXTB/SXTH. */
10698 *cost = COSTS_N_INSNS (1);
10699 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10705 /* Needs two shifts. */
10706 *cost = COSTS_N_INSNS (2);
10707 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10708 if (speed_p)
10709 *cost += 2 * extra_cost->alu.shift;
10712 /* Widening beyond 32-bits requires one more insn. */
10713 if (mode == DImode)
10715 *cost += COSTS_N_INSNS (1);
10716 if (speed_p)
10717 *cost += extra_cost->alu.shift;
10720 return true;
10722 case ZERO_EXTEND:
10723 if ((arm_arch4
10724 || GET_MODE (XEXP (x, 0)) == SImode
10725 || GET_MODE (XEXP (x, 0)) == QImode)
10726 && MEM_P (XEXP (x, 0)))
10728 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10730 if (mode == DImode)
10731 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10733 return true;
10736 /* Widening from less than 32-bits requires an extend operation. */
10737 if (GET_MODE (XEXP (x, 0)) == QImode)
10739 /* UXTB can be a shorter instruction in Thumb2, but it might
10740 be slower than the AND Rd, Rn, #255 alternative. When
10741 optimizing for speed it should never be slower to use
10742 AND, and we don't really model 16-bit vs 32-bit insns
10743 here. */
10744 *cost = COSTS_N_INSNS (1);
10745 if (speed_p)
10746 *cost += extra_cost->alu.logical;
10748 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10750 /* We have UXTB/UXTH. */
10751 *cost = COSTS_N_INSNS (1);
10752 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10753 if (speed_p)
10754 *cost += extra_cost->alu.extend;
10756 else if (GET_MODE (XEXP (x, 0)) != SImode)
10758 /* Needs two shifts. It's marginally preferable to use
10759 shifts rather than two BIC instructions as the second
10760 shift may merge with a subsequent insn as a shifter
10761 op. */
10762 *cost = COSTS_N_INSNS (2);
10763 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10764 if (speed_p)
10765 *cost += 2 * extra_cost->alu.shift;
10767 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10768 *cost = COSTS_N_INSNS (1);
10770 /* Widening beyond 32-bits requires one more insn. */
10771 if (mode == DImode)
10773 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10776 return true;
10778 case CONST_INT:
10779 *cost = 0;
10780 /* CONST_INT has no mode, so we cannot tell for sure how many
10781 insns are really going to be needed. The best we can do is
10782 look at the value passed. If it fits in SImode, then assume
10783 that's the mode it will be used for. Otherwise assume it
10784 will be used in DImode. */
10785 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10786 mode = SImode;
10787 else
10788 mode = DImode;
10790 /* Avoid blowing up in arm_gen_constant (). */
10791 if (!(outer_code == PLUS
10792 || outer_code == AND
10793 || outer_code == IOR
10794 || outer_code == XOR
10795 || outer_code == MINUS))
10796 outer_code = SET;
10798 const_int_cost:
10799 if (mode == SImode)
10801 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10802 INTVAL (x), NULL, NULL,
10803 0, 0));
10804 /* Extra costs? */
10806 else
10808 *cost += COSTS_N_INSNS (arm_gen_constant
10809 (outer_code, SImode, NULL,
10810 trunc_int_for_mode (INTVAL (x), SImode),
10811 NULL, NULL, 0, 0)
10812 + arm_gen_constant (outer_code, SImode, NULL,
10813 INTVAL (x) >> 32, NULL,
10814 NULL, 0, 0));
10815 /* Extra costs? */
10818 return true;
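/* For instance, the SImode constant 0x00ff00ff is typically synthesized as
   two instructions (MOV of one rotated 8-bit immediate, ORR of the other),
   so arm_gen_constant reports 2 and the cost becomes COSTS_N_INSNS (2); the
   exact count is whatever arm_gen_constant computes for the value at hand.  */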
10820 case CONST:
10821 case LABEL_REF:
10822 case SYMBOL_REF:
10823 if (speed_p)
10825 if (arm_arch_thumb2 && !flag_pic)
10826 *cost = COSTS_N_INSNS (2);
10827 else
10828 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10830 else
10831 *cost = COSTS_N_INSNS (2);
10833 if (flag_pic)
10835 *cost += COSTS_N_INSNS (1);
10836 if (speed_p)
10837 *cost += extra_cost->alu.arith;
10840 return true;
10842 case CONST_FIXED:
10843 *cost = COSTS_N_INSNS (4);
10844 /* Fixme. */
10845 return true;
10847 case CONST_DOUBLE:
10848 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10849 && (mode == SFmode || !TARGET_VFP_SINGLE))
10851 if (vfp3_const_double_rtx (x))
10853 *cost = COSTS_N_INSNS (1);
10854 if (speed_p)
10855 *cost += extra_cost->fp[mode == DFmode].fpconst;
10856 return true;
10859 if (speed_p)
10861 *cost = COSTS_N_INSNS (1);
10862 if (mode == DFmode)
10863 *cost += extra_cost->ldst.loadd;
10864 else
10865 *cost += extra_cost->ldst.loadf;
10867 else
10868 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10870 return true;
10872 *cost = COSTS_N_INSNS (4);
10873 return true;
10875 case CONST_VECTOR:
10876 /* Fixme. */
10877 if (TARGET_NEON
10878 && TARGET_HARD_FLOAT
10879 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10880 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10881 *cost = COSTS_N_INSNS (1);
10882 else
10883 *cost = COSTS_N_INSNS (4);
10884 return true;
10886 case HIGH:
10887 case LO_SUM:
10888 *cost = COSTS_N_INSNS (1);
10889 /* When optimizing for size, we prefer constant pool entries to
10890 MOVW/MOVT pairs, so bump the cost of these slightly. */
10891 if (!speed_p)
10892 *cost += 1;
10893 return true;
10895 case CLZ:
10896 *cost = COSTS_N_INSNS (1);
10897 if (speed_p)
10898 *cost += extra_cost->alu.clz;
10899 return false;
10901 case SMIN:
10902 if (XEXP (x, 1) == const0_rtx)
10904 *cost = COSTS_N_INSNS (1);
10905 if (speed_p)
10906 *cost += extra_cost->alu.log_shift;
10907 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10908 return true;
10910 /* Fall through. */
10911 case SMAX:
10912 case UMIN:
10913 case UMAX:
10914 *cost = COSTS_N_INSNS (2);
10915 return false;
10917 case TRUNCATE:
10918 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10919 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10920 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10922 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10923 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10924 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10925 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10926 == ZERO_EXTEND))))
10928 *cost = COSTS_N_INSNS (1);
10929 if (speed_p)
10930 *cost += extra_cost->mult[1].extend;
10931 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10932 speed_p)
10933 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10934 0, speed_p));
10935 return true;
10937 *cost = LIBCALL_COST (1);
10938 return false;
10940 case UNSPEC:
10941 return arm_unspec_cost (x, outer_code, speed_p, cost);
10943 case PC:
10944 /* Reading the PC is like reading any other register. Writing it
10945 is more expensive, but we take that into account elsewhere. */
10946 *cost = 0;
10947 return true;
10949 case ZERO_EXTRACT:
10950 /* TODO: Simple zero_extract of bottom bits using AND. */
10951 /* Fall through. */
10952 case SIGN_EXTRACT:
10953 if (arm_arch6
10954 && mode == SImode
10955 && CONST_INT_P (XEXP (x, 1))
10956 && CONST_INT_P (XEXP (x, 2)))
10958 *cost = COSTS_N_INSNS (1);
10959 if (speed_p)
10960 *cost += extra_cost->alu.bfx;
10961 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10962 return true;
10964 /* Without UBFX/SBFX, need to resort to shift operations. */
10965 *cost = COSTS_N_INSNS (2);
10966 if (speed_p)
10967 *cost += 2 * extra_cost->alu.shift;
10968 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10969 return true;
10971 case FLOAT_EXTEND:
10972 if (TARGET_HARD_FLOAT)
10974 *cost = COSTS_N_INSNS (1);
10975 if (speed_p)
10976 *cost += extra_cost->fp[mode == DFmode].widen;
10977 if (!TARGET_FPU_ARMV8
10978 && GET_MODE (XEXP (x, 0)) == HFmode)
10980 /* Pre v8, widening HF->DF is a two-step process, first
10981 widening to SFmode. */
10982 *cost += COSTS_N_INSNS (1);
10983 if (speed_p)
10984 *cost += extra_cost->fp[0].widen;
10986 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10987 return true;
10990 *cost = LIBCALL_COST (1);
10991 return false;
10993 case FLOAT_TRUNCATE:
10994 if (TARGET_HARD_FLOAT)
10996 *cost = COSTS_N_INSNS (1);
10997 if (speed_p)
10998 *cost += extra_cost->fp[mode == DFmode].narrow;
10999 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11000 return true;
11001 /* Vector modes? */
11003 *cost = LIBCALL_COST (1);
11004 return false;
11006 case FMA:
11007 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11009 rtx op0 = XEXP (x, 0);
11010 rtx op1 = XEXP (x, 1);
11011 rtx op2 = XEXP (x, 2);
11013 *cost = COSTS_N_INSNS (1);
11015 /* vfms or vfnma. */
11016 if (GET_CODE (op0) == NEG)
11017 op0 = XEXP (op0, 0);
11019 /* vfnms or vfnma. */
11020 if (GET_CODE (op2) == NEG)
11021 op2 = XEXP (op2, 0);
11023 *cost += rtx_cost (op0, FMA, 0, speed_p);
11024 *cost += rtx_cost (op1, FMA, 1, speed_p);
11025 *cost += rtx_cost (op2, FMA, 2, speed_p);
11027 if (speed_p)
11028 *cost += extra_cost->fp[mode == DFmode].fma;
11030 return true;
11033 *cost = LIBCALL_COST (3);
11034 return false;
11036 case FIX:
11037 case UNSIGNED_FIX:
11038 if (TARGET_HARD_FLOAT)
11040 if (GET_MODE_CLASS (mode) == MODE_INT)
11042 *cost = COSTS_N_INSNS (1);
11043 if (speed_p)
11044 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11045 /* Strip off the 'cost' of rounding towards zero. */
11046 if (GET_CODE (XEXP (x, 0)) == FIX)
11047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11048 else
11049 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11050 /* ??? Increase the cost to deal with transferring from
11051 FP -> CORE registers? */
11052 return true;
11054 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11055 && TARGET_FPU_ARMV8)
11057 *cost = COSTS_N_INSNS (1);
11058 if (speed_p)
11059 *cost += extra_cost->fp[mode == DFmode].roundint;
11060 return false;
11062 /* Vector costs? */
11064 *cost = LIBCALL_COST (1);
11065 return false;
11067 case FLOAT:
11068 case UNSIGNED_FLOAT:
11069 if (TARGET_HARD_FLOAT)
11071 /* ??? Increase the cost to deal with transferring from CORE
11072 -> FP registers? */
11073 *cost = COSTS_N_INSNS (1);
11074 if (speed_p)
11075 *cost += extra_cost->fp[mode == DFmode].fromint;
11076 return false;
11078 *cost = LIBCALL_COST (1);
11079 return false;
11081 case CALL:
11082 *cost = COSTS_N_INSNS (1);
11083 return true;
11085 case ASM_OPERANDS:
11087 /* Just a guess. Guess number of instructions in the asm
11088 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11089 though (see PR60663). */
11090 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11091 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11093 *cost = COSTS_N_INSNS (asm_length + num_operands);
11094 return true;
11096 default:
11097 if (mode != VOIDmode)
11098 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11099 else
11100 *cost = COSTS_N_INSNS (4); /* Who knows? */
11101 return false;
11105 #undef HANDLE_NARROW_SHIFT_ARITH
11107 /* RTX costs. Dispatch to the cost implementation appropriate for the current tuning, for both the speed and size cases. */
11108 static bool
11109 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11110 int *total, bool speed)
11112 bool result;
11114 if (TARGET_OLD_RTX_COSTS
11115 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11117 /* Old way. (Deprecated.) */
11118 if (!speed)
11119 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11120 (enum rtx_code) outer_code, total);
11121 else
11122 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11123 (enum rtx_code) outer_code, total,
11124 speed);
11126 else
11128 /* New way. */
11129 if (current_tune->insn_extra_cost)
11130 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11131 (enum rtx_code) outer_code,
11132 current_tune->insn_extra_cost,
11133 total, speed);
11134 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11135 && current_tune->insn_extra_cost == NULL */
11136 else
11137 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11138 (enum rtx_code) outer_code,
11139 &generic_extra_costs, total, speed);
11142 if (dump_file && (dump_flags & TDF_DETAILS))
11144 print_rtl_single (dump_file, x);
11145 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11146 *total, result ? "final" : "partial");
11148 return result;
11151 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11152 supported on any "slowmul" cores, so it can be ignored. */
11154 static bool
11155 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11156 int *total, bool speed)
11158 machine_mode mode = GET_MODE (x);
11160 if (TARGET_THUMB)
11162 *total = thumb1_rtx_costs (x, code, outer_code);
11163 return true;
11166 switch (code)
11168 case MULT:
11169 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11170 || mode == DImode)
11172 *total = COSTS_N_INSNS (20);
11173 return false;
11176 if (CONST_INT_P (XEXP (x, 1)))
11178 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11179 & (unsigned HOST_WIDE_INT) 0xffffffff);
11180 int cost, const_ok = const_ok_for_arm (i);
11181 int j, booth_unit_size;
11183 /* Tune as appropriate. */
11184 cost = const_ok ? 4 : 8;
11185 booth_unit_size = 2;
11186 for (j = 0; i && j < 32; j += booth_unit_size)
11188 i >>= booth_unit_size;
11189 cost++;
11192 *total = COSTS_N_INSNS (cost);
11193 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11194 return true;
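/* Worked example for the loop above: for XEXP (x, 1) == 100 (0x64, seven
   significant bits) the two-bit Booth steps run four times, so with
   const_ok_for_arm giving a base of 4 the multiply is costed at
   COSTS_N_INSNS (8) plus the cost of operand 0.  */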
11197 *total = COSTS_N_INSNS (20);
11198 return false;
11200 default:
11201 return arm_rtx_costs_1 (x, outer_code, total, speed);
11206 /* RTX cost for cores with a fast multiply unit (M variants). */
11208 static bool
11209 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11210 int *total, bool speed)
11212 machine_mode mode = GET_MODE (x);
11214 if (TARGET_THUMB1)
11216 *total = thumb1_rtx_costs (x, code, outer_code);
11217 return true;
11220 /* ??? should thumb2 use different costs? */
11221 switch (code)
11223 case MULT:
11224 /* There is no point basing this on the tuning, since it is always the
11225 fast variant if it exists at all. */
11226 if (mode == DImode
11227 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11228 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11229 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11231 *total = COSTS_N_INSNS (2);
11232 return false;
11236 if (mode == DImode)
11238 *total = COSTS_N_INSNS (5);
11239 return false;
11242 if (CONST_INT_P (XEXP (x, 1)))
11244 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11245 & (unsigned HOST_WIDE_INT) 0xffffffff);
11246 int cost, const_ok = const_ok_for_arm (i);
11247 int j, booth_unit_size;
11249 /* Tune as appropriate. */
11250 cost = const_ok ? 4 : 8;
11251 booth_unit_size = 8;
11252 for (j = 0; i && j < 32; j += booth_unit_size)
11254 i >>= booth_unit_size;
11255 cost++;
11258 *total = COSTS_N_INSNS (cost);
11259 return false;
11262 if (mode == SImode)
11264 *total = COSTS_N_INSNS (4);
11265 return false;
11268 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11270 if (TARGET_HARD_FLOAT
11271 && (mode == SFmode
11272 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11274 *total = COSTS_N_INSNS (1);
11275 return false;
11279 /* Requires a lib call */
11280 *total = COSTS_N_INSNS (20);
11281 return false;
11283 default:
11284 return arm_rtx_costs_1 (x, outer_code, total, speed);
11289 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11290 so it can be ignored. */
11292 static bool
11293 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11294 int *total, bool speed)
11296 machine_mode mode = GET_MODE (x);
11298 if (TARGET_THUMB)
11300 *total = thumb1_rtx_costs (x, code, outer_code);
11301 return true;
11304 switch (code)
11306 case COMPARE:
11307 if (GET_CODE (XEXP (x, 0)) != MULT)
11308 return arm_rtx_costs_1 (x, outer_code, total, speed);
11310 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11311 will stall until the multiplication is complete. */
11312 *total = COSTS_N_INSNS (3);
11313 return false;
11315 case MULT:
11316 /* There is no point basing this on the tuning, since it is always the
11317 fast variant if it exists at all. */
11318 if (mode == DImode
11319 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11320 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11321 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11323 *total = COSTS_N_INSNS (2);
11324 return false;
11328 if (mode == DImode)
11330 *total = COSTS_N_INSNS (5);
11331 return false;
11334 if (CONST_INT_P (XEXP (x, 1)))
11336 /* If operand 1 is a constant we can more accurately
11337 calculate the cost of the multiply. The multiplier can
11338 retire 15 bits on the first cycle and a further 12 on the
11339 second. We do, of course, have to load the constant into
11340 a register first. */
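/* Worked examples of the costing below: a constant that fits in the low
   15 bits (e.g. 100) costs just the 1-cycle overhead; 0x12345 sets bits
   under the 0xffff8000 mask and costs 2; 0x12345678 additionally sets bits
   under the 0xf8000000 mask and costs 3.  */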
11341 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11342 /* There's a general overhead of one cycle. */
11343 int cost = 1;
11344 unsigned HOST_WIDE_INT masked_const;
11346 if (i & 0x80000000)
11347 i = ~i;
11349 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11351 masked_const = i & 0xffff8000;
11352 if (masked_const != 0)
11354 cost++;
11355 masked_const = i & 0xf8000000;
11356 if (masked_const != 0)
11357 cost++;
11359 *total = COSTS_N_INSNS (cost);
11360 return false;
11363 if (mode == SImode)
11365 *total = COSTS_N_INSNS (3);
11366 return false;
11369 /* Requires a lib call */
11370 *total = COSTS_N_INSNS (20);
11371 return false;
11373 default:
11374 return arm_rtx_costs_1 (x, outer_code, total, speed);
11379 /* RTX costs for 9e (and later) cores. */
11381 static bool
11382 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11383 int *total, bool speed)
11385 machine_mode mode = GET_MODE (x);
11387 if (TARGET_THUMB1)
11389 switch (code)
11391 case MULT:
11392 /* Small multiply: 32 cycles for an integer multiply inst. */
11393 if (arm_arch6m && arm_m_profile_small_mul)
11394 *total = COSTS_N_INSNS (32);
11395 else
11396 *total = COSTS_N_INSNS (3);
11397 return true;
11399 default:
11400 *total = thumb1_rtx_costs (x, code, outer_code);
11401 return true;
11405 switch (code)
11407 case MULT:
11408 /* There is no point basing this on the tuning, since it is always the
11409 fast variant if it exists at all. */
11410 if (mode == DImode
11411 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11412 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11413 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11415 *total = COSTS_N_INSNS (2);
11416 return false;
11420 if (mode == DImode)
11422 *total = COSTS_N_INSNS (5);
11423 return false;
11426 if (mode == SImode)
11428 *total = COSTS_N_INSNS (2);
11429 return false;
11432 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11434 if (TARGET_HARD_FLOAT
11435 && (mode == SFmode
11436 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11438 *total = COSTS_N_INSNS (1);
11439 return false;
11443 *total = COSTS_N_INSNS (20);
11444 return false;
11446 default:
11447 return arm_rtx_costs_1 (x, outer_code, total, speed);
11450 /* All address computations that can be done are free, but rtx cost returns
11451 the same for practically all of them. So we weight the different types
11452 of address here in the order (most pref first):
11453 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11454 static inline int
11455 arm_arm_address_cost (rtx x)
11457 enum rtx_code c = GET_CODE (x);
11459 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11460 return 0;
11461 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11462 return 10;
11464 if (c == PLUS)
11466 if (CONST_INT_P (XEXP (x, 1)))
11467 return 2;
11469 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11470 return 3;
11472 return 4;
11475 return 6;
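/* Some concrete values returned by the weighting above: (post_inc (reg)) -> 0,
   (plus (reg) (const_int 4)) -> 2, (plus (reg) (reg)) -> 4, a plain (reg) -> 6,
   and a (symbol_ref) -> 10.  */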
11478 static inline int
11479 arm_thumb_address_cost (rtx x)
11481 enum rtx_code c = GET_CODE (x);
11483 if (c == REG)
11484 return 1;
11485 if (c == PLUS
11486 && REG_P (XEXP (x, 0))
11487 && CONST_INT_P (XEXP (x, 1)))
11488 return 1;
11490 return 2;
11493 static int
11494 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11495 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11497 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11500 /* Adjust cost hook for XScale. */
11501 static bool
11502 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11504 /* Some true dependencies can have a higher cost depending
11505 on precisely how certain input operands are used. */
11506 if (REG_NOTE_KIND(link) == 0
11507 && recog_memoized (insn) >= 0
11508 && recog_memoized (dep) >= 0)
11510 int shift_opnum = get_attr_shift (insn);
11511 enum attr_type attr_type = get_attr_type (dep);
11513 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11514 operand for INSN. If we have a shifted input operand and the
11515 instruction we depend on is another ALU instruction, then we may
11516 have to account for an additional stall. */
11517 if (shift_opnum != 0
11518 && (attr_type == TYPE_ALU_SHIFT_IMM
11519 || attr_type == TYPE_ALUS_SHIFT_IMM
11520 || attr_type == TYPE_LOGIC_SHIFT_IMM
11521 || attr_type == TYPE_LOGICS_SHIFT_IMM
11522 || attr_type == TYPE_ALU_SHIFT_REG
11523 || attr_type == TYPE_ALUS_SHIFT_REG
11524 || attr_type == TYPE_LOGIC_SHIFT_REG
11525 || attr_type == TYPE_LOGICS_SHIFT_REG
11526 || attr_type == TYPE_MOV_SHIFT
11527 || attr_type == TYPE_MVN_SHIFT
11528 || attr_type == TYPE_MOV_SHIFT_REG
11529 || attr_type == TYPE_MVN_SHIFT_REG))
11531 rtx shifted_operand;
11532 int opno;
11534 /* Get the shifted operand. */
11535 extract_insn (insn);
11536 shifted_operand = recog_data.operand[shift_opnum];
11538 /* Iterate over all the operands in DEP. If we write an operand
11539 that overlaps with SHIFTED_OPERAND, then we have to increase the
11540 cost of this dependency. */
11541 extract_insn (dep);
11542 preprocess_constraints (dep);
11543 for (opno = 0; opno < recog_data.n_operands; opno++)
11545 /* We can ignore strict inputs. */
11546 if (recog_data.operand_type[opno] == OP_IN)
11547 continue;
11549 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11550 shifted_operand))
11552 *cost = 2;
11553 return false;
11558 return true;
11561 /* Adjust cost hook for Cortex A9. */
11562 static bool
11563 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11565 switch (REG_NOTE_KIND (link))
11567 case REG_DEP_ANTI:
11568 *cost = 0;
11569 return false;
11571 case REG_DEP_TRUE:
11572 case REG_DEP_OUTPUT:
11573 if (recog_memoized (insn) >= 0
11574 && recog_memoized (dep) >= 0)
11576 if (GET_CODE (PATTERN (insn)) == SET)
11578 if (GET_MODE_CLASS
11579 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11580 || GET_MODE_CLASS
11581 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11583 enum attr_type attr_type_insn = get_attr_type (insn);
11584 enum attr_type attr_type_dep = get_attr_type (dep);
11586 /* By default all dependencies of the form
11587 s0 = s0 <op> s1
11588 s0 = s0 <op> s2
11589 have an extra latency of 1 cycle because
11590 of the input and output dependency in this
11591 case. However, this gets modeled as a true
11592 dependency and hence all these checks. */
11593 if (REG_P (SET_DEST (PATTERN (insn)))
11594 && REG_P (SET_DEST (PATTERN (dep)))
11595 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11596 SET_DEST (PATTERN (dep))))
11598 /* FMACS is a special case where the dependent
11599 instruction can be issued 3 cycles before
11600 the normal latency in case of an output
11601 dependency. */
11602 if ((attr_type_insn == TYPE_FMACS
11603 || attr_type_insn == TYPE_FMACD)
11604 && (attr_type_dep == TYPE_FMACS
11605 || attr_type_dep == TYPE_FMACD))
11607 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11608 *cost = insn_default_latency (dep) - 3;
11609 else
11610 *cost = insn_default_latency (dep);
11611 return false;
11613 else
11615 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11616 *cost = insn_default_latency (dep) + 1;
11617 else
11618 *cost = insn_default_latency (dep);
11620 return false;
11625 break;
11627 default:
11628 gcc_unreachable ();
11631 return true;
11634 /* Adjust cost hook for FA726TE. */
11635 static bool
11636 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11638 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11639 followed by a predicated one) has a penalty of 3. */
11640 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11641 && recog_memoized (insn) >= 0
11642 && recog_memoized (dep) >= 0
11643 && get_attr_conds (dep) == CONDS_SET)
11645 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11646 if (get_attr_conds (insn) == CONDS_USE
11647 && get_attr_type (insn) != TYPE_BRANCH)
11649 *cost = 3;
11650 return false;
11653 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11654 || get_attr_conds (insn) == CONDS_USE)
11656 *cost = 0;
11657 return false;
11661 return true;
11664 /* Implement TARGET_REGISTER_MOVE_COST.
11666 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11667 such a move is typically more expensive than a single memory access. We set
11668 the cost to less than two memory accesses so that floating
11669 point to integer conversion does not go through memory. */
11672 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11673 reg_class_t from, reg_class_t to)
11675 if (TARGET_32BIT)
11677 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11678 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11679 return 15;
11680 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11681 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11682 return 4;
11683 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11684 return 20;
11685 else
11686 return 2;
11688 else
11690 if (from == HI_REGS || to == HI_REGS)
11691 return 4;
11692 else
11693 return 2;
11697 /* Implement TARGET_MEMORY_MOVE_COST. */
11700 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11701 bool in ATTRIBUTE_UNUSED)
11703 if (TARGET_32BIT)
11704 return 10;
11705 else
11707 if (GET_MODE_SIZE (mode) < 4)
11708 return 8;
11709 else
11710 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11714 /* Vectorizer cost model implementation. */
11716 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11717 static int
11718 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11719 tree vectype,
11720 int misalign ATTRIBUTE_UNUSED)
11722 unsigned elements;
11724 switch (type_of_cost)
11726 case scalar_stmt:
11727 return current_tune->vec_costs->scalar_stmt_cost;
11729 case scalar_load:
11730 return current_tune->vec_costs->scalar_load_cost;
11732 case scalar_store:
11733 return current_tune->vec_costs->scalar_store_cost;
11735 case vector_stmt:
11736 return current_tune->vec_costs->vec_stmt_cost;
11738 case vector_load:
11739 return current_tune->vec_costs->vec_align_load_cost;
11741 case vector_store:
11742 return current_tune->vec_costs->vec_store_cost;
11744 case vec_to_scalar:
11745 return current_tune->vec_costs->vec_to_scalar_cost;
11747 case scalar_to_vec:
11748 return current_tune->vec_costs->scalar_to_vec_cost;
11750 case unaligned_load:
11751 return current_tune->vec_costs->vec_unalign_load_cost;
11753 case unaligned_store:
11754 return current_tune->vec_costs->vec_unalign_store_cost;
11756 case cond_branch_taken:
11757 return current_tune->vec_costs->cond_taken_branch_cost;
11759 case cond_branch_not_taken:
11760 return current_tune->vec_costs->cond_not_taken_branch_cost;
11762 case vec_perm:
11763 case vec_promote_demote:
11764 return current_tune->vec_costs->vec_stmt_cost;
11766 case vec_construct:
11767 elements = TYPE_VECTOR_SUBPARTS (vectype);
11768 return elements / 2 + 1;
11770 default:
11771 gcc_unreachable ();
11775 /* Implement targetm.vectorize.add_stmt_cost. */
11777 static unsigned
11778 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11779 struct _stmt_vec_info *stmt_info, int misalign,
11780 enum vect_cost_model_location where)
11782 unsigned *cost = (unsigned *) data;
11783 unsigned retval = 0;
11785 if (flag_vect_cost_model)
11787 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11788 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11790 /* Statements in an inner loop relative to the loop being
11791 vectorized are weighted more heavily. The value here is
11792 arbitrary and could potentially be improved with analysis. */
11793 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11794 count *= 50; /* FIXME. */
11796 retval = (unsigned) (count * stmt_cost);
11797 cost[where] += retval;
11800 return retval;
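/* A minimal illustrative sketch (not part of arm.c or GCC): the weighting
   arm_add_stmt_cost above applies before accumulating into the cost bucket
   for WHERE.  A statement in a loop nested inside the loop being vectorized
   has its count scaled by the arbitrary factor of 50 noted in the FIXME.
   The function name and parameters here are hypothetical.  */

unsigned
weighted_stmt_cost (int count, int stmt_cost, int in_inner_loop)
{
  if (in_inner_loop)
    count *= 50;

  return (unsigned) (count * stmt_cost);
}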
11803 /* Return true if and only if this insn can dual-issue only as older. */
11804 static bool
11805 cortexa7_older_only (rtx_insn *insn)
11807 if (recog_memoized (insn) < 0)
11808 return false;
11810 switch (get_attr_type (insn))
11812 case TYPE_ALU_DSP_REG:
11813 case TYPE_ALU_SREG:
11814 case TYPE_ALUS_SREG:
11815 case TYPE_LOGIC_REG:
11816 case TYPE_LOGICS_REG:
11817 case TYPE_ADC_REG:
11818 case TYPE_ADCS_REG:
11819 case TYPE_ADR:
11820 case TYPE_BFM:
11821 case TYPE_REV:
11822 case TYPE_MVN_REG:
11823 case TYPE_SHIFT_IMM:
11824 case TYPE_SHIFT_REG:
11825 case TYPE_LOAD_BYTE:
11826 case TYPE_LOAD1:
11827 case TYPE_STORE1:
11828 case TYPE_FFARITHS:
11829 case TYPE_FADDS:
11830 case TYPE_FFARITHD:
11831 case TYPE_FADDD:
11832 case TYPE_FMOV:
11833 case TYPE_F_CVT:
11834 case TYPE_FCMPS:
11835 case TYPE_FCMPD:
11836 case TYPE_FCONSTS:
11837 case TYPE_FCONSTD:
11838 case TYPE_FMULS:
11839 case TYPE_FMACS:
11840 case TYPE_FMULD:
11841 case TYPE_FMACD:
11842 case TYPE_FDIVS:
11843 case TYPE_FDIVD:
11844 case TYPE_F_MRC:
11845 case TYPE_F_MRRC:
11846 case TYPE_F_FLAG:
11847 case TYPE_F_LOADS:
11848 case TYPE_F_STORES:
11849 return true;
11850 default:
11851 return false;
11855 /* Return true if and only if this insn can dual-issue as younger. */
11856 static bool
11857 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11859 if (recog_memoized (insn) < 0)
11861 if (verbose > 5)
11862 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11863 return false;
11866 switch (get_attr_type (insn))
11868 case TYPE_ALU_IMM:
11869 case TYPE_ALUS_IMM:
11870 case TYPE_LOGIC_IMM:
11871 case TYPE_LOGICS_IMM:
11872 case TYPE_EXTEND:
11873 case TYPE_MVN_IMM:
11874 case TYPE_MOV_IMM:
11875 case TYPE_MOV_REG:
11876 case TYPE_MOV_SHIFT:
11877 case TYPE_MOV_SHIFT_REG:
11878 case TYPE_BRANCH:
11879 case TYPE_CALL:
11880 return true;
11881 default:
11882 return false;
11887 /* Look for an instruction that can dual issue only as an older
11888 instruction, and move it in front of any instructions that can
11889 dual-issue as younger, while preserving the relative order of all
11890 other instructions in the ready list. This is a heuristic to help
11891 dual-issue in later cycles, by postponing issue of more flexible
11892 instructions. This heuristic may affect dual issue opportunities
11893 in the current cycle. */
11894 static void
11895 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11896 int *n_readyp, int clock)
11898 int i;
11899 int first_older_only = -1, first_younger = -1;
11901 if (verbose > 5)
11902 fprintf (file,
11903 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11904 clock,
11905 *n_readyp);
11907 /* Traverse the ready list from the head (the instruction to issue
11908 first), looking for the first instruction that can issue as
11909 younger and the first instruction that can dual-issue only as
11910 older. */
11911 for (i = *n_readyp - 1; i >= 0; i--)
11913 rtx_insn *insn = ready[i];
11914 if (cortexa7_older_only (insn))
11916 first_older_only = i;
11917 if (verbose > 5)
11918 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11919 break;
11921 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11922 first_younger = i;
11925 /* Nothing to reorder because either no younger insn found or insn
11926 that can dual-issue only as older appears before any insn that
11927 can dual-issue as younger. */
11928 if (first_younger == -1)
11930 if (verbose > 5)
11931 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11932 return;
11935 /* Nothing to reorder because no older-only insn in the ready list. */
11936 if (first_older_only == -1)
11938 if (verbose > 5)
11939 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11940 return;
11943 /* Move first_older_only insn before first_younger. */
11944 if (verbose > 5)
11945 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11946 INSN_UID(ready [first_older_only]),
11947 INSN_UID(ready [first_younger]));
11948 rtx_insn *first_older_only_insn = ready [first_older_only];
11949 for (i = first_older_only; i < first_younger; i++)
11951 ready[i] = ready[i+1];
11954 ready[i] = first_older_only_insn;
11955 return;
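/* A minimal illustrative sketch (not part of arm.c or GCC) of the array
   rotation performed by cortexa7_sched_reorder above.  The element at
   OLD_POS (the older-only insn) moves up to NEW_POS (the slot of the first
   younger-capable insn, nearer the issue end of the ready list), and the
   elements in between shift down one slot, keeping their relative order.
   The function name is hypothetical; OLD_POS must be less than NEW_POS.  */

void
move_element_up (int *ready, int old_pos, int new_pos)
{
  int saved = ready[old_pos];
  int i;

  for (i = old_pos; i < new_pos; i++)
    ready[i] = ready[i + 1];

  ready[new_pos] = saved;
}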
11958 /* Implement TARGET_SCHED_REORDER. */
11959 static int
11960 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11961 int clock)
11963 switch (arm_tune)
11965 case cortexa7:
11966 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11967 break;
11968 default:
11969 /* Do nothing for other cores. */
11970 break;
11973 return arm_issue_rate ();
11976 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11977 It corrects the value of COST based on the relationship between
11978 INSN and DEP through the dependence LINK. It returns the new
11979 value. There is a per-core adjust_cost hook to adjust scheduler costs
11980 and the per-core hook can choose to completely override the generic
11981 adjust_cost function. Only put bits of code into arm_adjust_cost that
11982 are common across all cores. */
11983 static int
11984 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11986 rtx i_pat, d_pat;
11988 /* When generating Thumb-1 code, we want to place flag-setting operations
11989 close to a conditional branch which depends on them, so that we can
11990 omit the comparison. */
11991 if (TARGET_THUMB1
11992 && REG_NOTE_KIND (link) == 0
11993 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11994 && recog_memoized (dep) >= 0
11995 && get_attr_conds (dep) == CONDS_SET)
11996 return 0;
11998 if (current_tune->sched_adjust_cost != NULL)
12000 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12001 return cost;
12004 /* XXX Is this strictly true? */
12005 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12006 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12007 return 0;
12009 /* Call insns don't incur a stall, even if they follow a load. */
12010 if (REG_NOTE_KIND (link) == 0
12011 && CALL_P (insn))
12012 return 1;
12014 if ((i_pat = single_set (insn)) != NULL
12015 && MEM_P (SET_SRC (i_pat))
12016 && (d_pat = single_set (dep)) != NULL
12017 && MEM_P (SET_DEST (d_pat)))
12019 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12020 /* This is a load after a store; there is no conflict if the load reads
12021 from a cached area. Assume that loads from the stack and from the
12022 constant pool are cached, and that others will miss. This is a
12023 hack. */
12025 if ((GET_CODE (src_mem) == SYMBOL_REF
12026 && CONSTANT_POOL_ADDRESS_P (src_mem))
12027 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12028 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12029 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12030 return 1;
12033 return cost;
12037 arm_max_conditional_execute (void)
12039 return max_insns_skipped;
12042 static int
12043 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12045 if (TARGET_32BIT)
12046 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12047 else
12048 return (optimize > 0) ? 2 : 0;
12051 static int
12052 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12054 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12057 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12058 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12059 sequences of non-executed instructions in IT blocks probably take the same
12060 amount of time as executed instructions (and the IT instruction itself takes
12061 space in icache). This function was experimentally determined to give good
12062 results on a popular embedded benchmark. */
12064 static int
12065 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12067 return (TARGET_32BIT && speed_p) ? 1
12068 : arm_default_branch_cost (speed_p, predictable_p);
12071 static bool fp_consts_inited = false;
12073 static REAL_VALUE_TYPE value_fp0;
12075 static void
12076 init_fp_table (void)
12078 REAL_VALUE_TYPE r;
12080 r = REAL_VALUE_ATOF ("0", DFmode);
12081 value_fp0 = r;
12082 fp_consts_inited = true;
12085 /* Return TRUE if rtx X is a valid immediate FP constant. */
12087 arm_const_double_rtx (rtx x)
12089 REAL_VALUE_TYPE r;
12091 if (!fp_consts_inited)
12092 init_fp_table ();
12094 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12095 if (REAL_VALUE_MINUS_ZERO (r))
12096 return 0;
12098 if (REAL_VALUES_EQUAL (r, value_fp0))
12099 return 1;
12101 return 0;
12104 /* VFPv3 has a fairly wide range of representable immediates, formed from
12105 "quarter-precision" floating-point values. These can be evaluated using this
12106 formula (with ^ for exponentiation):
12108 -1^s * n * 2^-r
12110 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12111 16 <= n <= 31 and 0 <= r <= 7.
12113 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12115 - A (most-significant) is the sign bit.
12116 - BCD are the exponent (encoded as r XOR 3).
12117 - EFGH are the mantissa (encoded as n - 16).
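/* A minimal illustrative sketch (not part of arm.c or GCC): decode an 8-bit
   VFPv3 immediate ABCDEFGH back into the value it represents, using the
   formula above.  For example, 0x70 decodes to 1.0 and 0x60 to 0.5;
   vfp3_const_double_index below computes the inverse mapping.  The function
   name is hypothetical.  */

double
vfp3_decode_imm8 (unsigned int imm8)
{
  int sign = (imm8 >> 7) & 1;			/* A: the sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;		/* BCD: exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;			/* EFGH: mantissa, stored as n - 16.  */
  double value = (double) n / (double) (1 << r);	/* n * 2^-r.  */

  return sign ? -value : value;
}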
12120 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12121 fconst[sd] instruction, or -1 if X isn't suitable. */
12122 static int
12123 vfp3_const_double_index (rtx x)
12125 REAL_VALUE_TYPE r, m;
12126 int sign, exponent;
12127 unsigned HOST_WIDE_INT mantissa, mant_hi;
12128 unsigned HOST_WIDE_INT mask;
12129 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12130 bool fail;
12132 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12133 return -1;
12135 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12137 /* We can't represent these things, so detect them first. */
12138 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12139 return -1;
12141 /* Extract sign, exponent and mantissa. */
12142 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12143 r = real_value_abs (&r);
12144 exponent = REAL_EXP (&r);
12145 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12146 highest (sign) bit, with a fixed binary point at bit point_pos.
12147 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12148 bits for the mantissa, this may fail (low bits would be lost). */
12149 real_ldexp (&m, &r, point_pos - exponent);
12150 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12151 mantissa = w.elt (0);
12152 mant_hi = w.elt (1);
12154 /* If there are bits set in the low part of the mantissa, we can't
12155 represent this value. */
12156 if (mantissa != 0)
12157 return -1;
12159 /* Now make it so that mantissa contains the most-significant bits, and move
12160 the point_pos to indicate that the least-significant bits have been
12161 discarded. */
12162 point_pos -= HOST_BITS_PER_WIDE_INT;
12163 mantissa = mant_hi;
12165 /* We can permit four significant bits of mantissa only, plus a high bit
12166 which is always 1. */
12167 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12168 if ((mantissa & mask) != 0)
12169 return -1;
12171 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12172 mantissa >>= point_pos - 5;
12174 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12175 floating-point immediate zero with Neon using an integer-zero load, but
12176 that case is handled elsewhere.) */
12177 if (mantissa == 0)
12178 return -1;
12180 gcc_assert (mantissa >= 16 && mantissa <= 31);
12182 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12183 normalized significands are in the range [1, 2). (Our mantissa is shifted
12184 left 4 places at this point relative to normalized IEEE754 values). GCC
12185 internally uses [0.5, 1) (see real.c), so the exponent returned from
12186 REAL_EXP must be altered. */
12187 exponent = 5 - exponent;
12189 if (exponent < 0 || exponent > 7)
12190 return -1;
12192 /* Sign, mantissa and exponent are now in the correct form to plug into the
12193 formula described in the comment above. */
12194 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12197 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12199 vfp3_const_double_rtx (rtx x)
12201 if (!TARGET_VFP3)
12202 return 0;
12204 return vfp3_const_double_index (x) != -1;
12207 /* Recognize immediates which can be used in various Neon instructions. Legal
12208 immediates are described by the following table (for VMVN variants, the
12209 bitwise inverse of the constant shown is recognized. In either case, VMOV
12210 is output and the correct instruction to use for a given constant is chosen
12211 by the assembler). The constant shown is replicated across all elements of
12212 the destination vector.
12214 insn elems variant constant (binary)
12215 ---- ----- ------- -----------------
12216 vmov i32 0 00000000 00000000 00000000 abcdefgh
12217 vmov i32 1 00000000 00000000 abcdefgh 00000000
12218 vmov i32 2 00000000 abcdefgh 00000000 00000000
12219 vmov i32 3 abcdefgh 00000000 00000000 00000000
12220 vmov i16 4 00000000 abcdefgh
12221 vmov i16 5 abcdefgh 00000000
12222 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12223 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12224 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12225 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12226 vmvn i16 10 00000000 abcdefgh
12227 vmvn i16 11 abcdefgh 00000000
12228 vmov i32 12 00000000 00000000 abcdefgh 11111111
12229 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12230 vmov i32 14 00000000 abcdefgh 11111111 11111111
12231 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12232 vmov i8 16 abcdefgh
12233 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12234 eeeeeeee ffffffff gggggggg hhhhhhhh
12235 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12236 vmov f32 19 00000000 00000000 00000000 00000000
12238 For case 18, B = !b. Representable values are exactly those accepted by
12239 vfp3_const_double_index, but are output as floating-point numbers rather
12240 than indices.
12242 For case 19, we will change it to vmov.i32 when assembling.
12244 Variants 0-5 (inclusive) may also be used as immediates for the second
12245 operand of VORR/VBIC instructions.
12247 The INVERSE argument causes the bitwise inverse of the given operand to be
12248 recognized instead (used for recognizing legal immediates for the VAND/VORN
12249 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12250 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12251 output, rather than the real insns vbic/vorr).
12253 INVERSE makes no difference to the recognition of float vectors.
12255 The return value is the variant of immediate as shown in the above table, or
12256 -1 if the given value doesn't match any of the listed patterns.
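/* A minimal illustrative sketch (not part of arm.c or GCC): classify a
   32-bit element replicated across a vector into the "single non-zero byte"
   VMOV variants 0-3 of the table above.  The real classifier,
   neon_valid_immediate below, additionally handles the VMVN, i16, i8, i64
   and float cases.  The function name is hypothetical.  */

int
vmov_i32_single_byte_variant (unsigned int elt)
{
  int byte;

  for (byte = 0; byte < 4; byte++)
    {
      unsigned int payload_mask = 0xffu << (byte * 8);

      /* Variant N carries abcdefgh in byte N and requires every other byte
	 of the element to be zero.  */
      if ((elt & ~payload_mask) == 0)
	return byte;
    }

  return -1;
}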
12258 static int
12259 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12260 rtx *modconst, int *elementwidth)
12262 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12263 matches = 1; \
12264 for (i = 0; i < idx; i += (STRIDE)) \
12265 if (!(TEST)) \
12266 matches = 0; \
12267 if (matches) \
12269 immtype = (CLASS); \
12270 elsize = (ELSIZE); \
12271 break; \
12274 unsigned int i, elsize = 0, idx = 0, n_elts;
12275 unsigned int innersize;
12276 unsigned char bytes[16];
12277 int immtype = -1, matches;
12278 unsigned int invmask = inverse ? 0xff : 0;
12279 bool vector = GET_CODE (op) == CONST_VECTOR;
12281 if (vector)
12283 n_elts = CONST_VECTOR_NUNITS (op);
12284 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12286 else
12288 n_elts = 1;
12289 if (mode == VOIDmode)
12290 mode = DImode;
12291 innersize = GET_MODE_SIZE (mode);
12294 /* Vectors of float constants. */
12295 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12297 rtx el0 = CONST_VECTOR_ELT (op, 0);
12298 REAL_VALUE_TYPE r0;
12300 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12301 return -1;
12303 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12305 for (i = 1; i < n_elts; i++)
12307 rtx elt = CONST_VECTOR_ELT (op, i);
12308 REAL_VALUE_TYPE re;
12310 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12312 if (!REAL_VALUES_EQUAL (r0, re))
12313 return -1;
12316 if (modconst)
12317 *modconst = CONST_VECTOR_ELT (op, 0);
12319 if (elementwidth)
12320 *elementwidth = 0;
12322 if (el0 == CONST0_RTX (GET_MODE (el0)))
12323 return 19;
12324 else
12325 return 18;
12328 /* Splat vector constant out into a byte vector. */
12329 for (i = 0; i < n_elts; i++)
12331 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12332 unsigned HOST_WIDE_INT elpart;
12333 unsigned int part, parts;
12335 if (CONST_INT_P (el))
12337 elpart = INTVAL (el);
12338 parts = 1;
12340 else if (CONST_DOUBLE_P (el))
12342 elpart = CONST_DOUBLE_LOW (el);
12343 parts = 2;
12345 else
12346 gcc_unreachable ();
12348 for (part = 0; part < parts; part++)
12350 unsigned int byte;
12351 for (byte = 0; byte < innersize; byte++)
12353 bytes[idx++] = (elpart & 0xff) ^ invmask;
12354 elpart >>= BITS_PER_UNIT;
12356 if (CONST_DOUBLE_P (el))
12357 elpart = CONST_DOUBLE_HIGH (el);
12361 /* Sanity check. */
12362 gcc_assert (idx == GET_MODE_SIZE (mode));
12366 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12367 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12369 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12370 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12372 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12373 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12375 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12376 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12378 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12380 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12382 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12383 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12385 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12386 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12388 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12389 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12391 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12392 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12394 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12396 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12398 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12399 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12401 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12402 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12404 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12405 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12407 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12408 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12410 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12412 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12413 && bytes[i] == bytes[(i + 8) % idx]);
12415 while (0);
12417 if (immtype == -1)
12418 return -1;
12420 if (elementwidth)
12421 *elementwidth = elsize;
12423 if (modconst)
12425 unsigned HOST_WIDE_INT imm = 0;
12427 /* Un-invert bytes of recognized vector, if necessary. */
12428 if (invmask != 0)
12429 for (i = 0; i < idx; i++)
12430 bytes[i] ^= invmask;
12432 if (immtype == 17)
12434 /* FIXME: Broken on 32-bit H_W_I hosts. */
12435 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12437 for (i = 0; i < 8; i++)
12438 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12439 << (i * BITS_PER_UNIT);
12441 *modconst = GEN_INT (imm);
12443 else
12445 unsigned HOST_WIDE_INT imm = 0;
12447 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12448 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12450 *modconst = GEN_INT (imm);
12454 return immtype;
12455 #undef CHECK
12458 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12459 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12460 float elements), and a modified constant (whatever should be output for a
12461 VMOV) in *MODCONST. */
12464 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12465 rtx *modconst, int *elementwidth)
12467 rtx tmpconst;
12468 int tmpwidth;
12469 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12471 if (retval == -1)
12472 return 0;
12474 if (modconst)
12475 *modconst = tmpconst;
12477 if (elementwidth)
12478 *elementwidth = tmpwidth;
12480 return 1;
12483 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12484 the immediate is valid, write a constant suitable for using as an operand
12485 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12486 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12489 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12490 rtx *modconst, int *elementwidth)
12492 rtx tmpconst;
12493 int tmpwidth;
12494 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12496 if (retval < 0 || retval > 5)
12497 return 0;
12499 if (modconst)
12500 *modconst = tmpconst;
12502 if (elementwidth)
12503 *elementwidth = tmpwidth;
12505 return 1;
12508 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12509 the immediate is valid, write a constant suitable for using as an operand
12510 to VSHR/VSHL to *MODCONST and the corresponding element width to
12511 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or right shift,
12512 because they have different limitations. */
12515 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12516 rtx *modconst, int *elementwidth,
12517 bool isleftshift)
12519 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12520 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12521 unsigned HOST_WIDE_INT last_elt = 0;
12522 unsigned HOST_WIDE_INT maxshift;
12524 /* Split vector constant out into a byte vector. */
12525 for (i = 0; i < n_elts; i++)
12527 rtx el = CONST_VECTOR_ELT (op, i);
12528 unsigned HOST_WIDE_INT elpart;
12530 if (CONST_INT_P (el))
12531 elpart = INTVAL (el);
12532 else if (CONST_DOUBLE_P (el))
12533 return 0;
12534 else
12535 gcc_unreachable ();
12537 if (i != 0 && elpart != last_elt)
12538 return 0;
12540 last_elt = elpart;
12543 /* Shift less than element size. */
12544 maxshift = innersize * 8;
12546 if (isleftshift)
12548 /* Left shift immediate value can be from 0 to <size>-1. */
12549 if (last_elt >= maxshift)
12550 return 0;
12552 else
12554 /* Right shift immediate value can be from 1 to <size>. */
12555 if (last_elt == 0 || last_elt > maxshift)
12556 return 0;
12559 if (elementwidth)
12560 *elementwidth = innersize * 8;
12562 if (modconst)
12563 *modconst = CONST_VECTOR_ELT (op, 0);
12565 return 1;
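/* A minimal illustrative sketch (not part of arm.c or GCC): the immediate
   range rule enforced by neon_immediate_valid_for_shift above.  For an
   element of ELEM_BITS bits, VSHL accepts shift counts 0 .. ELEM_BITS - 1
   while VSHR accepts 1 .. ELEM_BITS.  The function name is hypothetical.  */

int
neon_shift_imm_in_range (unsigned int shift, unsigned int elem_bits,
			 int is_left_shift)
{
  if (is_left_shift)
    return shift < elem_bits;

  return shift >= 1 && shift <= elem_bits;
}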
12568 /* Return a string suitable for output of Neon immediate logic operation
12569 MNEM. */
12571 char *
12572 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12573 int inverse, int quad)
12575 int width, is_valid;
12576 static char templ[40];
12578 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12580 gcc_assert (is_valid != 0);
12582 if (quad)
12583 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12584 else
12585 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12587 return templ;
12590 /* Return a string suitable for output of Neon immediate shift operation
12591 (VSHR or VSHL) MNEM. */
12593 char *
12594 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12595 machine_mode mode, int quad,
12596 bool isleftshift)
12598 int width, is_valid;
12599 static char templ[40];
12601 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12602 gcc_assert (is_valid != 0);
12604 if (quad)
12605 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12606 else
12607 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12609 return templ;
12612 /* Output a sequence of pairwise operations to implement a reduction.
12613 NOTE: We do "too much work" here, because pairwise operations work on two
12614 registers-worth of operands in one go. Unfortunately we don't think we can
12615 exploit those extra calculations to do the full operation in fewer steps.
12616 Although all vector elements of the result but the first are ignored, we
12617 actually calculate the same result in each of the elements. An alternative
12618 such as initially loading a vector with zero to use as each of the second
12619 operands would use up an additional register and take an extra instruction,
12620 for no particular gain. */
12622 void
12623 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12624 rtx (*reduc) (rtx, rtx, rtx))
12626 machine_mode inner = GET_MODE_INNER (mode);
12627 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12628 rtx tmpsum = op1;
12630 for (i = parts / 2; i >= 1; i /= 2)
12632 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12633 emit_insn (reduc (dest, tmpsum, tmpsum));
12634 tmpsum = dest;
12638 /* If VALS is a vector constant that can be loaded into a register
12639 using VDUP, generate instructions to do so and return an RTX to
12640 assign to the register. Otherwise return NULL_RTX. */
12642 static rtx
12643 neon_vdup_constant (rtx vals)
12645 machine_mode mode = GET_MODE (vals);
12646 machine_mode inner_mode = GET_MODE_INNER (mode);
12647 int n_elts = GET_MODE_NUNITS (mode);
12648 bool all_same = true;
12649 rtx x;
12650 int i;
12652 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12653 return NULL_RTX;
12655 for (i = 0; i < n_elts; ++i)
12657 x = XVECEXP (vals, 0, i);
12658 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12659 all_same = false;
12662 if (!all_same)
12663 /* The elements are not all the same. We could handle repeating
12664 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12665 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12666 vdup.i16). */
12667 return NULL_RTX;
12669 /* We can load this constant by using VDUP and a constant in a
12670 single ARM register. This will be cheaper than a vector
12671 load. */
12673 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12674 return gen_rtx_VEC_DUPLICATE (mode, x);
12677 /* Generate code to load VALS, which is a PARALLEL containing only
12678 constants (for vec_init) or CONST_VECTOR, efficiently into a
12679 register. Returns an RTX to copy into the register, or NULL_RTX
12680 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12683 neon_make_constant (rtx vals)
12685 machine_mode mode = GET_MODE (vals);
12686 rtx target;
12687 rtx const_vec = NULL_RTX;
12688 int n_elts = GET_MODE_NUNITS (mode);
12689 int n_const = 0;
12690 int i;
12692 if (GET_CODE (vals) == CONST_VECTOR)
12693 const_vec = vals;
12694 else if (GET_CODE (vals) == PARALLEL)
12696 /* A CONST_VECTOR must contain only CONST_INTs and
12697 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12698 Only store valid constants in a CONST_VECTOR. */
12699 for (i = 0; i < n_elts; ++i)
12701 rtx x = XVECEXP (vals, 0, i);
12702 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12703 n_const++;
12705 if (n_const == n_elts)
12706 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12708 else
12709 gcc_unreachable ();
12711 if (const_vec != NULL
12712 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12713 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12714 return const_vec;
12715 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12716 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12717 pipeline cycle; creating the constant takes one or two ARM
12718 pipeline cycles. */
12719 return target;
12720 else if (const_vec != NULL_RTX)
12721 /* Load from constant pool. On Cortex-A8 this takes two cycles
12722 (for either double or quad vectors). We can not take advantage
12723 of single-cycle VLD1 because we need a PC-relative addressing
12724 mode. */
12725 return const_vec;
12726 else
12727 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12728 We can not construct an initializer. */
12729 return NULL_RTX;
12732 /* Initialize vector TARGET to VALS. */
12734 void
12735 neon_expand_vector_init (rtx target, rtx vals)
12737 machine_mode mode = GET_MODE (target);
12738 machine_mode inner_mode = GET_MODE_INNER (mode);
12739 int n_elts = GET_MODE_NUNITS (mode);
12740 int n_var = 0, one_var = -1;
12741 bool all_same = true;
12742 rtx x, mem;
12743 int i;
12745 for (i = 0; i < n_elts; ++i)
12747 x = XVECEXP (vals, 0, i);
12748 if (!CONSTANT_P (x))
12749 ++n_var, one_var = i;
12751 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12752 all_same = false;
12755 if (n_var == 0)
12757 rtx constant = neon_make_constant (vals);
12758 if (constant != NULL_RTX)
12760 emit_move_insn (target, constant);
12761 return;
12765 /* Splat a single non-constant element if we can. */
12766 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12768 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12769 emit_insn (gen_rtx_SET (VOIDmode, target,
12770 gen_rtx_VEC_DUPLICATE (mode, x)));
12771 return;
12774 /* One field is non-constant. Load constant then overwrite varying
12775 field. This is more efficient than using the stack. */
12776 if (n_var == 1)
12778 rtx copy = copy_rtx (vals);
12779 rtx index = GEN_INT (one_var);
12781 /* Load constant part of vector, substitute neighboring value for
12782 varying element. */
12783 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12784 neon_expand_vector_init (target, copy);
12786 /* Insert variable. */
12787 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12788 switch (mode)
12790 case V8QImode:
12791 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12792 break;
12793 case V16QImode:
12794 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12795 break;
12796 case V4HImode:
12797 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12798 break;
12799 case V8HImode:
12800 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12801 break;
12802 case V2SImode:
12803 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12804 break;
12805 case V4SImode:
12806 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12807 break;
12808 case V2SFmode:
12809 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12810 break;
12811 case V4SFmode:
12812 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12813 break;
12814 case V2DImode:
12815 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12816 break;
12817 default:
12818 gcc_unreachable ();
12820 return;
12823 /* Construct the vector in memory one field at a time
12824 and load the whole vector. */
12825 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12826 for (i = 0; i < n_elts; i++)
12827 emit_move_insn (adjust_address_nv (mem, inner_mode,
12828 i * GET_MODE_SIZE (inner_mode)),
12829 XVECEXP (vals, 0, i));
12830 emit_move_insn (target, mem);
12833 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12834 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12835 reported source locations are bogus. */
12837 static void
12838 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12839 const char *err)
12841 HOST_WIDE_INT lane;
12843 gcc_assert (CONST_INT_P (operand));
12845 lane = INTVAL (operand);
12847 if (lane < low || lane >= high)
12848 error (err);
12851 /* Bounds-check lanes. */
12853 void
12854 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12856 bounds_check (operand, low, high, "lane out of range");
12859 /* Bounds-check constants. */
12861 void
12862 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12864 bounds_check (operand, low, high, "constant out of range");
12867 HOST_WIDE_INT
12868 neon_element_bits (machine_mode mode)
12870 if (mode == DImode)
12871 return GET_MODE_BITSIZE (mode);
12872 else
12873 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12877 /* Predicates for `match_operand' and `match_operator'. */
12879 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12880 WB is true if full writeback address modes are allowed and is false
12881 if limited writeback address modes (POST_INC and PRE_DEC) are
12882 allowed. */
12885 arm_coproc_mem_operand (rtx op, bool wb)
12887 rtx ind;
12889 /* Reject eliminable registers. */
12890 if (! (reload_in_progress || reload_completed || lra_in_progress)
12891 && ( reg_mentioned_p (frame_pointer_rtx, op)
12892 || reg_mentioned_p (arg_pointer_rtx, op)
12893 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12894 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12895 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12896 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12897 return FALSE;
12899 /* Constants are converted into offsets from labels. */
12900 if (!MEM_P (op))
12901 return FALSE;
12903 ind = XEXP (op, 0);
12905 if (reload_completed
12906 && (GET_CODE (ind) == LABEL_REF
12907 || (GET_CODE (ind) == CONST
12908 && GET_CODE (XEXP (ind, 0)) == PLUS
12909 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12910 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12911 return TRUE;
12913 /* Match: (mem (reg)). */
12914 if (REG_P (ind))
12915 return arm_address_register_rtx_p (ind, 0);
12917 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12918 acceptable in any case (subject to verification by
12919 arm_address_register_rtx_p). We need WB to be true to accept
12920 PRE_INC and POST_DEC. */
12921 if (GET_CODE (ind) == POST_INC
12922 || GET_CODE (ind) == PRE_DEC
12923 || (wb
12924 && (GET_CODE (ind) == PRE_INC
12925 || GET_CODE (ind) == POST_DEC)))
12926 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12928 if (wb
12929 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12930 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12931 && GET_CODE (XEXP (ind, 1)) == PLUS
12932 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12933 ind = XEXP (ind, 1);
12935 /* Match:
12936 (plus (reg)
12937 (const)). */
12938 if (GET_CODE (ind) == PLUS
12939 && REG_P (XEXP (ind, 0))
12940 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12941 && CONST_INT_P (XEXP (ind, 1))
12942 && INTVAL (XEXP (ind, 1)) > -1024
12943 && INTVAL (XEXP (ind, 1)) < 1024
12944 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12945 return TRUE;
12947 return FALSE;
12950 /* Return TRUE if OP is a memory operand which we can load or store a vector
12951 to/from. TYPE is one of the following values:
12952 0 - Vector load/store (vldr)
12953 1 - Core registers (ldm)
12954 2 - Element/structure loads (vld1)
12957 neon_vector_mem_operand (rtx op, int type, bool strict)
12959 rtx ind;
12961 /* Reject eliminable registers. */
12962 if (! (reload_in_progress || reload_completed)
12963 && ( reg_mentioned_p (frame_pointer_rtx, op)
12964 || reg_mentioned_p (arg_pointer_rtx, op)
12965 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12966 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12967 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12968 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12969 return !strict;
12971 /* Constants are converted into offsets from labels. */
12972 if (!MEM_P (op))
12973 return FALSE;
12975 ind = XEXP (op, 0);
12977 if (reload_completed
12978 && (GET_CODE (ind) == LABEL_REF
12979 || (GET_CODE (ind) == CONST
12980 && GET_CODE (XEXP (ind, 0)) == PLUS
12981 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12982 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12983 return TRUE;
12985 /* Match: (mem (reg)). */
12986 if (REG_P (ind))
12987 return arm_address_register_rtx_p (ind, 0);
12989 /* Allow post-increment with Neon registers. */
12990 if ((type != 1 && GET_CODE (ind) == POST_INC)
12991 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12992 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12994 /* Allow post-increment by register for VLDn */
12995 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12996 && GET_CODE (XEXP (ind, 1)) == PLUS
12997 && REG_P (XEXP (XEXP (ind, 1), 1)))
12998 return true;
13000 /* Match:
13001 (plus (reg)
13002 (const)). */
13003 if (type == 0
13004 && GET_CODE (ind) == PLUS
13005 && REG_P (XEXP (ind, 0))
13006 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13007 && CONST_INT_P (XEXP (ind, 1))
13008 && INTVAL (XEXP (ind, 1)) > -1024
13009 /* For quad modes, we restrict the constant offset to be slightly less
13010 than what the instruction format permits. We have no such constraint
13011 on double mode offsets. (This must match arm_legitimate_index_p.) */
13012 && (INTVAL (XEXP (ind, 1))
13013 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13014 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13015 return TRUE;
13017 return FALSE;
13020 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13021 type. */
13023 neon_struct_mem_operand (rtx op)
13025 rtx ind;
13027 /* Reject eliminable registers. */
13028 if (! (reload_in_progress || reload_completed)
13029 && ( reg_mentioned_p (frame_pointer_rtx, op)
13030 || reg_mentioned_p (arg_pointer_rtx, op)
13031 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13032 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13033 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13034 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13035 return FALSE;
13037 /* Constants are converted into offsets from labels. */
13038 if (!MEM_P (op))
13039 return FALSE;
13041 ind = XEXP (op, 0);
13043 if (reload_completed
13044 && (GET_CODE (ind) == LABEL_REF
13045 || (GET_CODE (ind) == CONST
13046 && GET_CODE (XEXP (ind, 0)) == PLUS
13047 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13048 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13049 return TRUE;
13051 /* Match: (mem (reg)). */
13052 if (REG_P (ind))
13053 return arm_address_register_rtx_p (ind, 0);
13055 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13056 if (GET_CODE (ind) == POST_INC
13057 || GET_CODE (ind) == PRE_DEC)
13058 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13060 return FALSE;
13063 /* Return true if X is a register that will be eliminated later on. */
13065 arm_eliminable_register (rtx x)
13067 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13068 || REGNO (x) == ARG_POINTER_REGNUM
13069 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13070 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13073 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13074 coprocessor registers. Otherwise return NO_REGS. */
13076 enum reg_class
13077 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13079 if (mode == HFmode)
13081 if (!TARGET_NEON_FP16)
13082 return GENERAL_REGS;
13083 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13084 return NO_REGS;
13085 return GENERAL_REGS;
13088 /* The neon move patterns handle all legitimate vector and struct
13089 addresses. */
13090 if (TARGET_NEON
13091 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13092 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13093 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13094 || VALID_NEON_STRUCT_MODE (mode)))
13095 return NO_REGS;
13097 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13098 return NO_REGS;
13100 return GENERAL_REGS;
13103 /* Values which must be returned in the most-significant end of the return
13104 register. */
13106 static bool
13107 arm_return_in_msb (const_tree valtype)
13109 return (TARGET_AAPCS_BASED
13110 && BYTES_BIG_ENDIAN
13111 && (AGGREGATE_TYPE_P (valtype)
13112 || TREE_CODE (valtype) == COMPLEX_TYPE
13113 || FIXED_POINT_TYPE_P (valtype)));
13116 /* Return TRUE if X references a SYMBOL_REF. */
13118 symbol_mentioned_p (rtx x)
13120 const char * fmt;
13121 int i;
13123 if (GET_CODE (x) == SYMBOL_REF)
13124 return 1;
13126 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13127 are constant offsets, not symbols. */
13128 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13129 return 0;
13131 fmt = GET_RTX_FORMAT (GET_CODE (x));
13133 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13135 if (fmt[i] == 'E')
13137 int j;
13139 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13140 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13141 return 1;
13143 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13144 return 1;
13147 return 0;
13150 /* Return TRUE if X references a LABEL_REF. */
13152 label_mentioned_p (rtx x)
13154 const char * fmt;
13155 int i;
13157 if (GET_CODE (x) == LABEL_REF)
13158 return 1;
13160 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13161 instruction, but they are constant offsets, not symbols. */
13162 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13163 return 0;
13165 fmt = GET_RTX_FORMAT (GET_CODE (x));
13166 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13168 if (fmt[i] == 'E')
13170 int j;
13172 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13173 if (label_mentioned_p (XVECEXP (x, i, j)))
13174 return 1;
13176 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13177 return 1;
13180 return 0;
13184 tls_mentioned_p (rtx x)
13186 switch (GET_CODE (x))
13188 case CONST:
13189 return tls_mentioned_p (XEXP (x, 0));
13191 case UNSPEC:
13192 if (XINT (x, 1) == UNSPEC_TLS)
13193 return 1;
13195 default:
13196 return 0;
13200 /* Must not copy any rtx that uses a pc-relative address. */
13202 static int
13203 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13205 if (GET_CODE (*x) == UNSPEC
13206 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13207 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13208 return 1;
13209 return 0;
13212 static bool
13213 arm_cannot_copy_insn_p (rtx_insn *insn)
13215 /* The tls call insn cannot be copied, as it is paired with a data
13216 word. */
13217 if (recog_memoized (insn) == CODE_FOR_tlscall)
13218 return true;
13220 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13223 enum rtx_code
13224 minmax_code (rtx x)
13226 enum rtx_code code = GET_CODE (x);
13228 switch (code)
13230 case SMAX:
13231 return GE;
13232 case SMIN:
13233 return LE;
13234 case UMIN:
13235 return LEU;
13236 case UMAX:
13237 return GEU;
13238 default:
13239 gcc_unreachable ();
13243 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13245 bool
13246 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13247 int *mask, bool *signed_sat)
13249 /* The high bound must be a power of two minus one. */
13250 int log = exact_log2 (INTVAL (hi_bound) + 1);
13251 if (log == -1)
13252 return false;
13254 /* The low bound is either zero (for usat) or one less than the
13255 negation of the high bound (for ssat). */
13256 if (INTVAL (lo_bound) == 0)
13258 if (mask)
13259 *mask = log;
13260 if (signed_sat)
13261 *signed_sat = false;
13263 return true;
13266 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13268 if (mask)
13269 *mask = log + 1;
13270 if (signed_sat)
13271 *signed_sat = true;
13273 return true;
13276 return false;
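/* A minimal illustrative sketch (not part of arm.c or GCC) of the bound
   shapes accepted by arm_sat_operator_match above: usat #n clamps to
   [0, 2^n - 1] and ssat #n clamps to [-2^(n-1), 2^(n-1) - 1], so the pair
   (0, 255) matches usat #8 and (-128, 127) matches ssat #8.  The function
   name and parameters here are hypothetical.  */

int
sat_bounds_match (long lo, long hi, int *bits, int *is_signed)
{
  int log;

  /* The high bound must be one less than a power of two.  */
  for (log = 0; log < 32; log++)
    if (hi == (1L << log) - 1)
      break;
  if (log == 32)
    return 0;

  if (lo == 0)
    {
      *bits = log;		/* usat #log.  */
      *is_signed = 0;
      return 1;
    }

  if (lo == -hi - 1)
    {
      *bits = log + 1;		/* ssat #(log + 1).  */
      *is_signed = 1;
      return 1;
    }

  return 0;
}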
13279 /* Return 1 if memory locations are adjacent. */
13281 adjacent_mem_locations (rtx a, rtx b)
13283 /* We don't guarantee to preserve the order of these memory refs. */
13284 if (volatile_refs_p (a) || volatile_refs_p (b))
13285 return 0;
13287 if ((REG_P (XEXP (a, 0))
13288 || (GET_CODE (XEXP (a, 0)) == PLUS
13289 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13290 && (REG_P (XEXP (b, 0))
13291 || (GET_CODE (XEXP (b, 0)) == PLUS
13292 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13294 HOST_WIDE_INT val0 = 0, val1 = 0;
13295 rtx reg0, reg1;
13296 int val_diff;
13298 if (GET_CODE (XEXP (a, 0)) == PLUS)
13300 reg0 = XEXP (XEXP (a, 0), 0);
13301 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13303 else
13304 reg0 = XEXP (a, 0);
13306 if (GET_CODE (XEXP (b, 0)) == PLUS)
13308 reg1 = XEXP (XEXP (b, 0), 0);
13309 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13311 else
13312 reg1 = XEXP (b, 0);
13314 /* Don't accept any offset that will require multiple
13315 instructions to handle, since this would cause the
13316 arith_adjacentmem pattern to output an overlong sequence. */
13317 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13318 return 0;
13320 /* Don't allow an eliminable register: register elimination can make
13321 the offset too large. */
13322 if (arm_eliminable_register (reg0))
13323 return 0;
13325 val_diff = val1 - val0;
13327 if (arm_ld_sched)
13329 /* If the target has load delay slots, then there's no benefit
13330 to using an ldm instruction unless the offset is zero and
13331 we are optimizing for size. */
13332 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13333 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13334 && (val_diff == 4 || val_diff == -4));
13337 return ((REGNO (reg0) == REGNO (reg1))
13338 && (val_diff == 4 || val_diff == -4));
13341 return 0;
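/* A minimal illustrative sketch (not part of arm.c or GCC) of the core test
   in adjacent_mem_locations above: both references must use the same base
   register and their offsets must differ by exactly one word.  The real
   function also rejects volatile references, eliminable base registers and
   offsets needing multiple instructions, and applies extra restrictions on
   cores with load delay slots.  The function name is hypothetical.  */

int
mem_refs_adjacent_p (int base_regno_a, long offset_a,
		     int base_regno_b, long offset_b)
{
  long diff = offset_b - offset_a;

  return base_regno_a == base_regno_b && (diff == 4 || diff == -4);
}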
13344 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13345 for load operations, false for store operations. CONSECUTIVE is true
13346 if the register numbers in the operation must be consecutive in the register
13347 bank. RETURN_PC is true if the value is to be loaded into the PC.
13348 The pattern we are trying to match for load is:
13349 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13350 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13353 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13355 where
13356 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13357 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13358 3. If consecutive is TRUE, then for kth register being loaded,
13359 REGNO (R_dk) = REGNO (R_d0) + k.
13360 The pattern for store is similar. */
13361 bool
13362 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13363 bool consecutive, bool return_pc)
13365 HOST_WIDE_INT count = XVECLEN (op, 0);
13366 rtx reg, mem, addr;
13367 unsigned regno;
13368 unsigned first_regno;
13369 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13370 rtx elt;
13371 bool addr_reg_in_reglist = false;
13372 bool update = false;
13373 int reg_increment;
13374 int offset_adj;
13375 int regs_per_val;
13377 /* If not in SImode, then registers must be consecutive
13378 (e.g., VLDM instructions for DFmode). */
13379 gcc_assert ((mode == SImode) || consecutive);
13380 /* Setting return_pc for stores is illegal. */
13381 gcc_assert (!return_pc || load);
13383 /* Set up the increments and the regs per val based on the mode. */
13384 reg_increment = GET_MODE_SIZE (mode);
13385 regs_per_val = reg_increment / 4;
13386 offset_adj = return_pc ? 1 : 0;
13388 if (count <= 1
13389 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13390 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13391 return false;
13393 /* Check if this is a write-back. */
13394 elt = XVECEXP (op, 0, offset_adj);
13395 if (GET_CODE (SET_SRC (elt)) == PLUS)
13397 i++;
13398 base = 1;
13399 update = true;
13401 /* The offset adjustment must be the number of registers being
13402 popped times the size of a single register. */
13403 if (!REG_P (SET_DEST (elt))
13404 || !REG_P (XEXP (SET_SRC (elt), 0))
13405 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13406 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13407 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13408 ((count - 1 - offset_adj) * reg_increment))
13409 return false;
13412 i = i + offset_adj;
13413 base = base + offset_adj;
13414 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13415 success depends on the type: VLDM can do just one reg,
13416 LDM must do at least two. */
13417 if ((count <= i) && (mode == SImode))
13418 return false;
13420 elt = XVECEXP (op, 0, i - 1);
13421 if (GET_CODE (elt) != SET)
13422 return false;
13424 if (load)
13426 reg = SET_DEST (elt);
13427 mem = SET_SRC (elt);
13429 else
13431 reg = SET_SRC (elt);
13432 mem = SET_DEST (elt);
13435 if (!REG_P (reg) || !MEM_P (mem))
13436 return false;
13438 regno = REGNO (reg);
13439 first_regno = regno;
13440 addr = XEXP (mem, 0);
13441 if (GET_CODE (addr) == PLUS)
13443 if (!CONST_INT_P (XEXP (addr, 1)))
13444 return false;
13446 offset = INTVAL (XEXP (addr, 1));
13447 addr = XEXP (addr, 0);
13450 if (!REG_P (addr))
13451 return false;
13453 /* Don't allow SP to be loaded unless it is also the base register. It
13454 guarantees that SP is reset correctly when an LDM instruction
13455 is interrupted. Otherwise, we might end up with a corrupt stack. */
13456 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13457 return false;
13459 for (; i < count; i++)
13461 elt = XVECEXP (op, 0, i);
13462 if (GET_CODE (elt) != SET)
13463 return false;
13465 if (load)
13467 reg = SET_DEST (elt);
13468 mem = SET_SRC (elt);
13470 else
13472 reg = SET_SRC (elt);
13473 mem = SET_DEST (elt);
13476 if (!REG_P (reg)
13477 || GET_MODE (reg) != mode
13478 || REGNO (reg) <= regno
13479 || (consecutive
13480 && (REGNO (reg) !=
13481 (unsigned int) (first_regno + regs_per_val * (i - base))))
13482 /* Don't allow SP to be loaded unless it is also the base register. It
13483 guarantees that SP is reset correctly when an LDM instruction
13484 is interrupted. Otherwise, we might end up with a corrupt stack. */
13485 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13486 || !MEM_P (mem)
13487 || GET_MODE (mem) != mode
13488 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13489 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13490 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13491 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13492 offset + (i - base) * reg_increment))
13493 && (!REG_P (XEXP (mem, 0))
13494 || offset + (i - base) * reg_increment != 0)))
13495 return false;
13497 regno = REGNO (reg);
13498 if (regno == REGNO (addr))
13499 addr_reg_in_reglist = true;
13502 if (load)
13504 if (update && addr_reg_in_reglist)
13505 return false;
13507 /* For Thumb-1, the address register is always modified, either by write-back
13508 or by explicit load. If the pattern does not describe an update,
13509 then the address register must be in the list of loaded registers. */
13510 if (TARGET_THUMB1)
13511 return update || addr_reg_in_reglist;
13514 return true;
13517 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13518 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13519 instruction. ADD_OFFSET is nonzero if the base address register needs
13520 to be modified with an add instruction before we can use it. */
13522 static bool
13523 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13524 int nops, HOST_WIDE_INT add_offset)
13526 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13527 if the offset isn't small enough. The reason 2 ldrs are faster
13528 is because these ARMs are able to do more than one cache access
13529 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13530 whilst the ARM8 has a double bandwidth cache. This means that
13531 these cores can do both an instruction fetch and a data fetch in
13532 a single cycle, so the trick of calculating the address into a
13533 scratch register (one of the result regs) and then doing a load
13534 multiple actually becomes slower (and no smaller in code size).
13535 That is the transformation
13537 ldr rd1, [rbase + offset]
13538 ldr rd2, [rbase + offset + 4]
13542 add rd1, rbase, offset
13543 ldmia rd1, {rd1, rd2}
13545 produces worse code -- '3 cycles + any stalls on rd2' instead of
13546 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13547 access per cycle, the first sequence could never complete in less
13548 than 6 cycles, whereas the ldm sequence would only take 5 and
13549 would make better use of sequential accesses if not hitting the
13550 cache.
13552 We cheat here and test 'arm_ld_sched' which we currently know to
13553 only be true for the ARM8, ARM9 and StrongARM. If this ever
13554 changes, then the test below needs to be reworked. */
13555 if (nops == 2 && arm_ld_sched && add_offset != 0)
13556 return false;
13558 /* XScale has load-store double instructions, but they have stricter
13559 alignment requirements than load-store multiple, so we cannot
13560 use them.
13562 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13563 the pipeline until completion.
13565 NREGS CYCLES
13566 1 3
13567 2 4
13568 3 5
13569 4 6
13571 An ldr instruction takes 1-3 cycles, but does not block the
13572 pipeline.
13574 NREGS CYCLES
13575 1 1-3
13576 2 2-6
13577 3 3-9
13578 4 4-12
13580 Best case ldr will always win. However, the more ldr instructions
13581 we issue, the less likely we are to be able to schedule them well.
13582 Using ldr instructions also increases code size.
13584 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13585 for counts of 3 or 4 regs. */
13586 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13587 return false;
13588 return true;
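/* A minimal illustrative sketch (not part of arm.c or GCC) of the XScale
   cycle counts quoted above: an ldm of NREGS registers blocks the pipeline
   for 2 + NREGS cycles, while NREGS separate ldr instructions take between
   NREGS and 3 * NREGS cycles but do not block it, which is why ldr is
   preferred for 1 or 2 registers unless optimizing for size.  The function
   name is hypothetical.  */

void
xscale_ldm_vs_ldr_cycles (int nregs, int *ldm_cycles,
			  int *ldr_cycles_min, int *ldr_cycles_max)
{
  *ldm_cycles = 2 + nregs;	/* Blocks the pipeline until completion.  */
  *ldr_cycles_min = nregs;	/* Each ldr takes 1-3 cycles...  */
  *ldr_cycles_max = 3 * nregs;	/* ...and does not block the pipeline.  */
}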
13591 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13592 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13593 an array ORDER which describes the sequence to use when accessing the
13594 offsets that produces an ascending order. In this sequence, each
13595 offset must be larger by exactly 4 than the previous one. ORDER[0]
13596 must have been filled in with the lowest offset by the caller.
13597 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13598 we use to verify that ORDER produces an ascending order of registers.
13599 Return true if it was possible to construct such an order, false if
13600 not. */
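/* Illustrative example (editorial, values chosen arbitrarily): with
   NOPS == 4 and UNSORTED_OFFSETS == {8, 0, 4, 12}, the caller sets
   ORDER[0] = 1 (the index of offset 0) and this function fills in
   ORDER == {1, 2, 0, 3}, i.e. it visits the offsets 0, 4, 8, 12.  If
   UNSORTED_REGS is supplied, the register numbers at indices 1, 2, 0, 3
   must also be strictly ascending for the function to succeed.  */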
13602 static bool
13603 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13604 int *unsorted_regs)
13606 int i;
13607 for (i = 1; i < nops; i++)
13609 int j;
13611 order[i] = order[i - 1];
13612 for (j = 0; j < nops; j++)
13613 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13615 /* We must find exactly one offset that is higher than the
13616 previous one by 4. */
13617 if (order[i] != order[i - 1])
13618 return false;
13619 order[i] = j;
13621 if (order[i] == order[i - 1])
13622 return false;
13623 /* The register numbers must be ascending. */
13624 if (unsorted_regs != NULL
13625 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13626 return false;
13628 return true;
13631 /* Used to determine in a peephole whether a sequence of load
13632 instructions can be changed into a load-multiple instruction.
13633 NOPS is the number of separate load instructions we are examining. The
13634 first NOPS entries in OPERANDS are the destination registers, the
13635 next NOPS entries are memory operands. If this function is
13636 successful, *BASE is set to the common base register of the memory
13637 accesses; *LOAD_OFFSET is set to the first memory location's offset
13638 from that base register.
13639 REGS is an array filled in with the destination register numbers.
13640 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13641 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13642 the sequence of registers in REGS matches the loads from ascending memory
13643 locations, and the function verifies that the register numbers are
13644 themselves ascending. If CHECK_REGS is false, the register numbers
13645 are stored in the order they are found in the operands. */
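/* Illustrative example (editorial): for the two loads
       ldr r0, [r4]
       ldr r1, [r4, #4]
   the function returns 1 (the ldmia case) with *BASE == 4 (r4),
   *LOAD_OFFSET == 0 and REGS == {0, 1}, provided the target-specific
   cost check in multiple_operation_profitable_p accepts a two-register
   transfer.  */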
13646 static int
13647 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13648 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13650 int unsorted_regs[MAX_LDM_STM_OPS];
13651 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13652 int order[MAX_LDM_STM_OPS];
13653 rtx base_reg_rtx = NULL;
13654 int base_reg = -1;
13655 int i, ldm_case;
13657 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13658 easily extended if required. */
13659 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13661 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13663 /* Loop over the operands and check that the memory references are
13664 suitable (i.e. immediate offsets from the same base register). At
13665 the same time, extract the target register, and the memory
13666 offsets. */
13667 for (i = 0; i < nops; i++)
13669 rtx reg;
13670 rtx offset;
13672 /* Convert a subreg of a mem into the mem itself. */
13673 if (GET_CODE (operands[nops + i]) == SUBREG)
13674 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13676 gcc_assert (MEM_P (operands[nops + i]));
13678 /* Don't reorder volatile memory references; it doesn't seem worth
13679 looking for the case where the order is ok anyway. */
13680 if (MEM_VOLATILE_P (operands[nops + i]))
13681 return 0;
13683 offset = const0_rtx;
13685 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13686 || (GET_CODE (reg) == SUBREG
13687 && REG_P (reg = SUBREG_REG (reg))))
13688 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13689 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13690 || (GET_CODE (reg) == SUBREG
13691 && REG_P (reg = SUBREG_REG (reg))))
13692 && (CONST_INT_P (offset
13693 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13695 if (i == 0)
13697 base_reg = REGNO (reg);
13698 base_reg_rtx = reg;
13699 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13700 return 0;
13702 else if (base_reg != (int) REGNO (reg))
13703 /* Not addressed from the same base register. */
13704 return 0;
13706 unsorted_regs[i] = (REG_P (operands[i])
13707 ? REGNO (operands[i])
13708 : REGNO (SUBREG_REG (operands[i])));
13710 /* If it isn't an integer register, or if it overwrites the
13711 base register but isn't the last insn in the list, then
13712 we can't do this. */
13713 if (unsorted_regs[i] < 0
13714 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13715 || unsorted_regs[i] > 14
13716 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13717 return 0;
13719 /* Don't allow SP to be loaded unless it is also the base
13720 register. This guarantees that SP is reset correctly when
13721 an LDM instruction is interrupted. Otherwise, we might
13722 end up with a corrupt stack. */
13723 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13724 return 0;
13726 unsorted_offsets[i] = INTVAL (offset);
13727 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13728 order[0] = i;
13730 else
13731 /* Not a suitable memory address. */
13732 return 0;
13735 /* All the useful information has now been extracted from the
13736 operands into unsorted_regs and unsorted_offsets; additionally,
13737 order[0] has been set to the lowest offset in the list. Sort
13738 the offsets into order, verifying that they are adjacent, and
13739 check that the register numbers are ascending. */
13740 if (!compute_offset_order (nops, unsorted_offsets, order,
13741 check_regs ? unsorted_regs : NULL))
13742 return 0;
13744 if (saved_order)
13745 memcpy (saved_order, order, sizeof order);
13747 if (base)
13749 *base = base_reg;
13751 for (i = 0; i < nops; i++)
13752 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13754 *load_offset = unsorted_offsets[order[0]];
13757 if (TARGET_THUMB1
13758 && !peep2_reg_dead_p (nops, base_reg_rtx))
13759 return 0;
13761 if (unsorted_offsets[order[0]] == 0)
13762 ldm_case = 1; /* ldmia */
13763 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13764 ldm_case = 2; /* ldmib */
13765 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13766 ldm_case = 3; /* ldmda */
13767 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13768 ldm_case = 4; /* ldmdb */
13769 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13770 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13771 ldm_case = 5;
13772 else
13773 return 0;
13775 if (!multiple_operation_profitable_p (false, nops,
13776 ldm_case == 5
13777 ? unsorted_offsets[order[0]] : 0))
13778 return 0;
13780 return ldm_case;
13783 /* Used to determine in a peephole whether a sequence of store instructions can
13784 be changed into a store-multiple instruction.
13785 NOPS is the number of separate store instructions we are examining.
13786 NOPS_TOTAL is the total number of instructions recognized by the peephole
13787 pattern.
13788 The first NOPS entries in OPERANDS are the source registers, the next
13789 NOPS entries are memory operands. If this function is successful, *BASE is
13790 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13791 to the first memory location's offset from that base register. REGS is an
13792 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13793 likewise filled with the corresponding rtx's.
13794 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13795 numbers to an ascending order of stores.
13796 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13797 from ascending memory locations, and the function verifies that the register
13798 numbers are themselves ascending. If CHECK_REGS is false, the register
13799 numbers are stored in the order they are found in the operands. */
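/* Illustrative example (editorial): "str r2, [r5]" followed by
   "str r3, [r5, #4]" yields stm_case 1 (stmia) with *BASE == 5,
   *LOAD_OFFSET == 0 and REGS == {2, 3}, subject to the same
   profitability check as for loads.  */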
13800 static int
13801 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13802 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13803 HOST_WIDE_INT *load_offset, bool check_regs)
13805 int unsorted_regs[MAX_LDM_STM_OPS];
13806 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13807 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13808 int order[MAX_LDM_STM_OPS];
13809 int base_reg = -1;
13810 rtx base_reg_rtx = NULL;
13811 int i, stm_case;
13813 /* Write-back of the base register is currently only supported for Thumb-1. */
13814 int base_writeback = TARGET_THUMB1;
13816 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13817 easily extended if required. */
13818 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13820 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13822 /* Loop over the operands and check that the memory references are
13823 suitable (i.e. immediate offsets from the same base register). At
13824 the same time, extract the target register, and the memory
13825 offsets. */
13826 for (i = 0; i < nops; i++)
13828 rtx reg;
13829 rtx offset;
13831 /* Convert a subreg of a mem into the mem itself. */
13832 if (GET_CODE (operands[nops + i]) == SUBREG)
13833 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13835 gcc_assert (MEM_P (operands[nops + i]));
13837 /* Don't reorder volatile memory references; it doesn't seem worth
13838 looking for the case where the order is ok anyway. */
13839 if (MEM_VOLATILE_P (operands[nops + i]))
13840 return 0;
13842 offset = const0_rtx;
13844 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13845 || (GET_CODE (reg) == SUBREG
13846 && REG_P (reg = SUBREG_REG (reg))))
13847 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13848 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13849 || (GET_CODE (reg) == SUBREG
13850 && REG_P (reg = SUBREG_REG (reg))))
13851 && (CONST_INT_P (offset
13852 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13854 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13855 ? operands[i] : SUBREG_REG (operands[i]));
13856 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13858 if (i == 0)
13860 base_reg = REGNO (reg);
13861 base_reg_rtx = reg;
13862 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13863 return 0;
13865 else if (base_reg != (int) REGNO (reg))
13866 /* Not addressed from the same base register. */
13867 return 0;
13869 /* If it isn't an integer register, then we can't do this. */
13870 if (unsorted_regs[i] < 0
13871 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13872 /* The effects are unpredictable if the base register is
13873 both updated and stored. */
13874 || (base_writeback && unsorted_regs[i] == base_reg)
13875 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13876 || unsorted_regs[i] > 14)
13877 return 0;
13879 unsorted_offsets[i] = INTVAL (offset);
13880 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13881 order[0] = i;
13883 else
13884 /* Not a suitable memory address. */
13885 return 0;
13888 /* All the useful information has now been extracted from the
13889 operands into unsorted_regs and unsorted_offsets; additionally,
13890 order[0] has been set to the lowest offset in the list. Sort
13891 the offsets into order, verifying that they are adjacent, and
13892 check that the register numbers are ascending. */
13893 if (!compute_offset_order (nops, unsorted_offsets, order,
13894 check_regs ? unsorted_regs : NULL))
13895 return 0;
13897 if (saved_order)
13898 memcpy (saved_order, order, sizeof order);
13900 if (base)
13902 *base = base_reg;
13904 for (i = 0; i < nops; i++)
13906 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13907 if (reg_rtxs)
13908 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13911 *load_offset = unsorted_offsets[order[0]];
13914 if (TARGET_THUMB1
13915 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13916 return 0;
13918 if (unsorted_offsets[order[0]] == 0)
13919 stm_case = 1; /* stmia */
13920 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13921 stm_case = 2; /* stmib */
13922 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13923 stm_case = 3; /* stmda */
13924 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13925 stm_case = 4; /* stmdb */
13926 else
13927 return 0;
13929 if (!multiple_operation_profitable_p (false, nops, 0))
13930 return 0;
13932 return stm_case;
13935 /* Routines for use in generating RTL. */
13937 /* Generate a load-multiple instruction. COUNT is the number of loads in
13938 the instruction; REGS and MEMS are arrays containing the operands.
13939 BASEREG is the base register to be used in addressing the memory operands.
13940 WBACK_OFFSET is nonzero if the instruction should update the base
13941 register. */
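/* Shape of the result (editorial summary of the code below): when the
   transfer is profitable, a single PARALLEL is returned whose first
   element is an optional SET advancing BASEREG by WBACK_OFFSET,
   followed by one SET per loaded register; otherwise a sequence of
   individual SImode moves (plus the base update, if any) is returned
   instead.  */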
13943 static rtx
13944 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13945 HOST_WIDE_INT wback_offset)
13947 int i = 0, j;
13948 rtx result;
13950 if (!multiple_operation_profitable_p (false, count, 0))
13952 rtx seq;
13954 start_sequence ();
13956 for (i = 0; i < count; i++)
13957 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13959 if (wback_offset != 0)
13960 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13962 seq = get_insns ();
13963 end_sequence ();
13965 return seq;
13968 result = gen_rtx_PARALLEL (VOIDmode,
13969 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13970 if (wback_offset != 0)
13972 XVECEXP (result, 0, 0)
13973 = gen_rtx_SET (VOIDmode, basereg,
13974 plus_constant (Pmode, basereg, wback_offset));
13975 i = 1;
13976 count++;
13979 for (j = 0; i < count; i++, j++)
13980 XVECEXP (result, 0, i)
13981 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13983 return result;
13986 /* Generate a store-multiple instruction. COUNT is the number of stores in
13987 the instruction; REGS and MEMS are arrays containing the operands.
13988 BASEREG is the base register to be used in addressing the memory operands.
13989 WBACK_OFFSET is nonzero if the instruction should update the base
13990 register. */
13992 static rtx
13993 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13994 HOST_WIDE_INT wback_offset)
13996 int i = 0, j;
13997 rtx result;
13999 if (GET_CODE (basereg) == PLUS)
14000 basereg = XEXP (basereg, 0);
14002 if (!multiple_operation_profitable_p (false, count, 0))
14004 rtx seq;
14006 start_sequence ();
14008 for (i = 0; i < count; i++)
14009 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14011 if (wback_offset != 0)
14012 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14014 seq = get_insns ();
14015 end_sequence ();
14017 return seq;
14020 result = gen_rtx_PARALLEL (VOIDmode,
14021 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14022 if (wback_offset != 0)
14024 XVECEXP (result, 0, 0)
14025 = gen_rtx_SET (VOIDmode, basereg,
14026 plus_constant (Pmode, basereg, wback_offset));
14027 i = 1;
14028 count++;
14031 for (j = 0; i < count; i++, j++)
14032 XVECEXP (result, 0, i)
14033 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14035 return result;
14038 /* Generate either a load-multiple or a store-multiple instruction. This
14039 function can be used in situations where we can start with a single MEM
14040 rtx and adjust its address upwards.
14041 COUNT is the number of operations in the instruction, not counting a
14042 possible update of the base register. REGS is an array containing the
14043 register operands.
14044 BASEREG is the base register to be used in addressing the memory operands,
14045 which are constructed from BASEMEM.
14046 WRITE_BACK specifies whether the generated instruction should include an
14047 update of the base register.
14048 OFFSETP is used to pass an offset to and from this function; this offset
14049 is not used when constructing the address (instead BASEMEM should have an
14050 appropriate offset in its address); it is used only for setting
14051 MEM_OFFSET, and is updated only if WRITE_BACK is true. */
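/* Illustrative example (editorial): with COUNT == 4, WRITE_BACK true
   and BASEREG == r4, the memory operands cover [r4], [r4, #4],
   [r4, #8] and [r4, #12], the generated instruction writes r4 back by
   16 bytes, and *OFFSETP is advanced by 16.  */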
14053 static rtx
14054 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14055 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14057 rtx mems[MAX_LDM_STM_OPS];
14058 HOST_WIDE_INT offset = *offsetp;
14059 int i;
14061 gcc_assert (count <= MAX_LDM_STM_OPS);
14063 if (GET_CODE (basereg) == PLUS)
14064 basereg = XEXP (basereg, 0);
14066 for (i = 0; i < count; i++)
14068 rtx addr = plus_constant (Pmode, basereg, i * 4);
14069 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14070 offset += 4;
14073 if (write_back)
14074 *offsetp = offset;
14076 if (is_load)
14077 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14078 write_back ? 4 * count : 0);
14079 else
14080 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14081 write_back ? 4 * count : 0);
14084 rtx
14085 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14086 rtx basemem, HOST_WIDE_INT *offsetp)
14088 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14089 offsetp);
14092 rtx
14093 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14094 rtx basemem, HOST_WIDE_INT *offsetp)
14096 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14097 offsetp);
14100 /* Called from a peephole2 expander to turn a sequence of loads into an
14101 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14102 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14103 is true if we can reorder the registers because their subsequent uses are
14104 commutative.
14105 Returns true iff we could generate a new instruction. */
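/* Notes derived from the code below (editorial): for Thumb-1 the base
   register must be dead after the sequence and write-back is always
   used.  For ldm_case 5 the offset is first added into a new base
   register (REGS[0] for ARM/Thumb-2, the original base for Thumb-1)
   and the multiple load then uses an offset of zero.  */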
14107 bool
14108 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14110 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14111 rtx mems[MAX_LDM_STM_OPS];
14112 int i, j, base_reg;
14113 rtx base_reg_rtx;
14114 HOST_WIDE_INT offset;
14115 int write_back = FALSE;
14116 int ldm_case;
14117 rtx addr;
14119 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14120 &base_reg, &offset, !sort_regs);
14122 if (ldm_case == 0)
14123 return false;
14125 if (sort_regs)
14126 for (i = 0; i < nops - 1; i++)
14127 for (j = i + 1; j < nops; j++)
14128 if (regs[i] > regs[j])
14130 int t = regs[i];
14131 regs[i] = regs[j];
14132 regs[j] = t;
14134 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14136 if (TARGET_THUMB1)
14138 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14139 gcc_assert (ldm_case == 1 || ldm_case == 5);
14140 write_back = TRUE;
14143 if (ldm_case == 5)
14145 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14146 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14147 offset = 0;
14148 if (!TARGET_THUMB1)
14150 base_reg = regs[0];
14151 base_reg_rtx = newbase;
14155 for (i = 0; i < nops; i++)
14157 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14158 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14159 SImode, addr, 0);
14161 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14162 write_back ? offset + i * 4 : 0));
14163 return true;
14166 /* Called from a peephole2 expander to turn a sequence of stores into an
14167 STM instruction. OPERANDS are the operands found by the peephole matcher;
14168 NOPS indicates how many separate stores we are trying to combine.
14169 Returns true iff we could generate a new instruction. */
14171 bool
14172 gen_stm_seq (rtx *operands, int nops)
14174 int i;
14175 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14176 rtx mems[MAX_LDM_STM_OPS];
14177 int base_reg;
14178 rtx base_reg_rtx;
14179 HOST_WIDE_INT offset;
14180 int write_back = FALSE;
14181 int stm_case;
14182 rtx addr;
14183 bool base_reg_dies;
14185 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14186 mem_order, &base_reg, &offset, true);
14188 if (stm_case == 0)
14189 return false;
14191 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14193 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14194 if (TARGET_THUMB1)
14196 gcc_assert (base_reg_dies);
14197 write_back = TRUE;
14200 if (stm_case == 5)
14202 gcc_assert (base_reg_dies);
14203 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14204 offset = 0;
14207 addr = plus_constant (Pmode, base_reg_rtx, offset);
14209 for (i = 0; i < nops; i++)
14211 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14212 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14213 SImode, addr, 0);
14215 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14216 write_back ? offset + i * 4 : 0));
14217 return true;
14220 /* Called from a peephole2 expander to turn a sequence of stores that are
14221 preceded by constant loads into an STM instruction. OPERANDS are the
14222 operands found by the peephole matcher; NOPS indicates how many
14223 separate stores we are trying to combine; there are 2 * NOPS
14224 instructions in the peephole.
14225 Returns true iff we could generate a new instruction. */
14227 bool
14228 gen_const_stm_seq (rtx *operands, int nops)
14230 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14231 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14232 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14233 rtx mems[MAX_LDM_STM_OPS];
14234 int base_reg;
14235 rtx base_reg_rtx;
14236 HOST_WIDE_INT offset;
14237 int write_back = FALSE;
14238 int stm_case;
14239 rtx addr;
14240 bool base_reg_dies;
14241 int i, j;
14242 HARD_REG_SET allocated;
14244 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14245 mem_order, &base_reg, &offset, false);
14247 if (stm_case == 0)
14248 return false;
14250 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14252 /* If the same register is used more than once, try to find a free
14253 register. */
14254 CLEAR_HARD_REG_SET (allocated);
14255 for (i = 0; i < nops; i++)
14257 for (j = i + 1; j < nops; j++)
14258 if (regs[i] == regs[j])
14260 rtx t = peep2_find_free_register (0, nops * 2,
14261 TARGET_THUMB1 ? "l" : "r",
14262 SImode, &allocated);
14263 if (t == NULL_RTX)
14264 return false;
14265 reg_rtxs[i] = t;
14266 regs[i] = REGNO (t);
14270 /* Compute an ordering that maps the register numbers to an ascending
14271 sequence. */
14272 reg_order[0] = 0;
14273 for (i = 0; i < nops; i++)
14274 if (regs[i] < regs[reg_order[0]])
14275 reg_order[0] = i;
14277 for (i = 1; i < nops; i++)
14279 int this_order = reg_order[i - 1];
14280 for (j = 0; j < nops; j++)
14281 if (regs[j] > regs[reg_order[i - 1]]
14282 && (this_order == reg_order[i - 1]
14283 || regs[j] < regs[this_order]))
14284 this_order = j;
14285 reg_order[i] = this_order;
14288 /* Ensure that registers that must be live after the instruction end
14289 up with the correct value. */
14290 for (i = 0; i < nops; i++)
14292 int this_order = reg_order[i];
14293 if ((this_order != mem_order[i]
14294 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14295 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14296 return false;
14299 /* Load the constants. */
14300 for (i = 0; i < nops; i++)
14302 rtx op = operands[2 * nops + mem_order[i]];
14303 sorted_regs[i] = regs[reg_order[i]];
14304 emit_move_insn (reg_rtxs[reg_order[i]], op);
14307 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14309 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14310 if (TARGET_THUMB1)
14312 gcc_assert (base_reg_dies);
14313 write_back = TRUE;
14316 if (stm_case == 5)
14318 gcc_assert (base_reg_dies);
14319 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14320 offset = 0;
14323 addr = plus_constant (Pmode, base_reg_rtx, offset);
14325 for (i = 0; i < nops; i++)
14327 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14328 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14329 SImode, addr, 0);
14331 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14332 write_back ? offset + i * 4 : 0));
14333 return true;
14336 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14337 unaligned copies on processors which support unaligned semantics for those
14338 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14339 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14340 An interleave factor of 1 (the minimum) will perform no interleaving.
14341 Load/store multiple are used for aligned addresses where possible. */
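/* Outline (editorial): the code below first copies whole blocks of
   INTERLEAVE_FACTOR words (using ldm/stm on whichever side is
   word-aligned, unaligned ldr/str otherwise), then any remaining whole
   words, then a trailing halfword and/or byte.  With an interleave
   factor of 2, each block is copied as load, load, store, store.  */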
14343 static void
14344 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14345 HOST_WIDE_INT length,
14346 unsigned int interleave_factor)
14348 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14349 int *regnos = XALLOCAVEC (int, interleave_factor);
14350 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14351 HOST_WIDE_INT i, j;
14352 HOST_WIDE_INT remaining = length, words;
14353 rtx halfword_tmp = NULL, byte_tmp = NULL;
14354 rtx dst, src;
14355 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14356 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14357 HOST_WIDE_INT srcoffset, dstoffset;
14358 HOST_WIDE_INT src_autoinc, dst_autoinc;
14359 rtx mem, addr;
14361 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14363 /* Use hard registers if we have aligned source or destination so we can use
14364 load/store multiple with contiguous registers. */
14365 if (dst_aligned || src_aligned)
14366 for (i = 0; i < interleave_factor; i++)
14367 regs[i] = gen_rtx_REG (SImode, i);
14368 else
14369 for (i = 0; i < interleave_factor; i++)
14370 regs[i] = gen_reg_rtx (SImode);
14372 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14373 src = copy_addr_to_reg (XEXP (srcbase, 0));
14375 srcoffset = dstoffset = 0;
14377 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14378 For copying the last bytes we want to subtract this offset again. */
14379 src_autoinc = dst_autoinc = 0;
14381 for (i = 0; i < interleave_factor; i++)
14382 regnos[i] = i;
14384 /* Copy BLOCK_SIZE_BYTES chunks. */
14386 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14388 /* Load words. */
14389 if (src_aligned && interleave_factor > 1)
14391 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14392 TRUE, srcbase, &srcoffset));
14393 src_autoinc += UNITS_PER_WORD * interleave_factor;
14395 else
14397 for (j = 0; j < interleave_factor; j++)
14399 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14400 - src_autoinc));
14401 mem = adjust_automodify_address (srcbase, SImode, addr,
14402 srcoffset + j * UNITS_PER_WORD);
14403 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14405 srcoffset += block_size_bytes;
14408 /* Store words. */
14409 if (dst_aligned && interleave_factor > 1)
14411 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14412 TRUE, dstbase, &dstoffset));
14413 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14415 else
14417 for (j = 0; j < interleave_factor; j++)
14419 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14420 - dst_autoinc));
14421 mem = adjust_automodify_address (dstbase, SImode, addr,
14422 dstoffset + j * UNITS_PER_WORD);
14423 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14425 dstoffset += block_size_bytes;
14428 remaining -= block_size_bytes;
14431 /* Copy any whole words left (note these aren't interleaved with any
14432 subsequent halfword/byte load/stores in the interests of simplicity). */
14434 words = remaining / UNITS_PER_WORD;
14436 gcc_assert (words < interleave_factor);
14438 if (src_aligned && words > 1)
14440 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14441 &srcoffset));
14442 src_autoinc += UNITS_PER_WORD * words;
14444 else
14446 for (j = 0; j < words; j++)
14448 addr = plus_constant (Pmode, src,
14449 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14450 mem = adjust_automodify_address (srcbase, SImode, addr,
14451 srcoffset + j * UNITS_PER_WORD);
14452 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14454 srcoffset += words * UNITS_PER_WORD;
14457 if (dst_aligned && words > 1)
14459 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14460 &dstoffset));
14461 dst_autoinc += words * UNITS_PER_WORD;
14463 else
14465 for (j = 0; j < words; j++)
14467 addr = plus_constant (Pmode, dst,
14468 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14469 mem = adjust_automodify_address (dstbase, SImode, addr,
14470 dstoffset + j * UNITS_PER_WORD);
14471 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14473 dstoffset += words * UNITS_PER_WORD;
14476 remaining -= words * UNITS_PER_WORD;
14478 gcc_assert (remaining < 4);
14480 /* Copy a halfword if necessary. */
14482 if (remaining >= 2)
14484 halfword_tmp = gen_reg_rtx (SImode);
14486 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14487 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14488 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14490 /* Either write out immediately, or delay until we've loaded the last
14491 byte, depending on interleave factor. */
14492 if (interleave_factor == 1)
14494 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14495 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14496 emit_insn (gen_unaligned_storehi (mem,
14497 gen_lowpart (HImode, halfword_tmp)));
14498 halfword_tmp = NULL;
14499 dstoffset += 2;
14502 remaining -= 2;
14503 srcoffset += 2;
14506 gcc_assert (remaining < 2);
14508 /* Copy last byte. */
14510 if ((remaining & 1) != 0)
14512 byte_tmp = gen_reg_rtx (SImode);
14514 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14515 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14516 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14518 if (interleave_factor == 1)
14520 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14521 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14522 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14523 byte_tmp = NULL;
14524 dstoffset++;
14527 remaining--;
14528 srcoffset++;
14531 /* Store last halfword if we haven't done so already. */
14533 if (halfword_tmp)
14535 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14536 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14537 emit_insn (gen_unaligned_storehi (mem,
14538 gen_lowpart (HImode, halfword_tmp)));
14539 dstoffset += 2;
14542 /* Likewise for last byte. */
14544 if (byte_tmp)
14546 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14547 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14548 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14549 dstoffset++;
14552 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14555 /* From mips_adjust_block_mem:
14557 Helper function for doing a loop-based block operation on memory
14558 reference MEM. Each iteration of the loop will operate on LENGTH
14559 bytes of MEM.
14561 Create a new base register for use within the loop and point it to
14562 the start of MEM. Create a new memory reference that uses this
14563 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14565 static void
14566 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14567 rtx *loop_mem)
14569 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14571 /* Although the new mem does not refer to a known location,
14572 it does keep up to LENGTH bytes of alignment. */
14573 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14574 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14577 /* From mips_block_move_loop:
14579 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14580 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14581 the memory regions do not overlap. */
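/* Illustrative example (editorial): with LENGTH == 100 and
   BYTES_PER_ITER == 16, the loop below executes six times (96 bytes)
   and the remaining 4 bytes are handled by a final call to
   arm_block_move_unaligned_straight.  */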
14583 static void
14584 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14585 unsigned int interleave_factor,
14586 HOST_WIDE_INT bytes_per_iter)
14588 rtx src_reg, dest_reg, final_src, test;
14589 HOST_WIDE_INT leftover;
14591 leftover = length % bytes_per_iter;
14592 length -= leftover;
14594 /* Create registers and memory references for use within the loop. */
14595 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14596 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14598 /* Calculate the value that SRC_REG should have after the last iteration of
14599 the loop. */
14600 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14601 0, 0, OPTAB_WIDEN);
14603 /* Emit the start of the loop. */
14604 rtx_code_label *label = gen_label_rtx ();
14605 emit_label (label);
14607 /* Emit the loop body. */
14608 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14609 interleave_factor);
14611 /* Move on to the next block. */
14612 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14613 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14615 /* Emit the loop condition. */
14616 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14617 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14619 /* Mop up any left-over bytes. */
14620 if (leftover)
14621 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14624 /* Emit a block move when either the source or destination is unaligned (not
14625 aligned to a four-byte boundary). This may need further tuning depending on
14626 core type, optimize_size setting, etc. */
14628 static int
14629 arm_movmemqi_unaligned (rtx *operands)
14631 HOST_WIDE_INT length = INTVAL (operands[2]);
14633 if (optimize_size)
14635 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14636 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14637 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14638 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14639 or dst_aligned though: allow more interleaving in those cases since the
14640 resulting code can be smaller. */
14641 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14642 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14644 if (length > 12)
14645 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14646 interleave_factor, bytes_per_iter);
14647 else
14648 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14649 interleave_factor);
14651 else
14653 /* Note that the loop created by arm_block_move_unaligned_loop may be
14654 subject to loop unrolling, which makes tuning this condition a little
14655 redundant. */
14656 if (length > 32)
14657 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14658 else
14659 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14662 return 1;
14665 int
14666 arm_gen_movmemqi (rtx *operands)
14668 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14669 HOST_WIDE_INT srcoffset, dstoffset;
14670 int i;
14671 rtx src, dst, srcbase, dstbase;
14672 rtx part_bytes_reg = NULL;
14673 rtx mem;
14675 if (!CONST_INT_P (operands[2])
14676 || !CONST_INT_P (operands[3])
14677 || INTVAL (operands[2]) > 64)
14678 return 0;
14680 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14681 return arm_movmemqi_unaligned (operands);
14683 if (INTVAL (operands[3]) & 3)
14684 return 0;
14686 dstbase = operands[0];
14687 srcbase = operands[1];
14689 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14690 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14692 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14693 out_words_to_go = INTVAL (operands[2]) / 4;
14694 last_bytes = INTVAL (operands[2]) & 3;
14695 dstoffset = srcoffset = 0;
14697 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14698 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14700 for (i = 0; in_words_to_go >= 2; i+=4)
14702 if (in_words_to_go > 4)
14703 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14704 TRUE, srcbase, &srcoffset));
14705 else
14706 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14707 src, FALSE, srcbase,
14708 &srcoffset));
14710 if (out_words_to_go)
14712 if (out_words_to_go > 4)
14713 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14714 TRUE, dstbase, &dstoffset));
14715 else if (out_words_to_go != 1)
14716 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14717 out_words_to_go, dst,
14718 (last_bytes == 0
14719 ? FALSE : TRUE),
14720 dstbase, &dstoffset));
14721 else
14723 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14724 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14725 if (last_bytes != 0)
14727 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14728 dstoffset += 4;
14733 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14734 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14737 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14738 if (out_words_to_go)
14740 rtx sreg;
14742 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14743 sreg = copy_to_reg (mem);
14745 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14746 emit_move_insn (mem, sreg);
14747 in_words_to_go--;
14749 gcc_assert (!in_words_to_go); /* Sanity check */
14752 if (in_words_to_go)
14754 gcc_assert (in_words_to_go > 0);
14756 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14757 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14760 gcc_assert (!last_bytes || part_bytes_reg);
14762 if (BYTES_BIG_ENDIAN && last_bytes)
14764 rtx tmp = gen_reg_rtx (SImode);
14766 /* The bytes we want are in the top end of the word. */
14767 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14768 GEN_INT (8 * (4 - last_bytes))));
14769 part_bytes_reg = tmp;
14771 while (last_bytes)
14773 mem = adjust_automodify_address (dstbase, QImode,
14774 plus_constant (Pmode, dst,
14775 last_bytes - 1),
14776 dstoffset + last_bytes - 1);
14777 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14779 if (--last_bytes)
14781 tmp = gen_reg_rtx (SImode);
14782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14783 part_bytes_reg = tmp;
14788 else
14790 if (last_bytes > 1)
14792 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14793 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14794 last_bytes -= 2;
14795 if (last_bytes)
14797 rtx tmp = gen_reg_rtx (SImode);
14798 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14799 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14800 part_bytes_reg = tmp;
14801 dstoffset += 2;
14805 if (last_bytes)
14807 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14808 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14812 return 1;
14815 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory
14816 rtx MEM by the size of its mode. */
14817 inline static rtx
14818 next_consecutive_mem (rtx mem)
14820 machine_mode mode = GET_MODE (mem);
14821 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14822 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14824 return adjust_automodify_address (mem, mode, addr, offset);
14827 /* Copy using LDRD/STRD instructions whenever possible.
14828 Returns true upon success. */
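/* Illustrative example (editorial): a 14-byte copy whose operands
   permit this path is emitted as one DImode copy, one SImode copy and
   one HImode copy, using the unaligned variants of the accesses
   whenever the corresponding side is not word-aligned.  */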
14829 bool
14830 gen_movmem_ldrd_strd (rtx *operands)
14832 unsigned HOST_WIDE_INT len;
14833 HOST_WIDE_INT align;
14834 rtx src, dst, base;
14835 rtx reg0;
14836 bool src_aligned, dst_aligned;
14837 bool src_volatile, dst_volatile;
14839 gcc_assert (CONST_INT_P (operands[2]));
14840 gcc_assert (CONST_INT_P (operands[3]));
14842 len = UINTVAL (operands[2]);
14843 if (len > 64)
14844 return false;
14846 /* Maximum alignment we can assume for both src and dst buffers. */
14847 align = INTVAL (operands[3]);
14849 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14850 return false;
14852 /* Place src and dst addresses in registers
14853 and update the corresponding mem rtx. */
14854 dst = operands[0];
14855 dst_volatile = MEM_VOLATILE_P (dst);
14856 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14857 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14858 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14860 src = operands[1];
14861 src_volatile = MEM_VOLATILE_P (src);
14862 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14863 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14864 src = adjust_automodify_address (src, VOIDmode, base, 0);
14866 if (!unaligned_access && !(src_aligned && dst_aligned))
14867 return false;
14869 if (src_volatile || dst_volatile)
14870 return false;
14872 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14873 if (!(dst_aligned || src_aligned))
14874 return arm_gen_movmemqi (operands);
14876 src = adjust_address (src, DImode, 0);
14877 dst = adjust_address (dst, DImode, 0);
14878 while (len >= 8)
14880 len -= 8;
14881 reg0 = gen_reg_rtx (DImode);
14882 if (src_aligned)
14883 emit_move_insn (reg0, src);
14884 else
14885 emit_insn (gen_unaligned_loaddi (reg0, src));
14887 if (dst_aligned)
14888 emit_move_insn (dst, reg0);
14889 else
14890 emit_insn (gen_unaligned_storedi (dst, reg0));
14892 src = next_consecutive_mem (src);
14893 dst = next_consecutive_mem (dst);
14896 gcc_assert (len < 8);
14897 if (len >= 4)
14899 /* More than a word but less than a double-word to copy. Copy a word. */
14900 reg0 = gen_reg_rtx (SImode);
14901 src = adjust_address (src, SImode, 0);
14902 dst = adjust_address (dst, SImode, 0);
14903 if (src_aligned)
14904 emit_move_insn (reg0, src);
14905 else
14906 emit_insn (gen_unaligned_loadsi (reg0, src));
14908 if (dst_aligned)
14909 emit_move_insn (dst, reg0);
14910 else
14911 emit_insn (gen_unaligned_storesi (dst, reg0));
14913 src = next_consecutive_mem (src);
14914 dst = next_consecutive_mem (dst);
14915 len -= 4;
14918 if (len == 0)
14919 return true;
14921 /* Copy the remaining bytes. */
14922 if (len >= 2)
14924 dst = adjust_address (dst, HImode, 0);
14925 src = adjust_address (src, HImode, 0);
14926 reg0 = gen_reg_rtx (SImode);
14927 if (src_aligned)
14928 emit_insn (gen_zero_extendhisi2 (reg0, src));
14929 else
14930 emit_insn (gen_unaligned_loadhiu (reg0, src));
14932 if (dst_aligned)
14933 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14934 else
14935 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14937 src = next_consecutive_mem (src);
14938 dst = next_consecutive_mem (dst);
14939 if (len == 2)
14940 return true;
14943 dst = adjust_address (dst, QImode, 0);
14944 src = adjust_address (src, QImode, 0);
14945 reg0 = gen_reg_rtx (QImode);
14946 emit_move_insn (reg0, src);
14947 emit_move_insn (dst, reg0);
14948 return true;
14951 /* Select a dominance comparison mode if possible for a test of the general
14952 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14953 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14954 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14955 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14956 In all cases OP will be either EQ or NE, but we don't need to know which
14957 here. If we are unable to support a dominance comparison we return
14958 CC mode. This will then fail to match for the RTL expressions that
14959 generate this call. */
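/* Illustrative example (editorial): for (LT a b) || (LE c d) with
   COND_OR == DOM_CC_X_OR_Y, LT dominates LE and CC_DLEmode is
   returned; if neither comparison equals or dominates the other (even
   after swapping them), CCmode is returned and the caller's pattern
   fails to match.  */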
14960 machine_mode
14961 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14963 enum rtx_code cond1, cond2;
14964 int swapped = 0;
14966 /* Currently we will probably get the wrong result if the individual
14967 comparisons are not simple. This also ensures that it is safe to
14968 reverse a comparison if necessary. */
14969 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14970 != CCmode)
14971 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14972 != CCmode))
14973 return CCmode;
14975 /* The if_then_else variant of this tests the second condition if the
14976 first passes, but is true if the first fails. Reverse the first
14977 condition to get a true "inclusive-or" expression. */
14978 if (cond_or == DOM_CC_NX_OR_Y)
14979 cond1 = reverse_condition (cond1);
14981 /* If the comparisons are not equal, and one doesn't dominate the other,
14982 then we can't do this. */
14983 if (cond1 != cond2
14984 && !comparison_dominates_p (cond1, cond2)
14985 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14986 return CCmode;
14988 if (swapped)
14990 enum rtx_code temp = cond1;
14991 cond1 = cond2;
14992 cond2 = temp;
14995 switch (cond1)
14997 case EQ:
14998 if (cond_or == DOM_CC_X_AND_Y)
14999 return CC_DEQmode;
15001 switch (cond2)
15003 case EQ: return CC_DEQmode;
15004 case LE: return CC_DLEmode;
15005 case LEU: return CC_DLEUmode;
15006 case GE: return CC_DGEmode;
15007 case GEU: return CC_DGEUmode;
15008 default: gcc_unreachable ();
15011 case LT:
15012 if (cond_or == DOM_CC_X_AND_Y)
15013 return CC_DLTmode;
15015 switch (cond2)
15017 case LT:
15018 return CC_DLTmode;
15019 case LE:
15020 return CC_DLEmode;
15021 case NE:
15022 return CC_DNEmode;
15023 default:
15024 gcc_unreachable ();
15027 case GT:
15028 if (cond_or == DOM_CC_X_AND_Y)
15029 return CC_DGTmode;
15031 switch (cond2)
15033 case GT:
15034 return CC_DGTmode;
15035 case GE:
15036 return CC_DGEmode;
15037 case NE:
15038 return CC_DNEmode;
15039 default:
15040 gcc_unreachable ();
15043 case LTU:
15044 if (cond_or == DOM_CC_X_AND_Y)
15045 return CC_DLTUmode;
15047 switch (cond2)
15049 case LTU:
15050 return CC_DLTUmode;
15051 case LEU:
15052 return CC_DLEUmode;
15053 case NE:
15054 return CC_DNEmode;
15055 default:
15056 gcc_unreachable ();
15059 case GTU:
15060 if (cond_or == DOM_CC_X_AND_Y)
15061 return CC_DGTUmode;
15063 switch (cond2)
15065 case GTU:
15066 return CC_DGTUmode;
15067 case GEU:
15068 return CC_DGEUmode;
15069 case NE:
15070 return CC_DNEmode;
15071 default:
15072 gcc_unreachable ();
15075 /* The remaining cases only occur when both comparisons are the
15076 same. */
15077 case NE:
15078 gcc_assert (cond1 == cond2);
15079 return CC_DNEmode;
15081 case LE:
15082 gcc_assert (cond1 == cond2);
15083 return CC_DLEmode;
15085 case GE:
15086 gcc_assert (cond1 == cond2);
15087 return CC_DGEmode;
15089 case LEU:
15090 gcc_assert (cond1 == cond2);
15091 return CC_DLEUmode;
15093 case GEU:
15094 gcc_assert (cond1 == cond2);
15095 return CC_DGEUmode;
15097 default:
15098 gcc_unreachable ();
15102 machine_mode
15103 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15105 /* All floating point compares return CCFP if it is an equality
15106 comparison, and CCFPE otherwise. */
15107 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15109 switch (op)
15111 case EQ:
15112 case NE:
15113 case UNORDERED:
15114 case ORDERED:
15115 case UNLT:
15116 case UNLE:
15117 case UNGT:
15118 case UNGE:
15119 case UNEQ:
15120 case LTGT:
15121 return CCFPmode;
15123 case LT:
15124 case LE:
15125 case GT:
15126 case GE:
15127 return CCFPEmode;
15129 default:
15130 gcc_unreachable ();
15134 /* A compare with a shifted operand. Because of canonicalization, the
15135 comparison will have to be swapped when we emit the assembler. */
15136 if (GET_MODE (y) == SImode
15137 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15138 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15139 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15140 || GET_CODE (x) == ROTATERT))
15141 return CC_SWPmode;
15143 /* This operation is performed swapped, but since we only rely on the Z
15144 flag we don't need an additional mode. */
15145 if (GET_MODE (y) == SImode
15146 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15147 && GET_CODE (x) == NEG
15148 && (op == EQ || op == NE))
15149 return CC_Zmode;
15151 /* This is a special case that is used by combine to allow a
15152 comparison of a shifted byte load to be split into a zero-extend
15153 followed by a comparison of the shifted integer (only valid for
15154 equalities and unsigned inequalities). */
15155 if (GET_MODE (x) == SImode
15156 && GET_CODE (x) == ASHIFT
15157 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15158 && GET_CODE (XEXP (x, 0)) == SUBREG
15159 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15160 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15161 && (op == EQ || op == NE
15162 || op == GEU || op == GTU || op == LTU || op == LEU)
15163 && CONST_INT_P (y))
15164 return CC_Zmode;
15166 /* A construct for a conditional compare, if the false arm contains
15167 0, then both conditions must be true, otherwise either condition
15168 must be true. Not all conditions are possible, so CCmode is
15169 returned if it can't be done. */
15170 if (GET_CODE (x) == IF_THEN_ELSE
15171 && (XEXP (x, 2) == const0_rtx
15172 || XEXP (x, 2) == const1_rtx)
15173 && COMPARISON_P (XEXP (x, 0))
15174 && COMPARISON_P (XEXP (x, 1)))
15175 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15176 INTVAL (XEXP (x, 2)));
15178 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15179 if (GET_CODE (x) == AND
15180 && (op == EQ || op == NE)
15181 && COMPARISON_P (XEXP (x, 0))
15182 && COMPARISON_P (XEXP (x, 1)))
15183 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15184 DOM_CC_X_AND_Y);
15186 if (GET_CODE (x) == IOR
15187 && (op == EQ || op == NE)
15188 && COMPARISON_P (XEXP (x, 0))
15189 && COMPARISON_P (XEXP (x, 1)))
15190 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15191 DOM_CC_X_OR_Y);
15193 /* An operation (on Thumb) where we want to test for a single bit.
15194 This is done by shifting that bit up into the top bit of a
15195 scratch register; we can then branch on the sign bit. */
15196 if (TARGET_THUMB1
15197 && GET_MODE (x) == SImode
15198 && (op == EQ || op == NE)
15199 && GET_CODE (x) == ZERO_EXTRACT
15200 && XEXP (x, 1) == const1_rtx)
15201 return CC_Nmode;
15203 /* An operation that sets the condition codes as a side-effect; the
15204 V flag is not set correctly, so we can only use comparisons where
15205 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15206 instead.) */
15207 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15208 if (GET_MODE (x) == SImode
15209 && y == const0_rtx
15210 && (op == EQ || op == NE || op == LT || op == GE)
15211 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15212 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15213 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15214 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15215 || GET_CODE (x) == LSHIFTRT
15216 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15217 || GET_CODE (x) == ROTATERT
15218 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15219 return CC_NOOVmode;
15221 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15222 return CC_Zmode;
15224 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15225 && GET_CODE (x) == PLUS
15226 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15227 return CC_Cmode;
15229 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15231 switch (op)
15233 case EQ:
15234 case NE:
15235 /* A DImode comparison against zero can be implemented by
15236 or'ing the two halves together. */
15237 if (y == const0_rtx)
15238 return CC_Zmode;
15240 /* We can do an equality test in three Thumb instructions. */
15241 if (!TARGET_32BIT)
15242 return CC_Zmode;
15244 /* FALLTHROUGH */
15246 case LTU:
15247 case LEU:
15248 case GTU:
15249 case GEU:
15250 /* DImode unsigned comparisons can be implemented by cmp +
15251 cmpeq without a scratch register. Not worth doing in
15252 Thumb-2. */
15253 if (TARGET_32BIT)
15254 return CC_CZmode;
15256 /* FALLTHROUGH */
15258 case LT:
15259 case LE:
15260 case GT:
15261 case GE:
15262 /* DImode signed and unsigned comparisons can be implemented
15263 by cmp + sbcs with a scratch register, but that does not
15264 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15265 gcc_assert (op != EQ && op != NE);
15266 return CC_NCVmode;
15268 default:
15269 gcc_unreachable ();
15273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15274 return GET_MODE (x);
15276 return CCmode;
15279 /* X and Y are two things to compare using CODE. Emit the compare insn and
15280 return the rtx for the CC register in the appropriate mode. SCRATCH is
15281 a scratch register, which may be needed for DImode comparisons. */
15282 rtx
15283 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15285 machine_mode mode;
15286 rtx cc_reg;
15287 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15289 /* We might have X as a constant, Y as a register because of the predicates
15290 used for cmpdi. If so, force X to a register here. */
15291 if (dimode_comparison && !REG_P (x))
15292 x = force_reg (DImode, x);
15294 mode = SELECT_CC_MODE (code, x, y);
15295 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15297 if (dimode_comparison
15298 && mode != CC_CZmode)
15300 rtx clobber, set;
15302 /* To compare two non-zero values for equality, XOR them and
15303 then compare against zero. Not used for ARM mode; there
15304 CC_CZmode is cheaper. */
15305 if (mode == CC_Zmode && y != const0_rtx)
15307 gcc_assert (!reload_completed);
15308 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15309 y = const0_rtx;
15312 /* A scratch register is required. */
15313 if (reload_completed)
15314 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15315 else
15316 scratch = gen_rtx_SCRATCH (SImode);
15318 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15319 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15320 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15322 else
15323 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15325 return cc_reg;
15328 /* Generate a sequence of insns that will generate the correct return
15329 address mask depending on the physical architecture that the program
15330 is running on. */
15331 rtx
15332 arm_gen_return_addr_mask (void)
15334 rtx reg = gen_reg_rtx (Pmode);
15336 emit_insn (gen_return_addr_mask (reg));
15337 return reg;
15340 void
15341 arm_reload_in_hi (rtx *operands)
15343 rtx ref = operands[1];
15344 rtx base, scratch;
15345 HOST_WIDE_INT offset = 0;
15347 if (GET_CODE (ref) == SUBREG)
15349 offset = SUBREG_BYTE (ref);
15350 ref = SUBREG_REG (ref);
15353 if (REG_P (ref))
15355 /* We have a pseudo which has been spilt onto the stack; there
15356 are two cases here: the first where there is a simple
15357 stack-slot replacement and a second where the stack-slot is
15358 out of range, or is used as a subreg. */
15359 if (reg_equiv_mem (REGNO (ref)))
15361 ref = reg_equiv_mem (REGNO (ref));
15362 base = find_replacement (&XEXP (ref, 0));
15364 else
15365 /* The slot is out of range, or was dressed up in a SUBREG. */
15366 base = reg_equiv_address (REGNO (ref));
15368 else
15369 base = find_replacement (&XEXP (ref, 0));
15371 /* Handle the case where the address is too complex to be offset by 1. */
15372 if (GET_CODE (base) == MINUS
15373 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15375 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15377 emit_set_insn (base_plus, base);
15378 base = base_plus;
15380 else if (GET_CODE (base) == PLUS)
15382 /* The addend must be CONST_INT, or we would have dealt with it above. */
15383 HOST_WIDE_INT hi, lo;
15385 offset += INTVAL (XEXP (base, 1));
15386 base = XEXP (base, 0);
15388 /* Rework the address into a legal sequence of insns. */
15389 /* Valid range for lo is -4095 -> 4095 */
15390 lo = (offset >= 0
15391 ? (offset & 0xfff)
15392 : -((-offset) & 0xfff));
15394 /* Corner case: if lo is the max offset then we would be out of range
15395 once we have added the additional 1 below, so bump the msb into the
15396 pre-loading insn(s). */
15397 if (lo == 4095)
15398 lo &= 0x7ff;
15400 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15401 ^ (HOST_WIDE_INT) 0x80000000)
15402 - (HOST_WIDE_INT) 0x80000000);
15404 gcc_assert (hi + lo == offset);
15406 if (hi != 0)
15408 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15410 /* Get the base address; addsi3 knows how to handle constants
15411 that require more than one insn. */
15412 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15413 base = base_plus;
15414 offset = lo;
15418 /* Operands[2] may overlap operands[0] (though it won't overlap
15419 operands[1]); that's why we asked for a DImode reg -- so we can
15420 use the half that does not overlap. */
15421 if (REGNO (operands[2]) == REGNO (operands[0]))
15422 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15423 else
15424 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15426 emit_insn (gen_zero_extendqisi2 (scratch,
15427 gen_rtx_MEM (QImode,
15428 plus_constant (Pmode, base,
15429 offset))));
15430 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15431 gen_rtx_MEM (QImode,
15432 plus_constant (Pmode, base,
15433 offset + 1))));
15434 if (!BYTES_BIG_ENDIAN)
15435 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15436 gen_rtx_IOR (SImode,
15437 gen_rtx_ASHIFT
15438 (SImode,
15439 gen_rtx_SUBREG (SImode, operands[0], 0),
15440 GEN_INT (8)),
15441 scratch));
15442 else
15443 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15444 gen_rtx_IOR (SImode,
15445 gen_rtx_ASHIFT (SImode, scratch,
15446 GEN_INT (8)),
15447 gen_rtx_SUBREG (SImode, operands[0], 0)));
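/* A worked example of the HI/LO offset split used above (and again in
   arm_reload_out_hi below); this is an illustrative sketch only and it
   glosses over the 32-bit sign-extension step the real code applies to HI:
   an offset of 5000 splits into lo = 5000 & 0xfff = 904 and hi = 4096, so
   the base register is first advanced by 4096 and the two byte accesses
   then use offsets 904 and 905, both within the +/-4095 range; for the
   corner case offset = 4095, lo is clipped to 4095 & 0x7ff = 2047 and hi
   becomes 2048, keeping the "offset + 1" access in range as well. */
#if 0 /* Standalone sketch of the split (simplified, hypothetical helper). */
static void
split_hi_lo (long offset, long *hi, long *lo)
{
  /* LO keeps the low 12 bits, preserving the sign of OFFSET. */
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  /* Leave room for the later "offset + 1" access. */
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = offset - *lo;
}
#endif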
15450 /* Handle storing a half-word to memory during reload by synthesizing it as two
15451 byte stores. Take care not to clobber the input values until after we
15452 have moved them somewhere safe. This code assumes that if the DImode
15453 scratch in operands[2] overlaps either the input value or output address
15454 in some way, then that value must die in this insn (we absolutely need
15455 two scratch registers for some corner cases). */
15456 void
15457 arm_reload_out_hi (rtx *operands)
15459 rtx ref = operands[0];
15460 rtx outval = operands[1];
15461 rtx base, scratch;
15462 HOST_WIDE_INT offset = 0;
15464 if (GET_CODE (ref) == SUBREG)
15466 offset = SUBREG_BYTE (ref);
15467 ref = SUBREG_REG (ref);
15470 if (REG_P (ref))
15472 /* We have a pseudo which has been spilt onto the stack; there
15473 are two cases here: the first where there is a simple
15474 stack-slot replacement and a second where the stack-slot is
15475 out of range, or is used as a subreg. */
15476 if (reg_equiv_mem (REGNO (ref)))
15478 ref = reg_equiv_mem (REGNO (ref));
15479 base = find_replacement (&XEXP (ref, 0));
15481 else
15482 /* The slot is out of range, or was dressed up in a SUBREG. */
15483 base = reg_equiv_address (REGNO (ref));
15485 else
15486 base = find_replacement (&XEXP (ref, 0));
15488 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15490 /* Handle the case where the address is too complex to be offset by 1. */
15491 if (GET_CODE (base) == MINUS
15492 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15494 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15496 /* Be careful not to destroy OUTVAL. */
15497 if (reg_overlap_mentioned_p (base_plus, outval))
15499 /* Updating base_plus might destroy outval, see if we can
15500 swap the scratch and base_plus. */
15501 if (!reg_overlap_mentioned_p (scratch, outval))
15503 rtx tmp = scratch;
15504 scratch = base_plus;
15505 base_plus = tmp;
15507 else
15509 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15511 /* Be conservative and copy OUTVAL into the scratch now,
15512 this should only be necessary if outval is a subreg
15513 of something larger than a word. */
15514 /* XXX Might this clobber base? I can't see how it can,
15515 since scratch is known to overlap with OUTVAL, and
15516 must be wider than a word. */
15517 emit_insn (gen_movhi (scratch_hi, outval));
15518 outval = scratch_hi;
15522 emit_set_insn (base_plus, base);
15523 base = base_plus;
15525 else if (GET_CODE (base) == PLUS)
15527 /* The addend must be CONST_INT, or we would have dealt with it above. */
15528 HOST_WIDE_INT hi, lo;
15530 offset += INTVAL (XEXP (base, 1));
15531 base = XEXP (base, 0);
15533 /* Rework the address into a legal sequence of insns. */
15534 /* Valid range for lo is -4095 -> 4095 */
15535 lo = (offset >= 0
15536 ? (offset & 0xfff)
15537 : -((-offset) & 0xfff));
15539 /* Corner case: if lo is the maximum offset, then we would be out of range
15540 once we have added the additional 1 below, so bump the msb into the
15541 pre-loading insn(s). */
15542 if (lo == 4095)
15543 lo &= 0x7ff;
15545 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15546 ^ (HOST_WIDE_INT) 0x80000000)
15547 - (HOST_WIDE_INT) 0x80000000);
15549 gcc_assert (hi + lo == offset);
15551 if (hi != 0)
15553 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15555 /* Be careful not to destroy OUTVAL. */
15556 if (reg_overlap_mentioned_p (base_plus, outval))
15558 /* Updating base_plus might destroy outval, see if we
15559 can swap the scratch and base_plus. */
15560 if (!reg_overlap_mentioned_p (scratch, outval))
15562 rtx tmp = scratch;
15563 scratch = base_plus;
15564 base_plus = tmp;
15566 else
15568 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15570 /* Be conservative and copy outval into scratch now,
15571 this should only be necessary if outval is a
15572 subreg of something larger than a word. */
15573 /* XXX Might this clobber base? I can't see how it
15574 can, since scratch is known to overlap with
15575 outval. */
15576 emit_insn (gen_movhi (scratch_hi, outval));
15577 outval = scratch_hi;
15581 /* Get the base address; addsi3 knows how to handle constants
15582 that require more than one insn. */
15583 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15584 base = base_plus;
15585 offset = lo;
15589 if (BYTES_BIG_ENDIAN)
15591 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15592 plus_constant (Pmode, base,
15593 offset + 1)),
15594 gen_lowpart (QImode, outval)));
15595 emit_insn (gen_lshrsi3 (scratch,
15596 gen_rtx_SUBREG (SImode, outval, 0),
15597 GEN_INT (8)));
15598 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15599 offset)),
15600 gen_lowpart (QImode, scratch)));
15602 else
15604 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15605 offset)),
15606 gen_lowpart (QImode, outval)));
15607 emit_insn (gen_lshrsi3 (scratch,
15608 gen_rtx_SUBREG (SImode, outval, 0),
15609 GEN_INT (8)));
15610 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15611 plus_constant (Pmode, base,
15612 offset + 1)),
15613 gen_lowpart (QImode, scratch)));
15617 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15618 (padded to the size of a word) should be passed in a register. */
15620 static bool
15621 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15623 if (TARGET_AAPCS_BASED)
15624 return must_pass_in_stack_var_size (mode, type);
15625 else
15626 return must_pass_in_stack_var_size_or_pad (mode, type);
15630 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15631 Return true if an argument passed on the stack should be padded upwards,
15632 i.e. if the least-significant byte has useful data.
15633 For legacy APCS ABIs we use the default. For AAPCS-based ABIs, small
15634 aggregate types are placed at the lowest memory address. */
15636 bool
15637 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15639 if (!TARGET_AAPCS_BASED)
15640 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15642 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15643 return false;
15645 return true;
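/* A sketch of what the rule above means in practice on a big-endian AAPCS
   target (hypothetical example types):

     struct s3 { char c[3]; };   small aggregate: padded upward, so its
                                 bytes sit at the low addresses of the
                                 word-sized stack slot;
     short h;                    integral type: padded downward, so the
                                 useful bytes end up in the high-addressed,
                                 least significant part of the slot.

   On little-endian AAPCS targets everything is padded upward; legacy APCS
   ABIs simply use the target default. */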
15649 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15650 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15651 register has useful data, and return the opposite if the most
15652 significant byte does. */
15654 bool
15655 arm_pad_reg_upward (machine_mode mode,
15656 tree type, int first ATTRIBUTE_UNUSED)
15658 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15660 /* For AAPCS, small aggregates, small fixed-point types,
15661 and small complex types are always padded upwards. */
15662 if (type)
15664 if ((AGGREGATE_TYPE_P (type)
15665 || TREE_CODE (type) == COMPLEX_TYPE
15666 || FIXED_POINT_TYPE_P (type))
15667 && int_size_in_bytes (type) <= 4)
15668 return true;
15670 else
15672 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15673 && GET_MODE_SIZE (mode) <= 4)
15674 return true;
15678 /* Otherwise, use default padding. */
15679 return !BYTES_BIG_ENDIAN;
15682 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15683 assuming that the address in the base register is word aligned. */
15684 bool
15685 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15687 HOST_WIDE_INT max_offset;
15689 /* Offset must be a multiple of 4 in Thumb mode. */
15690 if (TARGET_THUMB2 && ((offset & 3) != 0))
15691 return false;
15693 if (TARGET_THUMB2)
15694 max_offset = 1020;
15695 else if (TARGET_ARM)
15696 max_offset = 255;
15697 else
15698 return false;
15700 return ((offset <= max_offset) && (offset >= -max_offset));
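/* Illustrative checks against the ranges above (assuming the usual Thumb-2
   and ARM encodings of LDRD/STRD): in Thumb-2, offsets 0, 4 and -1020 are
   accepted, while 1024 is out of range and 2 is rejected because it is not
   a multiple of 4; in ARM state any offset in [-255, 255] is accepted,
   e.g. 248 or -200; Thumb-1 is always rejected, since it has no LDRD/STRD. */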
15703 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15704 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15705 Assumes that the address in the base register RN is word aligned. Pattern
15706 guarantees that both memory accesses use the same base register,
15707 the offsets are constants within range, and the gap between the offsets is 4.
15708 If reload is complete, then check that the registers are legal. WBACK indicates whether
15709 address is updated. LOAD indicates whether memory access is load or store. */
15710 bool
15711 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15712 bool wback, bool load)
15714 unsigned int t, t2, n;
15716 if (!reload_completed)
15717 return true;
15719 if (!offset_ok_for_ldrd_strd (offset))
15720 return false;
15722 t = REGNO (rt);
15723 t2 = REGNO (rt2);
15724 n = REGNO (rn);
15726 if ((TARGET_THUMB2)
15727 && ((wback && (n == t || n == t2))
15728 || (t == SP_REGNUM)
15729 || (t == PC_REGNUM)
15730 || (t2 == SP_REGNUM)
15731 || (t2 == PC_REGNUM)
15732 || (!load && (n == PC_REGNUM))
15733 || (load && (t == t2))
15734 /* Triggers the Cortex-M3 LDRD erratum. */
15735 || (!wback && load && fix_cm3_ldrd && (n == t))))
15736 return false;
15738 if ((TARGET_ARM)
15739 && ((wback && (n == t || n == t2))
15740 || (t2 == PC_REGNUM)
15741 || (t % 2 != 0) /* First destination register is not even. */
15742 || (t2 != t + 1)
15743 /* PC can be used as a base register (for offset addressing only),
15744 but it is deprecated. */
15745 || (n == PC_REGNUM)))
15746 return false;
15748 return true;
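/* For example, once reload has completed, an ARM-state "ldrd r0, r1, [r2]"
   passes the checks above (even first destination register, consecutive
   pair, base register not PC), whereas "ldrd r1, r2, [r3]" fails because
   the first destination register is odd, and a write-back form such as
   "ldrd r0, r1, [r0]!" fails because the base overlaps a destination.
   In Thumb-2 state the even/consecutive restriction does not apply, but
   SP and PC are excluded and the Cortex-M3 erratum check may still
   reject a load. */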
15751 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15752 operand MEM's address contains an immediate offset from the base
15753 register and has no side effects, in which case it sets BASE and
15754 OFFSET accordingly. */
15755 static bool
15756 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15758 rtx addr;
15760 gcc_assert (base != NULL && offset != NULL);
15762 /* TODO: Handle more general memory operand patterns, such as
15763 PRE_DEC and PRE_INC. */
15765 if (side_effects_p (mem))
15766 return false;
15768 /* Can't deal with subregs. */
15769 if (GET_CODE (mem) == SUBREG)
15770 return false;
15772 gcc_assert (MEM_P (mem));
15774 *offset = const0_rtx;
15776 addr = XEXP (mem, 0);
15778 /* If addr isn't valid for DImode, then we can't handle it. */
15779 if (!arm_legitimate_address_p (DImode, addr,
15780 reload_in_progress || reload_completed))
15781 return false;
15783 if (REG_P (addr))
15785 *base = addr;
15786 return true;
15788 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15790 *base = XEXP (addr, 0);
15791 *offset = XEXP (addr, 1);
15792 return (REG_P (*base) && CONST_INT_P (*offset));
15795 return false;
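/* In other words, the helper above accepts plain register addresses and
   register plus/minus constant addresses, e.g. the RTL for [r4] or
   [r4, #8], and rejects auto-modify addresses (pre/post increment or
   decrement), subregs, and anything else. */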
15798 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15800 /* Called from a peephole2 to replace two word-size accesses with a
15801 single LDRD/STRD instruction. Returns true iff we can generate a
15802 new instruction sequence. That is, both accesses use the same base
15803 register and the gap between constant offsets is 4. This function
15804 may reorder its operands to match ldrd/strd RTL templates.
15805 OPERANDS are the operands found by the peephole matcher;
15806 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15807 corresponding memory operands. LOAD indicates whether the access
15808 is a load or a store. CONST_STORE indicates a store of constant
15809 integer values held in OPERANDS[4,5] and assumes that the pattern
15810 is 4 insns long, for the purpose of checking dead registers.
15811 COMMUTE indicates that register operands may be reordered. */
15812 bool
15813 gen_operands_ldrd_strd (rtx *operands, bool load,
15814 bool const_store, bool commute)
15816 int nops = 2;
15817 HOST_WIDE_INT offsets[2], offset;
15818 rtx base = NULL_RTX;
15819 rtx cur_base, cur_offset, tmp;
15820 int i, gap;
15821 HARD_REG_SET regset;
15823 gcc_assert (!const_store || !load);
15824 /* Check that the memory references are immediate offsets from the
15825 same base register. Extract the base register, the destination
15826 registers, and the corresponding memory offsets. */
15827 for (i = 0; i < nops; i++)
15829 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15830 return false;
15832 if (i == 0)
15833 base = cur_base;
15834 else if (REGNO (base) != REGNO (cur_base))
15835 return false;
15837 offsets[i] = INTVAL (cur_offset);
15838 if (GET_CODE (operands[i]) == SUBREG)
15840 tmp = SUBREG_REG (operands[i]);
15841 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15842 operands[i] = tmp;
15846 /* Make sure there is no dependency between the individual loads. */
15847 if (load && REGNO (operands[0]) == REGNO (base))
15848 return false; /* RAW */
15850 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15851 return false; /* WAW */
15853 /* If the same input register is used in both stores
15854 when storing different constants, try to find a free register.
15855 For example, the code
15856 mov r0, 0
15857 str r0, [r2]
15858 mov r0, 1
15859 str r0, [r2, #4]
15860 can be transformed into
15861 mov r1, 0
15862 strd r1, r0, [r2]
15863 in Thumb mode assuming that r1 is free. */
15864 if (const_store
15865 && REGNO (operands[0]) == REGNO (operands[1])
15866 && INTVAL (operands[4]) != INTVAL (operands[5]))
15868 if (TARGET_THUMB2)
15870 CLEAR_HARD_REG_SET (regset);
15871 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15872 if (tmp == NULL_RTX)
15873 return false;
15875 /* Use the new register in the first load to ensure that
15876 if the original input register is not dead after the peephole,
15877 then it will have the correct constant value. */
15878 operands[0] = tmp;
15880 else if (TARGET_ARM)
15882 return false;
15883 int regno = REGNO (operands[0]);
15884 if (!peep2_reg_dead_p (4, operands[0]))
15886 /* When the input register is even and is not dead after the
15887 pattern, it has to hold the second constant but we cannot
15888 form a legal STRD in ARM mode with this register as the second
15889 register. */
15890 if (regno % 2 == 0)
15891 return false;
15893 /* Is regno-1 free? */
15894 SET_HARD_REG_SET (regset);
15895 CLEAR_HARD_REG_BIT(regset, regno - 1);
15896 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15897 if (tmp == NULL_RTX)
15898 return false;
15900 operands[0] = tmp;
15902 else
15904 /* Find a DImode register. */
15905 CLEAR_HARD_REG_SET (regset);
15906 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15907 if (tmp != NULL_RTX)
15909 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15910 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15912 else
15914 /* Can we use the input register to form a DI register? */
15915 SET_HARD_REG_SET (regset);
15916 CLEAR_HARD_REG_BIT(regset,
15917 regno % 2 == 0 ? regno + 1 : regno - 1);
15918 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15919 if (tmp == NULL_RTX)
15920 return false;
15921 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15925 gcc_assert (operands[0] != NULL_RTX);
15926 gcc_assert (operands[1] != NULL_RTX);
15927 gcc_assert (REGNO (operands[0]) % 2 == 0);
15928 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15932 /* Make sure the instructions are ordered with the lower memory access first. */
15933 if (offsets[0] > offsets[1])
15935 gap = offsets[0] - offsets[1];
15936 offset = offsets[1];
15938 /* Swap the instructions such that lower memory is accessed first. */
15939 SWAP_RTX (operands[0], operands[1]);
15940 SWAP_RTX (operands[2], operands[3]);
15941 if (const_store)
15942 SWAP_RTX (operands[4], operands[5]);
15944 else
15946 gap = offsets[1] - offsets[0];
15947 offset = offsets[0];
15950 /* Make sure accesses are to consecutive memory locations. */
15951 if (gap != 4)
15952 return false;
15954 /* Make sure we generate legal instructions. */
15955 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15956 false, load))
15957 return true;
15959 /* In Thumb state, where registers are almost unconstrained, there
15960 is little hope of fixing it. */
15961 if (TARGET_THUMB2)
15962 return false;
15964 if (load && commute)
15966 /* Try reordering registers. */
15967 SWAP_RTX (operands[0], operands[1]);
15968 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15969 false, load))
15970 return true;
15973 if (const_store)
15975 /* If input registers are dead after this pattern, they can be
15976 reordered or replaced by other registers that are free in the
15977 current pattern. */
15978 if (!peep2_reg_dead_p (4, operands[0])
15979 || !peep2_reg_dead_p (4, operands[1]))
15980 return false;
15982 /* Try to reorder the input registers. */
15983 /* For example, the code
15984 mov r0, 0
15985 mov r1, 1
15986 str r1, [r2]
15987 str r0, [r2, #4]
15988 can be transformed into
15989 mov r1, 0
15990 mov r0, 1
15991 strd r0, r1, [r2]
15993 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15994 false, false))
15996 SWAP_RTX (operands[0], operands[1]);
15997 return true;
16000 /* Try to find a free DI register. */
16001 CLEAR_HARD_REG_SET (regset);
16002 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16003 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16004 while (true)
16006 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16007 if (tmp == NULL_RTX)
16008 return false;
16010 /* DREG must be an even-numbered register in DImode.
16011 Split it into SI registers. */
16012 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16013 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16014 gcc_assert (operands[0] != NULL_RTX);
16015 gcc_assert (operands[1] != NULL_RTX);
16016 gcc_assert (REGNO (operands[0]) % 2 == 0);
16017 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16019 return (operands_ok_ldrd_strd (operands[0], operands[1],
16020 base, offset,
16021 false, load));
16025 return false;
16027 #undef SWAP_RTX
16032 /* Print a symbolic form of X to the debug file, F. */
16033 static void
16034 arm_print_value (FILE *f, rtx x)
16036 switch (GET_CODE (x))
16038 case CONST_INT:
16039 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16040 return;
16042 case CONST_DOUBLE:
16043 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16044 return;
16046 case CONST_VECTOR:
16048 int i;
16050 fprintf (f, "<");
16051 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16053 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16054 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16055 fputc (',', f);
16057 fprintf (f, ">");
16059 return;
16061 case CONST_STRING:
16062 fprintf (f, "\"%s\"", XSTR (x, 0));
16063 return;
16065 case SYMBOL_REF:
16066 fprintf (f, "`%s'", XSTR (x, 0));
16067 return;
16069 case LABEL_REF:
16070 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16071 return;
16073 case CONST:
16074 arm_print_value (f, XEXP (x, 0));
16075 return;
16077 case PLUS:
16078 arm_print_value (f, XEXP (x, 0));
16079 fprintf (f, "+");
16080 arm_print_value (f, XEXP (x, 1));
16081 return;
16083 case PC:
16084 fprintf (f, "pc");
16085 return;
16087 default:
16088 fprintf (f, "????");
16089 return;
16093 /* Routines for manipulation of the constant pool. */
16095 /* Arm instructions cannot load a large constant directly into a
16096 register; they have to come from a pc relative load. The constant
16097 must therefore be placed in the addressable range of the pc
16098 relative load. Depending on the precise pc relative load
16099 instruction, the range is somewhere between 256 bytes and 4k. This
16100 means that we often have to dump a constant inside a function, and
16101 generate code to branch around it.
16103 It is important to minimize this, since the branches will slow
16104 things down and make the code larger.
16106 Normally we can hide the table after an existing unconditional
16107 branch so that there is no interruption of the flow, but in the
16108 worst case the code looks like this:
16110 ldr rn, L1
16112 b L2
16113 align
16114 L1: .long value
16118 ldr rn, L3
16120 b L4
16121 align
16122 L3: .long value
16126 We fix this by performing a scan after scheduling, which notices
16127 which instructions need to have their operands fetched from the
16128 constant table and builds the table.
16130 The algorithm starts by building a table of all the constants that
16131 need fixing up and all the natural barriers in the function (places
16132 where a constant table can be dropped without breaking the flow).
16133 For each fixup we note how far the pc-relative replacement will be
16134 able to reach and the offset of the instruction into the function.
16136 Having built the table we then group the fixes together to form
16137 tables that are as large as possible (subject to addressing
16138 constraints) and emit each table of constants after the last
16139 barrier that is within range of all the instructions in the group.
16140 If a group does not contain a barrier, then we forcibly create one
16141 by inserting a jump instruction into the flow. Once the table has
16142 been inserted, the insns are then modified to reference the
16143 relevant entry in the pool.
16145 Possible enhancements to the algorithm (not implemented) are:
16147 1) For some processors and object formats, there may be benefit in
16148 aligning the pools to the start of cache lines; this alignment
16149 would need to be taken into account when calculating addressability
16150 of a pool. */
16152 /* These typedefs are located at the start of this file, so that
16153 they can be used in the prototypes there. This comment is to
16154 remind readers of that fact so that the following structures
16155 can be understood more easily.
16157 typedef struct minipool_node Mnode;
16158 typedef struct minipool_fixup Mfix; */
16160 struct minipool_node
16162 /* Doubly linked chain of entries. */
16163 Mnode * next;
16164 Mnode * prev;
16165 /* The maximum offset into the code at which this entry can be placed. While
16166 pushing fixes for forward references, all entries are sorted in order
16167 of increasing max_address. */
16168 HOST_WIDE_INT max_address;
16169 /* Similarly for an entry inserted for a backwards ref. */
16170 HOST_WIDE_INT min_address;
16171 /* The number of fixes referencing this entry. This can become zero
16172 if we "unpush" an entry. In this case we ignore the entry when we
16173 come to emit the code. */
16174 int refcount;
16175 /* The offset from the start of the minipool. */
16176 HOST_WIDE_INT offset;
16177 /* The value in the table. */
16178 rtx value;
16179 /* The mode of value. */
16180 machine_mode mode;
16181 /* The size of the value. With iWMMXt enabled
16182 sizes > 4 also imply an alignment of 8-bytes. */
16183 int fix_size;
16186 struct minipool_fixup
16188 Mfix * next;
16189 rtx_insn * insn;
16190 HOST_WIDE_INT address;
16191 rtx * loc;
16192 machine_mode mode;
16193 int fix_size;
16194 rtx value;
16195 Mnode * minipool;
16196 HOST_WIDE_INT forwards;
16197 HOST_WIDE_INT backwards;
16200 /* Fixes less than a word need padding out to a word boundary. */
16201 #define MINIPOOL_FIX_SIZE(mode) \
16202 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
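/* So, for example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (sub-word entries are padded out to a full word), while
   MINIPOOL_FIX_SIZE (DImode) is 8 and MINIPOOL_FIX_SIZE (V4SImode) is 16. */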
16204 static Mnode * minipool_vector_head;
16205 static Mnode * minipool_vector_tail;
16206 static rtx_code_label *minipool_vector_label;
16207 static int minipool_pad;
16209 /* The linked list of all minipool fixes required for this function. */
16210 Mfix * minipool_fix_head;
16211 Mfix * minipool_fix_tail;
16212 /* The fix entry for the current minipool, once it has been placed. */
16213 Mfix * minipool_barrier;
16215 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16216 #define JUMP_TABLES_IN_TEXT_SECTION 0
16217 #endif
16219 static HOST_WIDE_INT
16220 get_jump_table_size (rtx_jump_table_data *insn)
16222 /* ADDR_VECs only take room if read-only data goes into the text
16223 section. */
16224 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16226 rtx body = PATTERN (insn);
16227 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16228 HOST_WIDE_INT size;
16229 HOST_WIDE_INT modesize;
16231 modesize = GET_MODE_SIZE (GET_MODE (body));
16232 size = modesize * XVECLEN (body, elt);
16233 switch (modesize)
16235 case 1:
16236 /* Round up size of TBB table to a halfword boundary. */
16237 size = (size + 1) & ~(HOST_WIDE_INT)1;
16238 break;
16239 case 2:
16240 /* No padding necessary for TBH. */
16241 break;
16242 case 4:
16243 /* Add two bytes for alignment on Thumb. */
16244 if (TARGET_THUMB)
16245 size += 2;
16246 break;
16247 default:
16248 gcc_unreachable ();
16250 return size;
16253 return 0;
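/* Worked examples of the computation above, assuming the table is emitted
   into the text section: a 5-entry QImode (TBB) table takes 5 bytes,
   rounded up to 6; a 5-entry HImode (TBH) table takes exactly 10 bytes;
   and a 5-entry SImode table on Thumb takes 20 + 2 = 22 bytes to allow
   for alignment. */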
16256 /* Return the maximum amount of padding that will be inserted before
16257 label LABEL. */
16259 static HOST_WIDE_INT
16260 get_label_padding (rtx label)
16262 HOST_WIDE_INT align, min_insn_size;
16264 align = 1 << label_to_alignment (label);
16265 min_insn_size = TARGET_THUMB ? 2 : 4;
16266 return align > min_insn_size ? align - min_insn_size : 0;
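/* For instance, a label aligned to 8 bytes in Thumb state (minimum insn
   size 2) may be preceded by up to 8 - 2 = 6 bytes of padding, whereas a
   word-aligned label in ARM state needs none, because the 4-byte alignment
   does not exceed the 4-byte minimum instruction size. */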
16269 /* Move a minipool fix MP from its current location to before MAX_MP.
16270 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16271 constraints may need updating. */
16272 static Mnode *
16273 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16274 HOST_WIDE_INT max_address)
16276 /* The code below assumes these are different. */
16277 gcc_assert (mp != max_mp);
16279 if (max_mp == NULL)
16281 if (max_address < mp->max_address)
16282 mp->max_address = max_address;
16284 else
16286 if (max_address > max_mp->max_address - mp->fix_size)
16287 mp->max_address = max_mp->max_address - mp->fix_size;
16288 else
16289 mp->max_address = max_address;
16291 /* Unlink MP from its current position. Since max_mp is non-null,
16292 mp->prev must be non-null. */
16293 mp->prev->next = mp->next;
16294 if (mp->next != NULL)
16295 mp->next->prev = mp->prev;
16296 else
16297 minipool_vector_tail = mp->prev;
16299 /* Re-insert it before MAX_MP. */
16300 mp->next = max_mp;
16301 mp->prev = max_mp->prev;
16302 max_mp->prev = mp;
16304 if (mp->prev != NULL)
16305 mp->prev->next = mp;
16306 else
16307 minipool_vector_head = mp;
16310 /* Save the new entry. */
16311 max_mp = mp;
16313 /* Scan over the preceding entries and adjust their addresses as
16314 required. */
16315 while (mp->prev != NULL
16316 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16318 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16319 mp = mp->prev;
16322 return max_mp;
16325 /* Add a constant to the minipool for a forward reference. Returns the
16326 node added or NULL if the constant will not fit in this pool. */
16327 static Mnode *
16328 add_minipool_forward_ref (Mfix *fix)
16330 /* If set, max_mp is the first pool_entry that has a lower
16331 constraint than the one we are trying to add. */
16332 Mnode * max_mp = NULL;
16333 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16334 Mnode * mp;
16336 /* If the minipool starts before the end of FIX->INSN then this FIX
16337 cannot be placed into the current pool. Furthermore, adding the
16338 new constant pool entry may cause the pool to start FIX_SIZE bytes
16339 earlier. */
16340 if (minipool_vector_head &&
16341 (fix->address + get_attr_length (fix->insn)
16342 >= minipool_vector_head->max_address - fix->fix_size))
16343 return NULL;
16345 /* Scan the pool to see if a constant with the same value has
16346 already been added. While we are doing this, also note the
16347 location where we must insert the constant if it doesn't already
16348 exist. */
16349 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16351 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16352 && fix->mode == mp->mode
16353 && (!LABEL_P (fix->value)
16354 || (CODE_LABEL_NUMBER (fix->value)
16355 == CODE_LABEL_NUMBER (mp->value)))
16356 && rtx_equal_p (fix->value, mp->value))
16358 /* More than one fix references this entry. */
16359 mp->refcount++;
16360 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16363 /* Note the insertion point if necessary. */
16364 if (max_mp == NULL
16365 && mp->max_address > max_address)
16366 max_mp = mp;
16368 /* If we are inserting an 8-byte aligned quantity and
16369 we have not already found an insertion point, then
16370 make sure that all such 8-byte aligned quantities are
16371 placed at the start of the pool. */
16372 if (ARM_DOUBLEWORD_ALIGN
16373 && max_mp == NULL
16374 && fix->fix_size >= 8
16375 && mp->fix_size < 8)
16377 max_mp = mp;
16378 max_address = mp->max_address;
16382 /* The value is not currently in the minipool, so we need to create
16383 a new entry for it. If MAX_MP is NULL, the entry will be put on
16384 the end of the list since the placement is less constrained than
16385 any existing entry. Otherwise, we insert the new fix before
16386 MAX_MP and, if necessary, adjust the constraints on the other
16387 entries. */
16388 mp = XNEW (Mnode);
16389 mp->fix_size = fix->fix_size;
16390 mp->mode = fix->mode;
16391 mp->value = fix->value;
16392 mp->refcount = 1;
16393 /* Not yet required for a backwards ref. */
16394 mp->min_address = -65536;
16396 if (max_mp == NULL)
16398 mp->max_address = max_address;
16399 mp->next = NULL;
16400 mp->prev = minipool_vector_tail;
16402 if (mp->prev == NULL)
16404 minipool_vector_head = mp;
16405 minipool_vector_label = gen_label_rtx ();
16407 else
16408 mp->prev->next = mp;
16410 minipool_vector_tail = mp;
16412 else
16414 if (max_address > max_mp->max_address - mp->fix_size)
16415 mp->max_address = max_mp->max_address - mp->fix_size;
16416 else
16417 mp->max_address = max_address;
16419 mp->next = max_mp;
16420 mp->prev = max_mp->prev;
16421 max_mp->prev = mp;
16422 if (mp->prev != NULL)
16423 mp->prev->next = mp;
16424 else
16425 minipool_vector_head = mp;
16428 /* Save the new entry. */
16429 max_mp = mp;
16431 /* Scan over the preceding entries and adjust their addresses as
16432 required. */
16433 while (mp->prev != NULL
16434 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16436 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16437 mp = mp->prev;
16440 return max_mp;
16443 static Mnode *
16444 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16445 HOST_WIDE_INT min_address)
16447 HOST_WIDE_INT offset;
16449 /* The code below assumes these are different. */
16450 gcc_assert (mp != min_mp);
16452 if (min_mp == NULL)
16454 if (min_address > mp->min_address)
16455 mp->min_address = min_address;
16457 else
16459 /* We will adjust this below if it is too loose. */
16460 mp->min_address = min_address;
16462 /* Unlink MP from its current position. Since min_mp is non-null,
16463 mp->next must be non-null. */
16464 mp->next->prev = mp->prev;
16465 if (mp->prev != NULL)
16466 mp->prev->next = mp->next;
16467 else
16468 minipool_vector_head = mp->next;
16470 /* Reinsert it after MIN_MP. */
16471 mp->prev = min_mp;
16472 mp->next = min_mp->next;
16473 min_mp->next = mp;
16474 if (mp->next != NULL)
16475 mp->next->prev = mp;
16476 else
16477 minipool_vector_tail = mp;
16480 min_mp = mp;
16482 offset = 0;
16483 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16485 mp->offset = offset;
16486 if (mp->refcount > 0)
16487 offset += mp->fix_size;
16489 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16490 mp->next->min_address = mp->min_address + mp->fix_size;
16493 return min_mp;
16496 /* Add a constant to the minipool for a backward reference. Returns the
16497 node added or NULL if the constant will not fit in this pool.
16499 Note that the code for insertion of a backwards reference can be
16500 somewhat confusing because the calculated offsets for each fix do
16501 not take into account the size of the pool (which is still under
16502 construction). */
16503 static Mnode *
16504 add_minipool_backward_ref (Mfix *fix)
16506 /* If set, min_mp is the last pool_entry that has a lower constraint
16507 than the one we are trying to add. */
16508 Mnode *min_mp = NULL;
16509 /* This can be negative, since it is only a constraint. */
16510 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16511 Mnode *mp;
16513 /* If we can't reach the current pool from this insn, or if we can't
16514 insert this entry at the end of the pool without pushing other
16515 fixes out of range, then we don't try. This ensures that we
16516 can't fail later on. */
16517 if (min_address >= minipool_barrier->address
16518 || (minipool_vector_tail->min_address + fix->fix_size
16519 >= minipool_barrier->address))
16520 return NULL;
16522 /* Scan the pool to see if a constant with the same value has
16523 already been added. While we are doing this, also note the
16524 location where we must insert the constant if it doesn't already
16525 exist. */
16526 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16528 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16529 && fix->mode == mp->mode
16530 && (!LABEL_P (fix->value)
16531 || (CODE_LABEL_NUMBER (fix->value)
16532 == CODE_LABEL_NUMBER (mp->value)))
16533 && rtx_equal_p (fix->value, mp->value)
16534 /* Check that there is enough slack to move this entry to the
16535 end of the table (this is conservative). */
16536 && (mp->max_address
16537 > (minipool_barrier->address
16538 + minipool_vector_tail->offset
16539 + minipool_vector_tail->fix_size)))
16541 mp->refcount++;
16542 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16545 if (min_mp != NULL)
16546 mp->min_address += fix->fix_size;
16547 else
16549 /* Note the insertion point if necessary. */
16550 if (mp->min_address < min_address)
16552 /* For now, we do not allow the insertion of nodes requiring 8-byte
16553 alignment anywhere but at the start of the pool. */
16554 if (ARM_DOUBLEWORD_ALIGN
16555 && fix->fix_size >= 8 && mp->fix_size < 8)
16556 return NULL;
16557 else
16558 min_mp = mp;
16560 else if (mp->max_address
16561 < minipool_barrier->address + mp->offset + fix->fix_size)
16563 /* Inserting before this entry would push the fix beyond
16564 its maximum address (which can happen if we have
16565 re-located a forwards fix); force the new fix to come
16566 after it. */
16567 if (ARM_DOUBLEWORD_ALIGN
16568 && fix->fix_size >= 8 && mp->fix_size < 8)
16569 return NULL;
16570 else
16572 min_mp = mp;
16573 min_address = mp->min_address + fix->fix_size;
16576 /* Do not insert a non-8-byte aligned quantity before 8-byte
16577 aligned quantities. */
16578 else if (ARM_DOUBLEWORD_ALIGN
16579 && fix->fix_size < 8
16580 && mp->fix_size >= 8)
16582 min_mp = mp;
16583 min_address = mp->min_address + fix->fix_size;
16588 /* We need to create a new entry. */
16589 mp = XNEW (Mnode);
16590 mp->fix_size = fix->fix_size;
16591 mp->mode = fix->mode;
16592 mp->value = fix->value;
16593 mp->refcount = 1;
16594 mp->max_address = minipool_barrier->address + 65536;
16596 mp->min_address = min_address;
16598 if (min_mp == NULL)
16600 mp->prev = NULL;
16601 mp->next = minipool_vector_head;
16603 if (mp->next == NULL)
16605 minipool_vector_tail = mp;
16606 minipool_vector_label = gen_label_rtx ();
16608 else
16609 mp->next->prev = mp;
16611 minipool_vector_head = mp;
16613 else
16615 mp->next = min_mp->next;
16616 mp->prev = min_mp;
16617 min_mp->next = mp;
16619 if (mp->next != NULL)
16620 mp->next->prev = mp;
16621 else
16622 minipool_vector_tail = mp;
16625 /* Save the new entry. */
16626 min_mp = mp;
16628 if (mp->prev)
16629 mp = mp->prev;
16630 else
16631 mp->offset = 0;
16633 /* Scan over the following entries and adjust their offsets. */
16634 while (mp->next != NULL)
16636 if (mp->next->min_address < mp->min_address + mp->fix_size)
16637 mp->next->min_address = mp->min_address + mp->fix_size;
16639 if (mp->refcount)
16640 mp->next->offset = mp->offset + mp->fix_size;
16641 else
16642 mp->next->offset = mp->offset;
16644 mp = mp->next;
16647 return min_mp;
16650 static void
16651 assign_minipool_offsets (Mfix *barrier)
16653 HOST_WIDE_INT offset = 0;
16654 Mnode *mp;
16656 minipool_barrier = barrier;
16658 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16660 mp->offset = offset;
16662 if (mp->refcount > 0)
16663 offset += mp->fix_size;
16667 /* Output the literal table. */
16668 static void
16669 dump_minipool (rtx_insn *scan)
16671 Mnode * mp;
16672 Mnode * nmp;
16673 int align64 = 0;
16675 if (ARM_DOUBLEWORD_ALIGN)
16676 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16677 if (mp->refcount > 0 && mp->fix_size >= 8)
16679 align64 = 1;
16680 break;
16683 if (dump_file)
16684 fprintf (dump_file,
16685 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16686 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16688 scan = emit_label_after (gen_label_rtx (), scan);
16689 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16690 scan = emit_label_after (minipool_vector_label, scan);
16692 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16694 if (mp->refcount > 0)
16696 if (dump_file)
16698 fprintf (dump_file,
16699 ";; Offset %u, min %ld, max %ld ",
16700 (unsigned) mp->offset, (unsigned long) mp->min_address,
16701 (unsigned long) mp->max_address);
16702 arm_print_value (dump_file, mp->value);
16703 fputc ('\n', dump_file);
16706 switch (mp->fix_size)
16708 #ifdef HAVE_consttable_1
16709 case 1:
16710 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16711 break;
16713 #endif
16714 #ifdef HAVE_consttable_2
16715 case 2:
16716 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16717 break;
16719 #endif
16720 #ifdef HAVE_consttable_4
16721 case 4:
16722 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16723 break;
16725 #endif
16726 #ifdef HAVE_consttable_8
16727 case 8:
16728 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16729 break;
16731 #endif
16732 #ifdef HAVE_consttable_16
16733 case 16:
16734 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16735 break;
16737 #endif
16738 default:
16739 gcc_unreachable ();
16743 nmp = mp->next;
16744 free (mp);
16747 minipool_vector_head = minipool_vector_tail = NULL;
16748 scan = emit_insn_after (gen_consttable_end (), scan);
16749 scan = emit_barrier_after (scan);
16752 /* Return the cost of forcibly inserting a barrier after INSN. */
16753 static int
16754 arm_barrier_cost (rtx insn)
16756 /* Basing the location of the pool on the loop depth is preferable,
16757 but at the moment, the basic block information seems to be
16758 corrupt by this stage of the compilation. */
16759 int base_cost = 50;
16760 rtx next = next_nonnote_insn (insn);
16762 if (next != NULL && LABEL_P (next))
16763 base_cost -= 20;
16765 switch (GET_CODE (insn))
16767 case CODE_LABEL:
16768 /* It will always be better to place the table before the label, rather
16769 than after it. */
16770 return 50;
16772 case INSN:
16773 case CALL_INSN:
16774 return base_cost;
16776 case JUMP_INSN:
16777 return base_cost - 10;
16779 default:
16780 return base_cost + 10;
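/* Under this heuristic a lower score is a better place for the barrier:
   a JUMP_INSN scores 50 - 10 = 40, an ordinary INSN or CALL_INSN scores 50,
   and either score improves by a further 20 when the next real insn is a
   label; a CODE_LABEL itself always scores 50, so the position just before
   a label is preferred to the one just after it. */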
16784 /* Find the best place in the insn stream in the range
16785 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16786 Create the barrier by inserting a jump and add a new fix entry for
16787 it. */
16788 static Mfix *
16789 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16791 HOST_WIDE_INT count = 0;
16792 rtx_barrier *barrier;
16793 rtx_insn *from = fix->insn;
16794 /* The instruction after which we will insert the jump. */
16795 rtx_insn *selected = NULL;
16796 int selected_cost;
16797 /* The address at which the jump instruction will be placed. */
16798 HOST_WIDE_INT selected_address;
16799 Mfix * new_fix;
16800 HOST_WIDE_INT max_count = max_address - fix->address;
16801 rtx_code_label *label = gen_label_rtx ();
16803 selected_cost = arm_barrier_cost (from);
16804 selected_address = fix->address;
16806 while (from && count < max_count)
16808 rtx_jump_table_data *tmp;
16809 int new_cost;
16811 /* This code shouldn't have been called if there was a natural barrier
16812 within range. */
16813 gcc_assert (!BARRIER_P (from));
16815 /* Count the length of this insn. This must stay in sync with the
16816 code that pushes minipool fixes. */
16817 if (LABEL_P (from))
16818 count += get_label_padding (from);
16819 else
16820 count += get_attr_length (from);
16822 /* If there is a jump table, add its length. */
16823 if (tablejump_p (from, NULL, &tmp))
16825 count += get_jump_table_size (tmp);
16827 /* Jump tables aren't in a basic block, so base the cost on
16828 the dispatch insn. If we select this location, we will
16829 still put the pool after the table. */
16830 new_cost = arm_barrier_cost (from);
16832 if (count < max_count
16833 && (!selected || new_cost <= selected_cost))
16835 selected = tmp;
16836 selected_cost = new_cost;
16837 selected_address = fix->address + count;
16840 /* Continue after the dispatch table. */
16841 from = NEXT_INSN (tmp);
16842 continue;
16845 new_cost = arm_barrier_cost (from);
16847 if (count < max_count
16848 && (!selected || new_cost <= selected_cost))
16850 selected = from;
16851 selected_cost = new_cost;
16852 selected_address = fix->address + count;
16855 from = NEXT_INSN (from);
16858 /* Make sure that we found a place to insert the jump. */
16859 gcc_assert (selected);
16861 /* Make sure we do not split a call and its corresponding
16862 CALL_ARG_LOCATION note. */
16863 if (CALL_P (selected))
16865 rtx_insn *next = NEXT_INSN (selected);
16866 if (next && NOTE_P (next)
16867 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16868 selected = next;
16871 /* Create a new JUMP_INSN that branches around a barrier. */
16872 from = emit_jump_insn_after (gen_jump (label), selected);
16873 JUMP_LABEL (from) = label;
16874 barrier = emit_barrier_after (from);
16875 emit_label_after (label, barrier);
16877 /* Create a minipool barrier entry for the new barrier. */
16878 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16879 new_fix->insn = barrier;
16880 new_fix->address = selected_address;
16881 new_fix->next = fix->next;
16882 fix->next = new_fix;
16884 return new_fix;
16887 /* Record that there is a natural barrier in the insn stream at
16888 ADDRESS. */
16889 static void
16890 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16892 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16894 fix->insn = insn;
16895 fix->address = address;
16897 fix->next = NULL;
16898 if (minipool_fix_head != NULL)
16899 minipool_fix_tail->next = fix;
16900 else
16901 minipool_fix_head = fix;
16903 minipool_fix_tail = fix;
16906 /* Record INSN, which will need fixing up to load a value from the
16907 minipool. ADDRESS is the offset of the insn from the start of the
16908 function; LOC is a pointer to the part of the insn which requires
16909 fixing; VALUE is the constant that must be loaded, which is of type
16910 MODE. */
16911 static void
16912 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16913 machine_mode mode, rtx value)
16915 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16917 fix->insn = insn;
16918 fix->address = address;
16919 fix->loc = loc;
16920 fix->mode = mode;
16921 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16922 fix->value = value;
16923 fix->forwards = get_attr_pool_range (insn);
16924 fix->backwards = get_attr_neg_pool_range (insn);
16925 fix->minipool = NULL;
16927 /* If an insn doesn't have a range defined for it, then it isn't
16928 expecting to be reworked by this code. Better to stop now than
16929 to generate duff assembly code. */
16930 gcc_assert (fix->forwards || fix->backwards);
16932 /* If an entry requires 8-byte alignment then assume all constant pools
16933 require 4 bytes of padding. Trying to do this later on a per-pool
16934 basis is awkward because existing pool entries have to be modified. */
16935 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16936 minipool_pad = 4;
16938 if (dump_file)
16940 fprintf (dump_file,
16941 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16942 GET_MODE_NAME (mode),
16943 INSN_UID (insn), (unsigned long) address,
16944 -1 * (long)fix->backwards, (long)fix->forwards);
16945 arm_print_value (dump_file, fix->value);
16946 fprintf (dump_file, "\n");
16949 /* Add it to the chain of fixes. */
16950 fix->next = NULL;
16952 if (minipool_fix_head != NULL)
16953 minipool_fix_tail->next = fix;
16954 else
16955 minipool_fix_head = fix;
16957 minipool_fix_tail = fix;
16960 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16961 Returns the number of insns needed, or 99 if we always want to synthesize
16962 the value. */
16964 arm_max_const_double_inline_cost ()
16966 /* Let the value get synthesized to avoid the use of literal pools. */
16967 if (arm_disable_literal_pool)
16968 return 99;
16970 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16973 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16974 Returns the number of insns needed, or 99 if we don't know how to
16975 do it. */
16977 arm_const_double_inline_cost (rtx val)
16979 rtx lowpart, highpart;
16980 machine_mode mode;
16982 mode = GET_MODE (val);
16984 if (mode == VOIDmode)
16985 mode = DImode;
16987 gcc_assert (GET_MODE_SIZE (mode) == 8);
16989 lowpart = gen_lowpart (SImode, val);
16990 highpart = gen_highpart_mode (SImode, mode, val);
16992 gcc_assert (CONST_INT_P (lowpart));
16993 gcc_assert (CONST_INT_P (highpart));
16995 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16996 NULL_RTX, NULL_RTX, 0, 0)
16997 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16998 NULL_RTX, NULL_RTX, 0, 0));
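/* As an example, the 64-bit value 0x0000000100000001 costs 1 + 1 = 2 here,
   since each 32-bit half is a single valid ARM immediate, and so it is
   normally synthesized inline; something like 0x12345678deadbeef costs
   more than arm_max_const_double_inline_cost allows and is loaded from
   the literal pool instead (unless literal pools are disabled). */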
17001 /* Cost of loading a SImode constant. */
17002 static inline int
17003 arm_const_inline_cost (enum rtx_code code, rtx val)
17005 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17006 NULL_RTX, NULL_RTX, 1, 0);
17009 /* Return true if it is worthwhile to split a 64-bit constant into two
17010 32-bit operations. This is the case if optimizing for size, or
17011 if we have load delay slots, or if one 32-bit part can be done with
17012 a single data operation. */
17013 bool
17014 arm_const_double_by_parts (rtx val)
17016 machine_mode mode = GET_MODE (val);
17017 rtx part;
17019 if (optimize_size || arm_ld_sched)
17020 return true;
17022 if (mode == VOIDmode)
17023 mode = DImode;
17025 part = gen_highpart_mode (SImode, mode, val);
17027 gcc_assert (CONST_INT_P (part));
17029 if (const_ok_for_arm (INTVAL (part))
17030 || const_ok_for_arm (~INTVAL (part)))
17031 return true;
17033 part = gen_lowpart (SImode, val);
17035 gcc_assert (CONST_INT_P (part));
17037 if (const_ok_for_arm (INTVAL (part))
17038 || const_ok_for_arm (~INTVAL (part)))
17039 return true;
17041 return false;
17044 /* Return true if it is possible to inline both the high and low parts
17045 of a 64-bit constant into 32-bit data processing instructions. */
17046 bool
17047 arm_const_double_by_immediates (rtx val)
17049 machine_mode mode = GET_MODE (val);
17050 rtx part;
17052 if (mode == VOIDmode)
17053 mode = DImode;
17055 part = gen_highpart_mode (SImode, mode, val);
17057 gcc_assert (CONST_INT_P (part));
17059 if (!const_ok_for_arm (INTVAL (part)))
17060 return false;
17062 part = gen_lowpart (SImode, val);
17064 gcc_assert (CONST_INT_P (part));
17066 if (!const_ok_for_arm (INTVAL (part)))
17067 return false;
17069 return true;
17072 /* Scan INSN and note any of its operands that need fixing.
17073 If DO_PUSHES is false we do not actually push any of the fixups
17074 needed. */
17075 static void
17076 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17078 int opno;
17080 extract_constrain_insn (insn);
17082 if (recog_data.n_alternatives == 0)
17083 return;
17085 /* Fill in recog_op_alt with information about the constraints of
17086 this insn. */
17087 preprocess_constraints (insn);
17089 const operand_alternative *op_alt = which_op_alt ();
17090 for (opno = 0; opno < recog_data.n_operands; opno++)
17092 /* Things we need to fix can only occur in inputs. */
17093 if (recog_data.operand_type[opno] != OP_IN)
17094 continue;
17096 /* If this alternative is a memory reference, then any mention
17097 of constants in this alternative is really to fool reload
17098 into allowing us to accept one there. We need to fix them up
17099 now so that we output the right code. */
17100 if (op_alt[opno].memory_ok)
17102 rtx op = recog_data.operand[opno];
17104 if (CONSTANT_P (op))
17106 if (do_pushes)
17107 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17108 recog_data.operand_mode[opno], op);
17110 else if (MEM_P (op)
17111 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17112 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17114 if (do_pushes)
17116 rtx cop = avoid_constant_pool_reference (op);
17118 /* Casting the address of something to a mode narrower
17119 than a word can cause avoid_constant_pool_reference()
17120 to return the pool reference itself. That's no good to
17121 us here. Let's just hope that we can use the
17122 constant pool value directly. */
17123 if (op == cop)
17124 cop = get_pool_constant (XEXP (op, 0));
17126 push_minipool_fix (insn, address,
17127 recog_data.operand_loc[opno],
17128 recog_data.operand_mode[opno], cop);
17135 return;
17138 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17139 be useful in the next conditional jump insn. */
17141 static void
17142 thumb1_reorg (void)
17144 basic_block bb;
17146 FOR_EACH_BB_FN (bb, cfun)
17148 rtx dest, src;
17149 rtx pat, op0, set = NULL;
17150 rtx_insn *prev, *insn = BB_END (bb);
17151 bool insn_clobbered = false;
17153 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17154 insn = PREV_INSN (insn);
17156 /* Find the last cbranchsi4_insn in basic block BB. */
17157 if (insn == BB_HEAD (bb)
17158 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17159 continue;
17161 /* Get the register with which we are comparing. */
17162 pat = PATTERN (insn);
17163 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17165 /* Find the first flag setting insn before INSN in basic block BB. */
17166 gcc_assert (insn != BB_HEAD (bb));
17167 for (prev = PREV_INSN (insn);
17168 (!insn_clobbered
17169 && prev != BB_HEAD (bb)
17170 && (NOTE_P (prev)
17171 || DEBUG_INSN_P (prev)
17172 || ((set = single_set (prev)) != NULL
17173 && get_attr_conds (prev) == CONDS_NOCOND)));
17174 prev = PREV_INSN (prev))
17176 if (reg_set_p (op0, prev))
17177 insn_clobbered = true;
17180 /* Skip if op0 is clobbered by an insn other than prev. */
17181 if (insn_clobbered)
17182 continue;
17184 if (!set)
17185 continue;
17187 dest = SET_DEST (set);
17188 src = SET_SRC (set);
17189 if (!low_register_operand (dest, SImode)
17190 || !low_register_operand (src, SImode))
17191 continue;
17193 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17194 in INSN. Both src and dest of the move insn are checked. */
17195 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17197 dest = copy_rtx (dest);
17198 src = copy_rtx (src);
17199 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17200 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17201 INSN_CODE (prev) = -1;
17202 /* Set test register in INSN to dest. */
17203 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17204 INSN_CODE (insn) = -1;
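/* Roughly, the rewrite above turns a Thumb-1 sequence along the lines of

       movs  r1, r0
       cmp   r1, #0
       beq   .L2

   into

       subs  r1, r0, #0
       beq   .L2

   because the subtract is modelled as setting the condition codes, which
   lets the following compare-and-branch pattern drop its explicit CMP
   against zero. */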
17209 /* Convert instructions to their cc-clobbering variant if possible, since
17210 that allows us to use smaller encodings. */
17212 static void
17213 thumb2_reorg (void)
17215 basic_block bb;
17216 regset_head live;
17218 INIT_REG_SET (&live);
17220 /* We are freeing block_for_insn in the toplev to keep compatibility
17221 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17222 compute_bb_for_insn ();
17223 df_analyze ();
17225 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17227 FOR_EACH_BB_FN (bb, cfun)
17229 if (current_tune->disparage_flag_setting_t16_encodings
17230 && optimize_bb_for_speed_p (bb))
17231 continue;
17233 rtx_insn *insn;
17234 Convert_Action action = SKIP;
17235 Convert_Action action_for_partial_flag_setting
17236 = (current_tune->disparage_partial_flag_setting_t16_encodings
17237 && optimize_bb_for_speed_p (bb))
17238 ? SKIP : CONV;
17240 COPY_REG_SET (&live, DF_LR_OUT (bb));
17241 df_simulate_initialize_backwards (bb, &live);
17242 FOR_BB_INSNS_REVERSE (bb, insn)
17244 if (NONJUMP_INSN_P (insn)
17245 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17246 && GET_CODE (PATTERN (insn)) == SET)
17248 action = SKIP;
17249 rtx pat = PATTERN (insn);
17250 rtx dst = XEXP (pat, 0);
17251 rtx src = XEXP (pat, 1);
17252 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17254 if (!OBJECT_P (src))
17255 op0 = XEXP (src, 0);
17257 if (BINARY_P (src))
17258 op1 = XEXP (src, 1);
17260 if (low_register_operand (dst, SImode))
17262 switch (GET_CODE (src))
17264 case PLUS:
17265 /* Adding two registers and storing the result
17266 in the first source is already a 16-bit
17267 operation. */
17268 if (rtx_equal_p (dst, op0)
17269 && register_operand (op1, SImode))
17270 break;
17272 if (low_register_operand (op0, SImode))
17274 /* ADDS <Rd>,<Rn>,<Rm> */
17275 if (low_register_operand (op1, SImode))
17276 action = CONV;
17277 /* ADDS <Rdn>,#<imm8> */
17278 /* SUBS <Rdn>,#<imm8> */
17279 else if (rtx_equal_p (dst, op0)
17280 && CONST_INT_P (op1)
17281 && IN_RANGE (INTVAL (op1), -255, 255))
17282 action = CONV;
17283 /* ADDS <Rd>,<Rn>,#<imm3> */
17284 /* SUBS <Rd>,<Rn>,#<imm3> */
17285 else if (CONST_INT_P (op1)
17286 && IN_RANGE (INTVAL (op1), -7, 7))
17287 action = CONV;
17289 /* ADCS <Rd>, <Rn> */
17290 else if (GET_CODE (XEXP (src, 0)) == PLUS
17291 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17292 && low_register_operand (XEXP (XEXP (src, 0), 1),
17293 SImode)
17294 && COMPARISON_P (op1)
17295 && cc_register (XEXP (op1, 0), VOIDmode)
17296 && maybe_get_arm_condition_code (op1) == ARM_CS
17297 && XEXP (op1, 1) == const0_rtx)
17298 action = CONV;
17299 break;
17301 case MINUS:
17302 /* RSBS <Rd>,<Rn>,#0
17303 Not handled here: see NEG below. */
17304 /* SUBS <Rd>,<Rn>,#<imm3>
17305 SUBS <Rdn>,#<imm8>
17306 Not handled here: see PLUS above. */
17307 /* SUBS <Rd>,<Rn>,<Rm> */
17308 if (low_register_operand (op0, SImode)
17309 && low_register_operand (op1, SImode))
17310 action = CONV;
17311 break;
17313 case MULT:
17314 /* MULS <Rdm>,<Rn>,<Rdm>
17315 As an exception to the rule, this is only used
17316 when optimizing for size since MULS is slow on all
17317 known implementations. We do not even want to use
17318 MULS in cold code, if optimizing for speed, so we
17319 test the global flag here. */
17320 if (!optimize_size)
17321 break;
17322 /* else fall through. */
17323 case AND:
17324 case IOR:
17325 case XOR:
17326 /* ANDS <Rdn>,<Rm> */
17327 if (rtx_equal_p (dst, op0)
17328 && low_register_operand (op1, SImode))
17329 action = action_for_partial_flag_setting;
17330 else if (rtx_equal_p (dst, op1)
17331 && low_register_operand (op0, SImode))
17332 action = action_for_partial_flag_setting == SKIP
17333 ? SKIP : SWAP_CONV;
17334 break;
17336 case ASHIFTRT:
17337 case ASHIFT:
17338 case LSHIFTRT:
17339 /* ASRS <Rdn>,<Rm> */
17340 /* LSRS <Rdn>,<Rm> */
17341 /* LSLS <Rdn>,<Rm> */
17342 if (rtx_equal_p (dst, op0)
17343 && low_register_operand (op1, SImode))
17344 action = action_for_partial_flag_setting;
17345 /* ASRS <Rd>,<Rm>,#<imm5> */
17346 /* LSRS <Rd>,<Rm>,#<imm5> */
17347 /* LSLS <Rd>,<Rm>,#<imm5> */
17348 else if (low_register_operand (op0, SImode)
17349 && CONST_INT_P (op1)
17350 && IN_RANGE (INTVAL (op1), 0, 31))
17351 action = action_for_partial_flag_setting;
17352 break;
17354 case ROTATERT:
17355 /* RORS <Rdn>,<Rm> */
17356 if (rtx_equal_p (dst, op0)
17357 && low_register_operand (op1, SImode))
17358 action = action_for_partial_flag_setting;
17359 break;
17361 case NOT:
17362 /* MVNS <Rd>,<Rm> */
17363 if (low_register_operand (op0, SImode))
17364 action = action_for_partial_flag_setting;
17365 break;
17367 case NEG:
17368 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17369 if (low_register_operand (op0, SImode))
17370 action = CONV;
17371 break;
17373 case CONST_INT:
17374 /* MOVS <Rd>,#<imm8> */
17375 if (CONST_INT_P (src)
17376 && IN_RANGE (INTVAL (src), 0, 255))
17377 action = action_for_partial_flag_setting;
17378 break;
17380 case REG:
17381 /* MOVS and MOV<c> with registers have different
17382 encodings, so are not relevant here. */
17383 break;
17385 default:
17386 break;
17390 if (action != SKIP)
17392 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17393 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17394 rtvec vec;
17396 if (action == SWAP_CONV)
17398 src = copy_rtx (src);
17399 XEXP (src, 0) = op1;
17400 XEXP (src, 1) = op0;
17401 pat = gen_rtx_SET (VOIDmode, dst, src);
17402 vec = gen_rtvec (2, pat, clobber);
17404 else /* action == CONV */
17405 vec = gen_rtvec (2, pat, clobber);
17407 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17408 INSN_CODE (insn) = -1;
17412 if (NONDEBUG_INSN_P (insn))
17413 df_simulate_one_insn_backwards (bb, insn, &live);
17417 CLEAR_REG_SET (&live);
17420 /* GCC puts the constant pool in the wrong place for ARM, since we can
17421 only load addresses within a limited distance of the pc. We do some
17422 special munging to move the constant pool values to the correct
17423 point in the code. */
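/* For illustration only (register names, labels and the constant are
   made up): a constant that cannot be encoded as an immediate is
   rewritten to be loaded from a nearby "minipool", e.g.

           ldr     r4, .LP0        @ pc-relative load, must stay in range
           ...
           b       .LP1            @ branch around the pool when it lands mid-code
   .LP0:   .word   0x12345678      @ minipool entry
   .LP1:

   The pass below records each such load as a "fix" and then places the
   pools so that every load stays within the addressing range of its
   instruction. */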
17424 static void
17425 arm_reorg (void)
17427 rtx_insn *insn;
17428 HOST_WIDE_INT address = 0;
17429 Mfix * fix;
17431 if (TARGET_THUMB1)
17432 thumb1_reorg ();
17433 else if (TARGET_THUMB2)
17434 thumb2_reorg ();
17436 /* Ensure all insns that must be split have been split at this point.
17437 Otherwise, the pool placement code below may compute incorrect
17438 insn lengths. Note that when optimizing, all insns have already
17439 been split at this point. */
17440 if (!optimize)
17441 split_all_insns_noflow ();
17443 minipool_fix_head = minipool_fix_tail = NULL;
17445 /* The first insn must always be a note, or the code below won't
17446 scan it properly. */
17447 insn = get_insns ();
17448 gcc_assert (NOTE_P (insn));
17449 minipool_pad = 0;
17451 /* Scan all the insns and record the operands that will need fixing. */
17452 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17454 if (BARRIER_P (insn))
17455 push_minipool_barrier (insn, address);
17456 else if (INSN_P (insn))
17458 rtx_jump_table_data *table;
17460 note_invalid_constants (insn, address, true);
17461 address += get_attr_length (insn);
17463 /* If the insn is a vector jump, add the size of the table
17464 and skip the table. */
17465 if (tablejump_p (insn, NULL, &table))
17467 address += get_jump_table_size (table);
17468 insn = table;
17471 else if (LABEL_P (insn))
17472 /* Add the worst-case padding due to alignment. We don't add
17473 the _current_ padding because the minipool insertions
17474 themselves might change it. */
17475 address += get_label_padding (insn);
17478 fix = minipool_fix_head;
17480 /* Now scan the fixups and perform the required changes. */
17481 while (fix)
17483 Mfix * ftmp;
17484 Mfix * fdel;
17485 Mfix * last_added_fix;
17486 Mfix * last_barrier = NULL;
17487 Mfix * this_fix;
17489 /* Skip any further barriers before the next fix. */
17490 while (fix && BARRIER_P (fix->insn))
17491 fix = fix->next;
17493 /* No more fixes. */
17494 if (fix == NULL)
17495 break;
17497 last_added_fix = NULL;
17499 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17501 if (BARRIER_P (ftmp->insn))
17503 if (ftmp->address >= minipool_vector_head->max_address)
17504 break;
17506 last_barrier = ftmp;
17508 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17509 break;
17511 last_added_fix = ftmp; /* Keep track of the last fix added. */
17514 /* If we found a barrier, drop back to that; any fixes that we
17515 could have reached but come after the barrier will now go in
17516 the next mini-pool. */
17517 if (last_barrier != NULL)
17519 /* Reduce the refcount for those fixes that won't go into this
17520 pool after all. */
17521 for (fdel = last_barrier->next;
17522 fdel && fdel != ftmp;
17523 fdel = fdel->next)
17525 fdel->minipool->refcount--;
17526 fdel->minipool = NULL;
17529 ftmp = last_barrier;
17531 else
17533 /* ftmp is the first fix that we can't fit into this pool and
17534 there are no natural barriers that we could use. Insert a
17535 new barrier in the code somewhere between the previous
17536 fix and this one, and arrange to jump around it. */
17537 HOST_WIDE_INT max_address;
17539 /* The last item on the list of fixes must be a barrier, so
17540 we can never run off the end of the list of fixes without
17541 last_barrier being set. */
17542 gcc_assert (ftmp);
17544 max_address = minipool_vector_head->max_address;
17545 /* Check that there isn't another fix that is in range that
17546 we couldn't fit into this pool because the pool was
17547 already too large: we need to put the pool before such an
17548 instruction. The pool itself may come just after the
17549 fix because create_fix_barrier also allows space for a
17550 jump instruction. */
17551 if (ftmp->address < max_address)
17552 max_address = ftmp->address + 1;
17554 last_barrier = create_fix_barrier (last_added_fix, max_address);
17557 assign_minipool_offsets (last_barrier);
17559 while (ftmp)
17561 if (!BARRIER_P (ftmp->insn)
17562 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17563 == NULL))
17564 break;
17566 ftmp = ftmp->next;
17569 /* Scan over the fixes we have identified for this pool, fixing them
17570 up and adding the constants to the pool itself. */
17571 for (this_fix = fix; this_fix && ftmp != this_fix;
17572 this_fix = this_fix->next)
17573 if (!BARRIER_P (this_fix->insn))
17575 rtx addr
17576 = plus_constant (Pmode,
17577 gen_rtx_LABEL_REF (VOIDmode,
17578 minipool_vector_label),
17579 this_fix->minipool->offset);
17580 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17583 dump_minipool (last_barrier->insn);
17584 fix = ftmp;
17587 /* From now on we must synthesize any constants that we can't handle
17588 directly. This can happen if the RTL gets split during final
17589 instruction generation. */
17590 cfun->machine->after_arm_reorg = 1;
17592 /* Free the minipool memory. */
17593 obstack_free (&minipool_obstack, minipool_startobj);
17596 /* Routines to output assembly language. */
17598 /* Return the string representation of the passed-in real value. Only 0.0 is expected here (see the assert below). */
17599 static const char *
17600 fp_const_from_val (REAL_VALUE_TYPE *r)
17602 if (!fp_consts_inited)
17603 init_fp_table ();
17605 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17606 return "0";
17609 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17610 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17611 insn is in the list, and UPDATE is true iff the list contains an
17612 explicit update of the base register. */
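/* For example (illustrative only), depending on the base register and
   the UPDATE/RETURN_PC flags this emits forms such as
           pop     {r4, r5, pc}
           ldmfd   sp!, {r4, r5, pc}
           ldmia   r3, {r4, r5}
   with a trailing "^" when returning from an interrupt handler. */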
17613 void
17614 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17615 bool update)
17617 int i;
17618 char pattern[100];
17619 int offset;
17620 const char *conditional;
17621 int num_saves = XVECLEN (operands[0], 0);
17622 unsigned int regno;
17623 unsigned int regno_base = REGNO (operands[1]);
17625 offset = 0;
17626 offset += update ? 1 : 0;
17627 offset += return_pc ? 1 : 0;
17629 /* Is the base register in the list? */
17630 for (i = offset; i < num_saves; i++)
17632 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17633 /* If SP is in the list, then the base register must be SP. */
17634 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17635 /* If base register is in the list, there must be no explicit update. */
17636 if (regno == regno_base)
17637 gcc_assert (!update);
17640 conditional = reverse ? "%?%D0" : "%?%d0";
17641 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17643 /* Output pop (not ldmfd) because it has a shorter encoding. */
17644 gcc_assert (update);
17645 sprintf (pattern, "pop%s\t{", conditional);
17647 else
17649 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17650 It's just a convention; their semantics are identical. */
17651 if (regno_base == SP_REGNUM)
17652 sprintf (pattern, "ldm%sfd\t", conditional);
17653 else if (TARGET_UNIFIED_ASM)
17654 sprintf (pattern, "ldmia%s\t", conditional);
17655 else
17656 sprintf (pattern, "ldm%sia\t", conditional);
17658 strcat (pattern, reg_names[regno_base]);
17659 if (update)
17660 strcat (pattern, "!, {");
17661 else
17662 strcat (pattern, ", {");
17665 /* Output the first destination register. */
17666 strcat (pattern,
17667 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17669 /* Output the rest of the destination registers. */
17670 for (i = offset + 1; i < num_saves; i++)
17672 strcat (pattern, ", ");
17673 strcat (pattern,
17674 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17677 strcat (pattern, "}");
17679 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17680 strcat (pattern, "^");
17682 output_asm_insn (pattern, &cond);
17686 /* Output the assembly for a VFP store multiple of D registers: vpush when the base register is the stack pointer, otherwise vstmdb with writeback. */
17688 const char *
17689 vfp_output_vstmd (rtx * operands)
17691 char pattern[100];
17692 int p;
17693 int base;
17694 int i;
17695 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17696 ? XEXP (operands[0], 0)
17697 : XEXP (XEXP (operands[0], 0), 0);
17698 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17700 if (push_p)
17701 strcpy (pattern, "vpush%?.64\t{%P1");
17702 else
17703 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17705 p = strlen (pattern);
17707 gcc_assert (REG_P (operands[1]));
17709 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17710 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17712 p += sprintf (&pattern[p], ", d%d", base + i);
17714 strcpy (&pattern[p], "}");
17716 output_asm_insn (pattern, operands);
17717 return "";
17721 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17722 number of bytes pushed. */
17724 static int
17725 vfp_emit_fstmd (int base_reg, int count)
17727 rtx par;
17728 rtx dwarf;
17729 rtx tmp, reg;
17730 int i;
17732 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17733 register pairs are stored by a store multiple insn. We avoid this
17734 by pushing an extra pair. */
17735 if (count == 2 && !arm_arch6)
17737 if (base_reg == LAST_VFP_REGNUM - 3)
17738 base_reg -= 2;
17739 count++;
17742 /* FSTMD may not store more than 16 doubleword registers at once. Split
17743 larger stores into multiple parts (up to a maximum of two, in
17744 practice). */
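/* Illustrative example: a request to push 20 D registers is split into
   a push of the upper four registers followed by a push of the
   remaining sixteen, so each FSTMD stays within the limit. */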
17745 if (count > 16)
17747 int saved;
17748 /* NOTE: base_reg is an internal register number, so each D register
17749 counts as 2. */
17750 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17751 saved += vfp_emit_fstmd (base_reg, 16);
17752 return saved;
17755 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17756 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17758 reg = gen_rtx_REG (DFmode, base_reg);
17759 base_reg += 2;
17761 XVECEXP (par, 0, 0)
17762 = gen_rtx_SET (VOIDmode,
17763 gen_frame_mem
17764 (BLKmode,
17765 gen_rtx_PRE_MODIFY (Pmode,
17766 stack_pointer_rtx,
17767 plus_constant
17768 (Pmode, stack_pointer_rtx,
17769 - (count * 8)))
17771 gen_rtx_UNSPEC (BLKmode,
17772 gen_rtvec (1, reg),
17773 UNSPEC_PUSH_MULT));
17775 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17776 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17777 RTX_FRAME_RELATED_P (tmp) = 1;
17778 XVECEXP (dwarf, 0, 0) = tmp;
17780 tmp = gen_rtx_SET (VOIDmode,
17781 gen_frame_mem (DFmode, stack_pointer_rtx),
17782 reg);
17783 RTX_FRAME_RELATED_P (tmp) = 1;
17784 XVECEXP (dwarf, 0, 1) = tmp;
17786 for (i = 1; i < count; i++)
17788 reg = gen_rtx_REG (DFmode, base_reg);
17789 base_reg += 2;
17790 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17792 tmp = gen_rtx_SET (VOIDmode,
17793 gen_frame_mem (DFmode,
17794 plus_constant (Pmode,
17795 stack_pointer_rtx,
17796 i * 8)),
17797 reg);
17798 RTX_FRAME_RELATED_P (tmp) = 1;
17799 XVECEXP (dwarf, 0, i + 1) = tmp;
17802 par = emit_insn (par);
17803 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17804 RTX_FRAME_RELATED_P (par) = 1;
17806 return count * 8;
17809 /* Emit a call instruction with pattern PAT. ADDR is the address of
17810 the call target. SIBCALL is true if this is a sibling call. */
17812 void
17813 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17815 rtx insn;
17817 insn = emit_call_insn (pat);
17819 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17820 If the call might use such an entry, add a use of the PIC register
17821 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17822 if (TARGET_VXWORKS_RTP
17823 && flag_pic
17824 && !sibcall
17825 && GET_CODE (addr) == SYMBOL_REF
17826 && (SYMBOL_REF_DECL (addr)
17827 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17828 : !SYMBOL_REF_LOCAL_P (addr)))
17830 require_pic_register ();
17831 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17834 if (TARGET_AAPCS_BASED)
17836 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17837 linker. We need to add an IP clobber to allow setting
17838 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17839 is not needed since it's a fixed register. */
17840 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17841 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17845 /* Output a 'call' insn. */
17846 const char *
17847 output_call (rtx *operands)
17849 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17851 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17852 if (REGNO (operands[0]) == LR_REGNUM)
17854 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17855 output_asm_insn ("mov%?\t%0, %|lr", operands);
17858 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17860 if (TARGET_INTERWORK || arm_arch4t)
17861 output_asm_insn ("bx%?\t%0", operands);
17862 else
17863 output_asm_insn ("mov%?\t%|pc, %0", operands);
17865 return "";
17868 /* Output a 'call' insn whose target is a reference in memory. This is
17869 disabled for ARMv5, where we prefer a blx instead, because otherwise
17870 there is a significant performance overhead. */
17871 const char *
17872 output_call_mem (rtx *operands)
17874 gcc_assert (!arm_arch5);
17875 if (TARGET_INTERWORK)
17877 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17878 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17879 output_asm_insn ("bx%?\t%|ip", operands);
17881 else if (regno_use_in (LR_REGNUM, operands[0]))
17883 /* LR is used in the memory address. We load the address in the
17884 first instruction. It's safe to use IP as the target of the
17885 load since the call will kill it anyway. */
17886 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17887 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17888 if (arm_arch4t)
17889 output_asm_insn ("bx%?\t%|ip", operands);
17890 else
17891 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17893 else
17895 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17896 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17899 return "";
17903 /* Output a move from arm registers to arm registers of a long double.
17904 OPERANDS[0] is the destination.
17905 OPERANDS[1] is the source. */
17906 const char *
17907 output_mov_long_double_arm_from_arm (rtx *operands)
17909 /* We have to be careful here because the two might overlap. */
17910 int dest_start = REGNO (operands[0]);
17911 int src_start = REGNO (operands[1]);
17912 rtx ops[2];
17913 int i;
17915 if (dest_start < src_start)
17917 for (i = 0; i < 3; i++)
17919 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17920 ops[1] = gen_rtx_REG (SImode, src_start + i);
17921 output_asm_insn ("mov%?\t%0, %1", ops);
17924 else
17926 for (i = 2; i >= 0; i--)
17928 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17929 ops[1] = gen_rtx_REG (SImode, src_start + i);
17930 output_asm_insn ("mov%?\t%0, %1", ops);
17934 return "";
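/* Emit DEST := SRC as a pair of SImode sets (typically assembled as a
   movw/movt pair).  For a constant source the low 16 bits are set
   first and the high 16 bits are then inserted with a zero_extract,
   e.g. (illustrative) 0x12345678 becomes a move of 0x5678 followed by
   an insertion of 0x1234 into bits 16-31; the insertion is skipped
   when the high half is zero.  For a symbolic source a HIGH/LO_SUM
   pair is emitted instead. */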
17937 void
17938 arm_emit_movpair (rtx dest, rtx src)
17940 /* If the src is an immediate, simplify it. */
17941 if (CONST_INT_P (src))
17943 HOST_WIDE_INT val = INTVAL (src);
17944 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17945 if ((val >> 16) & 0x0000ffff)
17946 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17947 GEN_INT (16)),
17948 GEN_INT ((val >> 16) & 0x0000ffff));
17949 return;
17951 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17952 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17955 /* Output a move between double words. It must be REG<-MEM
17956 or MEM<-REG. */
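/* For example (illustrative), a REG<-MEM move may come out as
           ldrd    r4, [r2]          @ when LDRD is available
   or      ldmia   r2, {r4, r5}
   or as two single-word ldr instructions when the addressing mode or
   register-overlap rules require it; the MEM<-REG direction uses the
   corresponding strd/stm/str forms. */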
17957 const char *
17958 output_move_double (rtx *operands, bool emit, int *count)
17960 enum rtx_code code0 = GET_CODE (operands[0]);
17961 enum rtx_code code1 = GET_CODE (operands[1]);
17962 rtx otherops[3];
17963 if (count)
17964 *count = 1;
17966 /* The only case when this might happen is when
17967 you are looking at the length of a DImode instruction
17968 that has an invalid constant in it. */
17969 if (code0 == REG && code1 != MEM)
17971 gcc_assert (!emit);
17972 *count = 2;
17973 return "";
17976 if (code0 == REG)
17978 unsigned int reg0 = REGNO (operands[0]);
17980 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17982 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17984 switch (GET_CODE (XEXP (operands[1], 0)))
17986 case REG:
17988 if (emit)
17990 if (TARGET_LDRD
17991 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17992 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17993 else
17994 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17996 break;
17998 case PRE_INC:
17999 gcc_assert (TARGET_LDRD);
18000 if (emit)
18001 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18002 break;
18004 case PRE_DEC:
18005 if (emit)
18007 if (TARGET_LDRD)
18008 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18009 else
18010 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18012 break;
18014 case POST_INC:
18015 if (emit)
18017 if (TARGET_LDRD)
18018 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18019 else
18020 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18022 break;
18024 case POST_DEC:
18025 gcc_assert (TARGET_LDRD);
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18028 break;
18030 case PRE_MODIFY:
18031 case POST_MODIFY:
18032 /* Autoincrement addressing modes should never have overlapping
18033 base and destination registers, and overlapping index registers
18034 are already prohibited, so this doesn't need to worry about
18035 fix_cm3_ldrd. */
18036 otherops[0] = operands[0];
18037 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18038 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18040 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18042 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18044 /* Registers overlap so split out the increment. */
18045 if (emit)
18047 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18048 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18050 if (count)
18051 *count = 2;
18053 else
18055 /* Use a single insn if we can.
18056 FIXME: IWMMXT allows offsets larger than ldrd can
18057 handle, fix these up with a pair of ldr. */
18058 if (TARGET_THUMB2
18059 || !CONST_INT_P (otherops[2])
18060 || (INTVAL (otherops[2]) > -256
18061 && INTVAL (otherops[2]) < 256))
18063 if (emit)
18064 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18066 else
18068 if (emit)
18070 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18071 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18073 if (count)
18074 *count = 2;
18079 else
18081 /* Use a single insn if we can.
18082 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18083 fix these up with a pair of ldr. */
18084 if (TARGET_THUMB2
18085 || !CONST_INT_P (otherops[2])
18086 || (INTVAL (otherops[2]) > -256
18087 && INTVAL (otherops[2]) < 256))
18089 if (emit)
18090 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18092 else
18094 if (emit)
18096 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18097 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18099 if (count)
18100 *count = 2;
18103 break;
18105 case LABEL_REF:
18106 case CONST:
18107 /* We might be able to use ldrd %0, %1 here. However the range is
18108 different to ldr/adr, and it is broken on some ARMv7-M
18109 implementations. */
18110 /* Use the second register of the pair to avoid problematic
18111 overlap. */
18112 otherops[1] = operands[1];
18113 if (emit)
18114 output_asm_insn ("adr%?\t%0, %1", otherops);
18115 operands[1] = otherops[0];
18116 if (emit)
18118 if (TARGET_LDRD)
18119 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18120 else
18121 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18124 if (count)
18125 *count = 2;
18126 break;
18128 /* ??? This needs checking for thumb2. */
18129 default:
18130 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18131 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18133 otherops[0] = operands[0];
18134 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18135 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18137 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18139 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18141 switch ((int) INTVAL (otherops[2]))
18143 case -8:
18144 if (emit)
18145 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18146 return "";
18147 case -4:
18148 if (TARGET_THUMB2)
18149 break;
18150 if (emit)
18151 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18152 return "";
18153 case 4:
18154 if (TARGET_THUMB2)
18155 break;
18156 if (emit)
18157 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18158 return "";
18161 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18162 operands[1] = otherops[0];
18163 if (TARGET_LDRD
18164 && (REG_P (otherops[2])
18165 || TARGET_THUMB2
18166 || (CONST_INT_P (otherops[2])
18167 && INTVAL (otherops[2]) > -256
18168 && INTVAL (otherops[2]) < 256)))
18170 if (reg_overlap_mentioned_p (operands[0],
18171 otherops[2]))
18173 rtx tmp;
18174 /* Swap base and index registers over to
18175 avoid a conflict. */
18176 tmp = otherops[1];
18177 otherops[1] = otherops[2];
18178 otherops[2] = tmp;
18180 /* If both registers conflict, it will usually
18181 have been fixed by a splitter. */
18182 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18183 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18185 if (emit)
18187 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18188 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18190 if (count)
18191 *count = 2;
18193 else
18195 otherops[0] = operands[0];
18196 if (emit)
18197 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18199 return "";
18202 if (CONST_INT_P (otherops[2]))
18204 if (emit)
18206 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18207 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18208 else
18209 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18212 else
18214 if (emit)
18215 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18218 else
18220 if (emit)
18221 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18224 if (count)
18225 *count = 2;
18227 if (TARGET_LDRD)
18228 return "ldr%(d%)\t%0, [%1]";
18230 return "ldm%(ia%)\t%1, %M0";
18232 else
18234 otherops[1] = adjust_address (operands[1], SImode, 4);
18235 /* Take care of overlapping base/data reg. */
18236 if (reg_mentioned_p (operands[0], operands[1]))
18238 if (emit)
18240 output_asm_insn ("ldr%?\t%0, %1", otherops);
18241 output_asm_insn ("ldr%?\t%0, %1", operands);
18243 if (count)
18244 *count = 2;
18247 else
18249 if (emit)
18251 output_asm_insn ("ldr%?\t%0, %1", operands);
18252 output_asm_insn ("ldr%?\t%0, %1", otherops);
18254 if (count)
18255 *count = 2;
18260 else
18262 /* Constraints should ensure this. */
18263 gcc_assert (code0 == MEM && code1 == REG);
18264 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18265 || (TARGET_ARM && TARGET_LDRD));
18267 switch (GET_CODE (XEXP (operands[0], 0)))
18269 case REG:
18270 if (emit)
18272 if (TARGET_LDRD)
18273 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18274 else
18275 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18277 break;
18279 case PRE_INC:
18280 gcc_assert (TARGET_LDRD);
18281 if (emit)
18282 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18283 break;
18285 case PRE_DEC:
18286 if (emit)
18288 if (TARGET_LDRD)
18289 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18290 else
18291 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18293 break;
18295 case POST_INC:
18296 if (emit)
18298 if (TARGET_LDRD)
18299 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18300 else
18301 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18303 break;
18305 case POST_DEC:
18306 gcc_assert (TARGET_LDRD);
18307 if (emit)
18308 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18309 break;
18311 case PRE_MODIFY:
18312 case POST_MODIFY:
18313 otherops[0] = operands[1];
18314 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18315 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18317 /* IWMMXT allows offsets larger than strd can handle,
18318 fix these up with a pair of str. */
18319 if (!TARGET_THUMB2
18320 && CONST_INT_P (otherops[2])
18321 && (INTVAL(otherops[2]) <= -256
18322 || INTVAL(otherops[2]) >= 256))
18324 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18326 if (emit)
18328 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18329 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18331 if (count)
18332 *count = 2;
18334 else
18336 if (emit)
18338 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18339 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18341 if (count)
18342 *count = 2;
18345 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18347 if (emit)
18348 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18350 else
18352 if (emit)
18353 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18355 break;
18357 case PLUS:
18358 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18359 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18361 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18363 case -8:
18364 if (emit)
18365 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18366 return "";
18368 case -4:
18369 if (TARGET_THUMB2)
18370 break;
18371 if (emit)
18372 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18373 return "";
18375 case 4:
18376 if (TARGET_THUMB2)
18377 break;
18378 if (emit)
18379 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18380 return "";
18383 if (TARGET_LDRD
18384 && (REG_P (otherops[2])
18385 || TARGET_THUMB2
18386 || (CONST_INT_P (otherops[2])
18387 && INTVAL (otherops[2]) > -256
18388 && INTVAL (otherops[2]) < 256)))
18390 otherops[0] = operands[1];
18391 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18392 if (emit)
18393 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18394 return "";
18396 /* Fall through */
18398 default:
18399 otherops[0] = adjust_address (operands[0], SImode, 4);
18400 otherops[1] = operands[1];
18401 if (emit)
18403 output_asm_insn ("str%?\t%1, %0", operands);
18404 output_asm_insn ("str%?\t%H1, %0", otherops);
18406 if (count)
18407 *count = 2;
18411 return "";
18414 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18415 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
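/* For example (illustrative), a load from a plain register address
   comes out as an ldmia of four core registers, a store as the
   matching stmia, and a reg->reg move as four mov instructions ordered
   to cope with overlap between source and destination. */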
18417 const char *
18418 output_move_quad (rtx *operands)
18420 if (REG_P (operands[0]))
18422 /* Load, or reg->reg move. */
18424 if (MEM_P (operands[1]))
18426 switch (GET_CODE (XEXP (operands[1], 0)))
18428 case REG:
18429 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18430 break;
18432 case LABEL_REF:
18433 case CONST:
18434 output_asm_insn ("adr%?\t%0, %1", operands);
18435 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18436 break;
18438 default:
18439 gcc_unreachable ();
18442 else
18444 rtx ops[2];
18445 int dest, src, i;
18447 gcc_assert (REG_P (operands[1]));
18449 dest = REGNO (operands[0]);
18450 src = REGNO (operands[1]);
18452 /* This seems pretty dumb, but hopefully GCC won't try to do it
18453 very often. */
18454 if (dest < src)
18455 for (i = 0; i < 4; i++)
18457 ops[0] = gen_rtx_REG (SImode, dest + i);
18458 ops[1] = gen_rtx_REG (SImode, src + i);
18459 output_asm_insn ("mov%?\t%0, %1", ops);
18461 else
18462 for (i = 3; i >= 0; i--)
18464 ops[0] = gen_rtx_REG (SImode, dest + i);
18465 ops[1] = gen_rtx_REG (SImode, src + i);
18466 output_asm_insn ("mov%?\t%0, %1", ops);
18470 else
18472 gcc_assert (MEM_P (operands[0]));
18473 gcc_assert (REG_P (operands[1]));
18474 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18476 switch (GET_CODE (XEXP (operands[0], 0)))
18478 case REG:
18479 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18480 break;
18482 default:
18483 gcc_unreachable ();
18487 return "";
18490 /* Output a VFP load or store instruction. */
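/* Depending on the addressing mode this emits (illustrative forms)
   vldr/vstr for a plain or offset address, vldmia/vstmia with
   writeback for POST_INC, and vldmdb/vstmdb with writeback for
   PRE_DEC, with a .32 or .64 size suffix chosen from the operand
   mode. */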
18492 const char *
18493 output_move_vfp (rtx *operands)
18495 rtx reg, mem, addr, ops[2];
18496 int load = REG_P (operands[0]);
18497 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18498 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18499 const char *templ;
18500 char buff[50];
18501 machine_mode mode;
18503 reg = operands[!load];
18504 mem = operands[load];
18506 mode = GET_MODE (reg);
18508 gcc_assert (REG_P (reg));
18509 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18510 gcc_assert (mode == SFmode
18511 || mode == DFmode
18512 || mode == SImode
18513 || mode == DImode
18514 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18515 gcc_assert (MEM_P (mem));
18517 addr = XEXP (mem, 0);
18519 switch (GET_CODE (addr))
18521 case PRE_DEC:
18522 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18523 ops[0] = XEXP (addr, 0);
18524 ops[1] = reg;
18525 break;
18527 case POST_INC:
18528 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18529 ops[0] = XEXP (addr, 0);
18530 ops[1] = reg;
18531 break;
18533 default:
18534 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18535 ops[0] = reg;
18536 ops[1] = mem;
18537 break;
18540 sprintf (buff, templ,
18541 load ? "ld" : "st",
18542 dp ? "64" : "32",
18543 dp ? "P" : "",
18544 integer_p ? "\t%@ int" : "");
18545 output_asm_insn (buff, ops);
18547 return "";
18550 /* Output a Neon double-word or quad-word load or store, or a load
18551 or store for larger structure modes.
18553 WARNING: The ordering of elements is weird in big-endian mode,
18554 because the EABI requires that vectors stored in memory appear
18555 as though they were stored by a VSTM, as required by the EABI.
18556 GCC RTL defines element ordering based on in-memory order.
18557 This can be different from the architectural ordering of elements
18558 within a NEON register. The intrinsics defined in arm_neon.h use the
18559 NEON register element ordering, not the GCC RTL element ordering.
18561 For example, the in-memory ordering of a big-endian quadword
18562 vector with 16-bit elements when stored from register pair {d0,d1}
18563 will be (lowest address first, d0[N] is NEON register element N):
18565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18567 When necessary, quadword registers (dN, dN+1) are moved to ARM
18568 registers from rN in the order:
18570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18572 So that STM/LDM can be used on vectors in ARM registers, and the
18573 same memory layout will result as if VSTM/VLDM were used.
18575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18576 possible, which allows use of appropriate alignment tags.
18577 Note that the choice of "64" is independent of the actual vector
18578 element size; this size simply ensures that the behavior is
18579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18581 Due to limitations of those instructions, use of VST1.64/VLD1.64
18582 is not possible if:
18583 - the address contains PRE_DEC, or
18584 - the mode refers to more than 4 double-word registers
18586 In those cases, it would be possible to replace VSTM/VLDM by a
18587 sequence of instructions; this is not currently implemented since
18588 this is not certain to actually improve performance. */
18590 const char *
18591 output_move_neon (rtx *operands)
18593 rtx reg, mem, addr, ops[2];
18594 int regno, nregs, load = REG_P (operands[0]);
18595 const char *templ;
18596 char buff[50];
18597 machine_mode mode;
18599 reg = operands[!load];
18600 mem = operands[load];
18602 mode = GET_MODE (reg);
18604 gcc_assert (REG_P (reg));
18605 regno = REGNO (reg);
18606 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18608 || NEON_REGNO_OK_FOR_QUAD (regno));
18609 gcc_assert (VALID_NEON_DREG_MODE (mode)
18610 || VALID_NEON_QREG_MODE (mode)
18611 || VALID_NEON_STRUCT_MODE (mode));
18612 gcc_assert (MEM_P (mem));
18614 addr = XEXP (mem, 0);
18616 /* Strip off const from addresses like (const (plus (...))). */
18617 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18618 addr = XEXP (addr, 0);
18620 switch (GET_CODE (addr))
18622 case POST_INC:
18623 /* We have to use vldm / vstm for too-large modes. */
18624 if (nregs > 4)
18626 templ = "v%smia%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18629 else
18631 templ = "v%s1.64\t%%h1, %%A0";
18632 ops[0] = mem;
18634 ops[1] = reg;
18635 break;
18637 case PRE_DEC:
18638 /* We have to use vldm / vstm in this case, since there is no
18639 pre-decrement form of the vld1 / vst1 instructions. */
18640 templ = "v%smdb%%?\t%%0!, %%h1";
18641 ops[0] = XEXP (addr, 0);
18642 ops[1] = reg;
18643 break;
18645 case POST_MODIFY:
18646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18647 gcc_unreachable ();
18649 case REG:
18650 /* We have to use vldm / vstm for too-large modes. */
18651 if (nregs > 1)
18653 if (nregs > 4)
18654 templ = "v%smia%%?\t%%m0, %%h1";
18655 else
18656 templ = "v%s1.64\t%%h1, %%A0";
18658 ops[0] = mem;
18659 ops[1] = reg;
18660 break;
18662 /* Fall through. */
18663 case LABEL_REF:
18664 case PLUS:
18666 int i;
18667 int overlap = -1;
18668 for (i = 0; i < nregs; i++)
18670 /* We're only using DImode here because it's a convenient size. */
18671 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18672 ops[1] = adjust_address (mem, DImode, 8 * i);
18673 if (reg_overlap_mentioned_p (ops[0], mem))
18675 gcc_assert (overlap == -1);
18676 overlap = i;
18678 else
18680 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18681 output_asm_insn (buff, ops);
18684 if (overlap != -1)
18686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18687 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18688 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18689 output_asm_insn (buff, ops);
18692 return "";
18695 default:
18696 gcc_unreachable ();
18699 sprintf (buff, templ, load ? "ld" : "st");
18700 output_asm_insn (buff, ops);
18702 return "";
18705 /* Compute and return the length of neon_mov<mode>, where <mode> is
18706 one of VSTRUCT modes: EI, OI, CI or XI. */
18708 arm_attr_length_move_neon (rtx_insn *insn)
18710 rtx reg, mem, addr;
18711 int load;
18712 machine_mode mode;
18714 extract_insn_cached (insn);
18716 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18718 mode = GET_MODE (recog_data.operand[0]);
18719 switch (mode)
18721 case EImode:
18722 case OImode:
18723 return 8;
18724 case CImode:
18725 return 12;
18726 case XImode:
18727 return 16;
18728 default:
18729 gcc_unreachable ();
18733 load = REG_P (recog_data.operand[0]);
18734 reg = recog_data.operand[!load];
18735 mem = recog_data.operand[load];
18737 gcc_assert (MEM_P (mem));
18739 mode = GET_MODE (reg);
18740 addr = XEXP (mem, 0);
18742 /* Strip off const from addresses like (const (plus (...))). */
18743 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18744 addr = XEXP (addr, 0);
18746 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18748 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18749 return insns * 4;
18751 else
18752 return 4;
18755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18756 return zero. */
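/* Illustrative examples: a store to [r3] or to [r3, #8] returns 1,
   while a store to a register-indexed address such as [r3, r2]
   returns 0, as does any insn whose first operand is a register
   (i.e. a load). */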
18759 arm_address_offset_is_imm (rtx_insn *insn)
18761 rtx mem, addr;
18763 extract_insn_cached (insn);
18765 if (REG_P (recog_data.operand[0]))
18766 return 0;
18768 mem = recog_data.operand[0];
18770 gcc_assert (MEM_P (mem));
18772 addr = XEXP (mem, 0);
18774 if (REG_P (addr)
18775 || (GET_CODE (addr) == PLUS
18776 && REG_P (XEXP (addr, 0))
18777 && CONST_INT_P (XEXP (addr, 1))))
18778 return 1;
18779 else
18780 return 0;
18783 /* Output an ADD r, s, #n where n may be too big for one instruction.
18784 If adding zero and the destination is the same register as the source, output nothing. */
18785 const char *
18786 output_add_immediate (rtx *operands)
18788 HOST_WIDE_INT n = INTVAL (operands[2]);
18790 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18792 if (n < 0)
18793 output_multi_immediate (operands,
18794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18795 -n);
18796 else
18797 output_multi_immediate (operands,
18798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18802 return "";
18805 /* Output a multiple immediate operation.
18806 OPERANDS is the vector of operands referred to in the output patterns.
18807 INSTR1 is the output pattern to use for the first constant.
18808 INSTR2 is the output pattern to use for subsequent constants.
18809 IMMED_OP is the index of the constant slot in OPERANDS.
18810 N is the constant value. */
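/* Worked example (illustrative): N == 0x45F is peeled off in byte-sized
   chunks at even bit positions, 0x5F and then 0x400, each of which is a
   valid ARM immediate, so output_add_immediate would emit something like
           add     r0, r1, #95
           add     r0, r0, #1024  */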
18811 static const char *
18812 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18813 int immed_op, HOST_WIDE_INT n)
18815 #if HOST_BITS_PER_WIDE_INT > 32
18816 n &= 0xffffffff;
18817 #endif
18819 if (n == 0)
18821 /* Quick and easy output. */
18822 operands[immed_op] = const0_rtx;
18823 output_asm_insn (instr1, operands);
18825 else
18827 int i;
18828 const char * instr = instr1;
18830 /* Note that n is never zero here (which would give no output). */
18831 for (i = 0; i < 32; i += 2)
18833 if (n & (3 << i))
18835 operands[immed_op] = GEN_INT (n & (255 << i));
18836 output_asm_insn (instr, operands);
18837 instr = instr2;
18838 i += 6;
18843 return "";
18846 /* Return the name of a shifter operation. */
18847 static const char *
18848 arm_shift_nmem(enum rtx_code code)
18850 switch (code)
18852 case ASHIFT:
18853 return ARM_LSL_NAME;
18855 case ASHIFTRT:
18856 return "asr";
18858 case LSHIFTRT:
18859 return "lsr";
18861 case ROTATERT:
18862 return "ror";
18864 default:
18865 abort();
18869 /* Return the appropriate ARM instruction for the operation code.
18870 The returned result should not be overwritten. OP is the rtx of the
18871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18872 was shifted. */
18873 const char *
18874 arithmetic_instr (rtx op, int shift_first_arg)
18876 switch (GET_CODE (op))
18878 case PLUS:
18879 return "add";
18881 case MINUS:
18882 return shift_first_arg ? "rsb" : "sub";
18884 case IOR:
18885 return "orr";
18887 case XOR:
18888 return "eor";
18890 case AND:
18891 return "and";
18893 case ASHIFT:
18894 case ASHIFTRT:
18895 case LSHIFTRT:
18896 case ROTATERT:
18897 return arm_shift_nmem(GET_CODE(op));
18899 default:
18900 gcc_unreachable ();
18904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18905 for the operation code. The returned result should not be overwritten.
18906 OP is the rtx of the shift.
18907 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18908 constant shift amount otherwise. */
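/* For example (illustrative), an ASHIFT by the constant 3 yields "lsl"
   with *AMOUNTP set to 3, a shift by a register yields the mnemonic
   with *AMOUNTP set to -1, and (mult x 8) is likewise folded to "lsl"
   with an amount of 3. */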
18909 static const char *
18910 shift_op (rtx op, HOST_WIDE_INT *amountp)
18912 const char * mnem;
18913 enum rtx_code code = GET_CODE (op);
18915 switch (code)
18917 case ROTATE:
18918 if (!CONST_INT_P (XEXP (op, 1)))
18920 output_operand_lossage ("invalid shift operand");
18921 return NULL;
18924 code = ROTATERT;
18925 *amountp = 32 - INTVAL (XEXP (op, 1));
18926 mnem = "ror";
18927 break;
18929 case ASHIFT:
18930 case ASHIFTRT:
18931 case LSHIFTRT:
18932 case ROTATERT:
18933 mnem = arm_shift_nmem(code);
18934 if (CONST_INT_P (XEXP (op, 1)))
18936 *amountp = INTVAL (XEXP (op, 1));
18938 else if (REG_P (XEXP (op, 1)))
18940 *amountp = -1;
18941 return mnem;
18943 else
18945 output_operand_lossage ("invalid shift operand");
18946 return NULL;
18948 break;
18950 case MULT:
18951 /* We never have to worry about the amount being other than a
18952 power of 2, since this case can never be reloaded from a reg. */
18953 if (!CONST_INT_P (XEXP (op, 1)))
18955 output_operand_lossage ("invalid shift operand");
18956 return NULL;
18959 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18961 /* Amount must be a power of two. */
18962 if (*amountp & (*amountp - 1))
18964 output_operand_lossage ("invalid shift operand");
18965 return NULL;
18968 *amountp = int_log2 (*amountp);
18969 return ARM_LSL_NAME;
18971 default:
18972 output_operand_lossage ("invalid shift operand");
18973 return NULL;
18976 /* This is not 100% correct, but follows from the desire to merge
18977 multiplication by a power of 2 with the recognizer for a
18978 shift. >=32 is not a valid shift for "lsl", so we must try and
18979 output a shift that produces the correct arithmetical result.
18980 Using lsr #32 is identical except for the fact that the carry bit
18981 is not set correctly if we set the flags; but we never use the
18982 carry bit from such an operation, so we can ignore that. */
18983 if (code == ROTATERT)
18984 /* Rotate is just modulo 32. */
18985 *amountp &= 31;
18986 else if (*amountp != (*amountp & 31))
18988 if (code == ASHIFT)
18989 mnem = "lsr";
18990 *amountp = 32;
18993 /* Shifts of 0 are no-ops. */
18994 if (*amountp == 0)
18995 return NULL;
18997 return mnem;
19000 /* Obtain the shift count from the power of two POWER, e.g. int_log2 (8) == 3. */
19002 static HOST_WIDE_INT
19003 int_log2 (HOST_WIDE_INT power)
19005 HOST_WIDE_INT shift = 0;
19007 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19009 gcc_assert (shift <= 31);
19010 shift++;
19013 return shift;
19016 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19017 because /bin/as is horribly restrictive. The judgement about
19018 whether or not each character is 'printable' (and can be output as
19019 is) or not (and must be printed with an octal escape) must be made
19020 with reference to the *host* character set -- the situation is
19021 similar to that discussed in the comments above pp_c_char in
19022 c-pretty-print.c. */
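/* Illustrative example: the three input bytes 'a', '"' and '\n' are
   emitted as
           .ascii  "a\"\012"
   and the directive is split and restarted whenever the running
   length reaches MAX_ASCII_LEN. */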
19024 #define MAX_ASCII_LEN 51
19026 void
19027 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19029 int i;
19030 int len_so_far = 0;
19032 fputs ("\t.ascii\t\"", stream);
19034 for (i = 0; i < len; i++)
19036 int c = p[i];
19038 if (len_so_far >= MAX_ASCII_LEN)
19040 fputs ("\"\n\t.ascii\t\"", stream);
19041 len_so_far = 0;
19044 if (ISPRINT (c))
19046 if (c == '\\' || c == '\"')
19048 putc ('\\', stream);
19049 len_so_far++;
19051 putc (c, stream);
19052 len_so_far++;
19054 else
19056 fprintf (stream, "\\%03o", c);
19057 len_so_far += 4;
19061 fputs ("\"\n", stream);
19064 /* Compute the register save mask for registers 0 through 12
19065 inclusive. This code is used by arm_compute_save_reg_mask. */
19067 static unsigned long
19068 arm_compute_save_reg0_reg12_mask (void)
19070 unsigned long func_type = arm_current_func_type ();
19071 unsigned long save_reg_mask = 0;
19072 unsigned int reg;
19074 if (IS_INTERRUPT (func_type))
19076 unsigned int max_reg;
19077 /* Interrupt functions must not corrupt any registers,
19078 even call clobbered ones. If this is a leaf function
19079 we can just examine the registers used by the RTL, but
19080 otherwise we have to assume that whatever function is
19081 called might clobber anything, and so we have to save
19082 all the call-clobbered registers as well. */
19083 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19084 /* FIQ handlers have registers r8 - r12 banked, so
19085 we only need to check r0 - r7. Normal ISRs only
19086 bank r14 and r15, so we must check up to r12.
19087 r13 is the stack pointer which is always preserved,
19088 so we do not need to consider it here. */
19089 max_reg = 7;
19090 else
19091 max_reg = 12;
19093 for (reg = 0; reg <= max_reg; reg++)
19094 if (df_regs_ever_live_p (reg)
19095 || (! crtl->is_leaf && call_used_regs[reg]))
19096 save_reg_mask |= (1 << reg);
19098 /* Also save the pic base register if necessary. */
19099 if (flag_pic
19100 && !TARGET_SINGLE_PIC_BASE
19101 && arm_pic_register != INVALID_REGNUM
19102 && crtl->uses_pic_offset_table)
19103 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19105 else if (IS_VOLATILE(func_type))
19107 /* For noreturn functions we historically omitted register saves
19108 altogether. However this really messes up debugging. As a
19109 compromise save just the frame pointers. Combined with the link
19110 register saved elsewhere this should be sufficient to get
19111 a backtrace. */
19112 if (frame_pointer_needed)
19113 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19114 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19115 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19116 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19117 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19119 else
19121 /* In the normal case we only need to save those registers
19122 which are call saved and which are used by this function. */
19123 for (reg = 0; reg <= 11; reg++)
19124 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19125 save_reg_mask |= (1 << reg);
19127 /* Handle the frame pointer as a special case. */
19128 if (frame_pointer_needed)
19129 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19131 /* If we aren't loading the PIC register,
19132 don't stack it even though it may be live. */
19133 if (flag_pic
19134 && !TARGET_SINGLE_PIC_BASE
19135 && arm_pic_register != INVALID_REGNUM
19136 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19137 || crtl->uses_pic_offset_table))
19138 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19140 /* The prologue will copy SP into R0, so save it. */
19141 if (IS_STACKALIGN (func_type))
19142 save_reg_mask |= 1;
19145 /* Save registers so the exception handler can modify them. */
19146 if (crtl->calls_eh_return)
19148 unsigned int i;
19150 for (i = 0; ; i++)
19152 reg = EH_RETURN_DATA_REGNO (i);
19153 if (reg == INVALID_REGNUM)
19154 break;
19155 save_reg_mask |= 1 << reg;
19159 return save_reg_mask;
19162 /* Return true if r3 is live at the start of the function. */
19164 static bool
19165 arm_r3_live_at_start_p (void)
19167 /* Just look at cfg info, which is still close enough to correct at this
19168 point. This gives false positives for broken functions that might use
19169 uninitialized data that happens to be allocated in r3, but who cares? */
19170 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19173 /* Compute the number of bytes used to store the static chain register on the
19174 stack, above the stack frame. We need to know this accurately to get the
19175 alignment of the rest of the stack frame correct. */
19177 static int
19178 arm_compute_static_chain_stack_bytes (void)
19180 /* See the defining assertion in arm_expand_prologue. */
19181 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19182 && IS_NESTED (arm_current_func_type ())
19183 && arm_r3_live_at_start_p ()
19184 && crtl->args.pretend_args_size == 0)
19185 return 4;
19187 return 0;
19190 /* Compute a bit mask of which registers need to be
19191 saved on the stack for the current function.
19192 This is used by arm_get_frame_offsets, which may add extra registers. */
19194 static unsigned long
19195 arm_compute_save_reg_mask (void)
19197 unsigned int save_reg_mask = 0;
19198 unsigned long func_type = arm_current_func_type ();
19199 unsigned int reg;
19201 if (IS_NAKED (func_type))
19202 /* This should never really happen. */
19203 return 0;
19205 /* If we are creating a stack frame, then we must save the frame pointer,
19206 IP (which will hold the old stack pointer), LR and the PC. */
19207 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19208 save_reg_mask |=
19209 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19210 | (1 << IP_REGNUM)
19211 | (1 << LR_REGNUM)
19212 | (1 << PC_REGNUM);
19214 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19216 /* Decide if we need to save the link register.
19217 Interrupt routines have their own banked link register,
19218 so they never need to save it.
19219 Otherwise if we do not use the link register we do not need to save
19220 it. If we are pushing other registers onto the stack however, we
19221 can save an instruction in the epilogue by pushing the link register
19222 now and then popping it back into the PC. This incurs extra memory
19223 accesses though, so we only do it when optimizing for size, and only
19224 if we know that we will not need a fancy return sequence. */
19225 if (df_regs_ever_live_p (LR_REGNUM)
19226 || (save_reg_mask
19227 && optimize_size
19228 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19229 && !crtl->calls_eh_return))
19230 save_reg_mask |= 1 << LR_REGNUM;
19232 if (cfun->machine->lr_save_eliminated)
19233 save_reg_mask &= ~ (1 << LR_REGNUM);
19235 if (TARGET_REALLY_IWMMXT
19236 && ((bit_count (save_reg_mask)
19237 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19238 arm_compute_static_chain_stack_bytes())
19239 ) % 2) != 0)
19241 /* The total number of registers that are going to be pushed
19242 onto the stack is odd. We need to ensure that the stack
19243 is 64-bit aligned before we start to save iWMMXt registers,
19244 and also before we start to create locals. (A local variable
19245 might be a double or long long which we will load/store using
19246 an iWMMXt instruction). Therefore we need to push another
19247 ARM register, so that the stack will be 64-bit aligned. We
19248 try to avoid using the arg registers (r0 - r3) as they might be
19249 used to pass values in a tail call. */
19250 for (reg = 4; reg <= 12; reg++)
19251 if ((save_reg_mask & (1 << reg)) == 0)
19252 break;
19254 if (reg <= 12)
19255 save_reg_mask |= (1 << reg);
19256 else
19258 cfun->machine->sibcall_blocked = 1;
19259 save_reg_mask |= (1 << 3);
19263 /* We may need to push an additional register for use initializing the
19264 PIC base register. */
19265 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19266 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19268 reg = thumb_find_work_register (1 << 4);
19269 if (!call_used_regs[reg])
19270 save_reg_mask |= (1 << reg);
19273 return save_reg_mask;
19277 /* Compute a bit mask of which registers need to be
19278 saved on the stack for the current function. */
19279 static unsigned long
19280 thumb1_compute_save_reg_mask (void)
19282 unsigned long mask;
19283 unsigned reg;
19285 mask = 0;
19286 for (reg = 0; reg < 12; reg ++)
19287 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19288 mask |= 1 << reg;
19290 if (flag_pic
19291 && !TARGET_SINGLE_PIC_BASE
19292 && arm_pic_register != INVALID_REGNUM
19293 && crtl->uses_pic_offset_table)
19294 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19296 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19297 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19298 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19300 /* LR will also be pushed if any lo regs are pushed. */
19301 if (mask & 0xff || thumb_force_lr_save ())
19302 mask |= (1 << LR_REGNUM);
19304 /* Make sure we have a low work register if we need one.
19305 We will need one if we are going to push a high register,
19306 but we are not currently intending to push a low register. */
19307 if ((mask & 0xff) == 0
19308 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19310 /* Use thumb_find_work_register to choose which register
19311 we will use. If the register is live then we will
19312 have to push it. Use LAST_LO_REGNUM as our fallback
19313 choice for the register to select. */
19314 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19315 /* Make sure the register returned by thumb_find_work_register is
19316 not part of the return value. */
19317 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19318 reg = LAST_LO_REGNUM;
19320 if (! call_used_regs[reg])
19321 mask |= 1 << reg;
19324 /* The 504 below is 8 bytes less than 512 because there are two possible
19325 alignment words. We can't tell here if they will be present or not so we
19326 have to play it safe and assume that they are. */
19327 if ((CALLER_INTERWORKING_SLOT_SIZE +
19328 ROUND_UP_WORD (get_frame_size ()) +
19329 crtl->outgoing_args_size) >= 504)
19331 /* This is the same as the code in thumb1_expand_prologue() which
19332 determines which register to use for stack decrement. */
19333 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19334 if (mask & (1 << reg))
19335 break;
19337 if (reg > LAST_LO_REGNUM)
19339 /* Make sure we have a register available for stack decrement. */
19340 mask |= 1 << LAST_LO_REGNUM;
19344 return mask;
19348 /* Return the number of bytes required to save VFP registers. */
19349 static int
19350 arm_get_vfp_saved_size (void)
19352 unsigned int regno;
19353 int count;
19354 int saved;
19356 saved = 0;
19357 /* Space for saved VFP registers. */
19358 if (TARGET_HARD_FLOAT && TARGET_VFP)
19360 count = 0;
19361 for (regno = FIRST_VFP_REGNUM;
19362 regno < LAST_VFP_REGNUM;
19363 regno += 2)
19365 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19366 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19368 if (count > 0)
19370 /* Workaround ARM10 VFPr1 bug. */
19371 if (count == 2 && !arm_arch6)
19372 count++;
19373 saved += count * 8;
19375 count = 0;
19377 else
19378 count++;
19380 if (count > 0)
19382 if (count == 2 && !arm_arch6)
19383 count++;
19384 saved += count * 8;
19387 return saved;
19391 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19392 everything bar the final return instruction. If simple_return is true,
19393 then do not output the epilogue, because it has already been emitted in RTL. */
19394 const char *
19395 output_return_instruction (rtx operand, bool really_return, bool reverse,
19396 bool simple_return)
19398 char conditional[10];
19399 char instr[100];
19400 unsigned reg;
19401 unsigned long live_regs_mask;
19402 unsigned long func_type;
19403 arm_stack_offsets *offsets;
19405 func_type = arm_current_func_type ();
19407 if (IS_NAKED (func_type))
19408 return "";
19410 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19412 /* If this function was declared non-returning, and we have
19413 found a tail call, then we have to trust that the called
19414 function won't return. */
19415 if (really_return)
19417 rtx ops[2];
19419 /* Otherwise, trap an attempted return by aborting. */
19420 ops[0] = operand;
19421 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19422 : "abort");
19423 assemble_external_libcall (ops[1]);
19424 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19427 return "";
19430 gcc_assert (!cfun->calls_alloca || really_return);
19432 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19434 cfun->machine->return_used_this_function = 1;
19436 offsets = arm_get_frame_offsets ();
19437 live_regs_mask = offsets->saved_regs_mask;
19439 if (!simple_return && live_regs_mask)
19441 const char * return_reg;
19443 /* If we do not have any special requirements for function exit
19444 (e.g. interworking) then we can load the return address
19445 directly into the PC. Otherwise we must load it into LR. */
19446 if (really_return
19447 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19448 return_reg = reg_names[PC_REGNUM];
19449 else
19450 return_reg = reg_names[LR_REGNUM];
19452 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19454 /* There are three possible reasons for the IP register
19455 being saved. 1) a stack frame was created, in which case
19456 IP contains the old stack pointer, or 2) an ISR routine
19457 corrupted it, or 3) it was saved to align the stack on
19458 iWMMXt. In case 1, restore IP into SP, otherwise just
19459 restore IP. */
19460 if (frame_pointer_needed)
19462 live_regs_mask &= ~ (1 << IP_REGNUM);
19463 live_regs_mask |= (1 << SP_REGNUM);
19465 else
19466 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19469 /* On some ARM architectures it is faster to use LDR rather than
19470 LDM to load a single register. On other architectures, the
19471 cost is the same. In 26 bit mode, or for exception handlers,
19472 we have to use LDM to load the PC so that the CPSR is also
19473 restored. */
19474 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19475 if (live_regs_mask == (1U << reg))
19476 break;
19478 if (reg <= LAST_ARM_REGNUM
19479 && (reg != LR_REGNUM
19480 || ! really_return
19481 || ! IS_INTERRUPT (func_type)))
19483 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19484 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19486 else
19488 char *p;
19489 int first = 1;
19491 /* Generate the load multiple instruction to restore the
19492 registers. Note we can get here, even if
19493 frame_pointer_needed is true, but only if sp already
19494 points to the base of the saved core registers. */
19495 if (live_regs_mask & (1 << SP_REGNUM))
19497 unsigned HOST_WIDE_INT stack_adjust;
19499 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19500 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19502 if (stack_adjust && arm_arch5 && TARGET_ARM)
19503 if (TARGET_UNIFIED_ASM)
19504 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19505 else
19506 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19507 else
19509 /* If we can't use ldmib (SA110 bug),
19510 then try to pop r3 instead. */
19511 if (stack_adjust)
19512 live_regs_mask |= 1 << 3;
19514 if (TARGET_UNIFIED_ASM)
19515 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19516 else
19517 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19520 else
19521 if (TARGET_UNIFIED_ASM)
19522 sprintf (instr, "pop%s\t{", conditional);
19523 else
19524 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19526 p = instr + strlen (instr);
19528 for (reg = 0; reg <= SP_REGNUM; reg++)
19529 if (live_regs_mask & (1 << reg))
19531 int l = strlen (reg_names[reg]);
19533 if (first)
19534 first = 0;
19535 else
19537 memcpy (p, ", ", 2);
19538 p += 2;
19541 memcpy (p, "%|", 2);
19542 memcpy (p + 2, reg_names[reg], l);
19543 p += l + 2;
19546 if (live_regs_mask & (1 << LR_REGNUM))
19548 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19549 /* If returning from an interrupt, restore the CPSR. */
19550 if (IS_INTERRUPT (func_type))
19551 strcat (p, "^");
19553 else
19554 strcpy (p, "}");
19557 output_asm_insn (instr, & operand);
19559 /* See if we need to generate an extra instruction to
19560 perform the actual function return. */
19561 if (really_return
19562 && func_type != ARM_FT_INTERWORKED
19563 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19565 /* The return has already been handled
19566 by loading the LR into the PC. */
19567 return "";
19571 if (really_return)
19573 switch ((int) ARM_FUNC_TYPE (func_type))
19575 case ARM_FT_ISR:
19576 case ARM_FT_FIQ:
19577 /* ??? This is wrong for unified assembly syntax. */
19578 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19579 break;
19581 case ARM_FT_INTERWORKED:
19582 sprintf (instr, "bx%s\t%%|lr", conditional);
19583 break;
19585 case ARM_FT_EXCEPTION:
19586 /* ??? This is wrong for unified assembly syntax. */
19587 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19588 break;
19590 default:
19591 /* Use bx if it's available. */
19592 if (arm_arch5 || arm_arch4t)
19593 sprintf (instr, "bx%s\t%%|lr", conditional);
19594 else
19595 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19596 break;
19599 output_asm_insn (instr, & operand);
19602 return "";
19605 /* Write the function name into the code section, directly preceding
19606 the function prologue.
19608 Code will be output similar to this:
19610 .ascii "arm_poke_function_name", 0
19611 .align
19613 .word 0xff000000 + (t1 - t0)
19614 arm_poke_function_name
19615 mov ip, sp
19616 stmfd sp!, {fp, ip, lr, pc}
19617 sub fp, ip, #4
19619 When performing a stack backtrace, code can inspect the value
19620 of 'pc' stored at 'fp' + 0. If the trace function then looks
19621 at location pc - 12 and the top 8 bits are set, then we know
19622 that there is a function name embedded immediately preceding this
19623 location, and that its (padded) length is ((pc[-3]) & ~0xff000000).
19625 We assume that pc is declared as a pointer to an unsigned long.
19627 It is of no benefit to output the function name if we are assembling
19628 a leaf function. These function types will not contain a stack
19629 backtrace structure, therefore it is not possible to determine the
19630 function name. */
19631 void
19632 arm_poke_function_name (FILE *stream, const char *name)
19634 unsigned long alignlength;
19635 unsigned long length;
19636 rtx x;
19638 length = strlen (name) + 1;
19639 alignlength = ROUND_UP_WORD (length);
19641 ASM_OUTPUT_ASCII (stream, name, length);
19642 ASM_OUTPUT_ALIGN (stream, 2);
19643 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19644 assemble_aligned_integer (UNITS_PER_WORD, x);
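/* Illustrative sketch only (not part of the compiler): how a backtrace
   routine could recover the name embedded by the layout described above,
   assuming a 32-bit target where PC is fetched from the frame as a
   pointer to unsigned long.  The helper name below is hypothetical.  */
#if 0
static const char *
backtrace_function_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];	/* The word at pc - 12.  */
  if ((marker & 0xff000000) != 0xff000000)
    return NULL;			/* No name was embedded here.  */
  /* The low 24 bits hold the padded length of the name string, which
     sits immediately before the marker word.  */
  return (const char *) pc - 12 - (marker & 0x00ffffff);
}
#endif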
19647 /* Place some comments into the assembler stream
19648 describing the current function. */
19649 static void
19650 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19652 unsigned long func_type;
19654 /* ??? Do we want to print some of the below anyway? */
19655 if (TARGET_THUMB1)
19656 return;
19658 /* Sanity check. */
19659 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19661 func_type = arm_current_func_type ();
19663 switch ((int) ARM_FUNC_TYPE (func_type))
19665 default:
19666 case ARM_FT_NORMAL:
19667 break;
19668 case ARM_FT_INTERWORKED:
19669 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19670 break;
19671 case ARM_FT_ISR:
19672 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19673 break;
19674 case ARM_FT_FIQ:
19675 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19676 break;
19677 case ARM_FT_EXCEPTION:
19678 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19679 break;
19682 if (IS_NAKED (func_type))
19683 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19685 if (IS_VOLATILE (func_type))
19686 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19688 if (IS_NESTED (func_type))
19689 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19690 if (IS_STACKALIGN (func_type))
19691 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19693 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19694 crtl->args.size,
19695 crtl->args.pretend_args_size, frame_size);
19697 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19698 frame_pointer_needed,
19699 cfun->machine->uses_anonymous_args);
19701 if (cfun->machine->lr_save_eliminated)
19702 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19704 if (crtl->calls_eh_return)
19705 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19709 static void
19710 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19711 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19713 arm_stack_offsets *offsets;
19715 if (TARGET_THUMB1)
19717 int regno;
19719 /* Emit any call-via-reg trampolines that are needed for v4t support
19720 of call_reg and call_value_reg type insns. */
19721 for (regno = 0; regno < LR_REGNUM; regno++)
19723 rtx label = cfun->machine->call_via[regno];
19725 if (label != NULL)
19727 switch_to_section (function_section (current_function_decl));
19728 targetm.asm_out.internal_label (asm_out_file, "L",
19729 CODE_LABEL_NUMBER (label));
19730 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19734 /* ??? Probably not safe to set this here, since it assumes that a
19735 function will be emitted as assembly immediately after we generate
19736 RTL for it. This does not happen for inline functions. */
19737 cfun->machine->return_used_this_function = 0;
19739 else /* TARGET_32BIT */
19741 /* We need to take into account any stack-frame rounding. */
19742 offsets = arm_get_frame_offsets ();
19744 gcc_assert (!use_return_insn (FALSE, NULL)
19745 || (cfun->machine->return_used_this_function != 0)
19746 || offsets->saved_regs == offsets->outgoing_args
19747 || frame_pointer_needed);
19751 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19752 STR and STRD. If an even number of registers are being pushed, one
19753 or more STRD patterns are created for each register pair. If an
19754 odd number of registers are pushed, emit an initial STR followed by
19755 as many STRD instructions as are needed. This works best when the
19756 stack is initially 64-bit aligned (the normal case), since it
19757 ensures that each STRD is also 64-bit aligned. */
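/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r6},
   the sequence emitted by the function below is

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the odd register is pushed first with writeback for the whole
   12 bytes, leaving the following STRD on a doubleword-aligned slot.  */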
19758 static void
19759 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19761 int num_regs = 0;
19762 int i;
19763 int regno;
19764 rtx par = NULL_RTX;
19765 rtx dwarf = NULL_RTX;
19766 rtx tmp;
19767 bool first = true;
19769 num_regs = bit_count (saved_regs_mask);
19771 /* Must be at least one register to save, and can't save SP or PC. */
19772 gcc_assert (num_regs > 0 && num_regs <= 14);
19773 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19774 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19776 /* Create sequence for DWARF info. All the frame-related data for
19777 debugging is held in this wrapper. */
19778 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19780 /* Describe the stack adjustment. */
19781 tmp = gen_rtx_SET (VOIDmode,
19782 stack_pointer_rtx,
19783 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19784 RTX_FRAME_RELATED_P (tmp) = 1;
19785 XVECEXP (dwarf, 0, 0) = tmp;
19787 /* Find the first register. */
19788 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19791 i = 0;
19793 /* If there's an odd number of registers to push, start off by
19794 pushing a single register. This ensures that subsequent strd
19795 operations are dword aligned (assuming that SP was originally
19796 64-bit aligned). */
19797 if ((num_regs & 1) != 0)
19799 rtx reg, mem, insn;
19801 reg = gen_rtx_REG (SImode, regno);
19802 if (num_regs == 1)
19803 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19804 stack_pointer_rtx));
19805 else
19806 mem = gen_frame_mem (Pmode,
19807 gen_rtx_PRE_MODIFY
19808 (Pmode, stack_pointer_rtx,
19809 plus_constant (Pmode, stack_pointer_rtx,
19810 -4 * num_regs)));
19812 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19813 RTX_FRAME_RELATED_P (tmp) = 1;
19814 insn = emit_insn (tmp);
19815 RTX_FRAME_RELATED_P (insn) = 1;
19816 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19817 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19818 reg);
19819 RTX_FRAME_RELATED_P (tmp) = 1;
19820 i++;
19821 regno++;
19822 XVECEXP (dwarf, 0, i) = tmp;
19823 first = false;
19826 while (i < num_regs)
19827 if (saved_regs_mask & (1 << regno))
19829 rtx reg1, reg2, mem1, mem2;
19830 rtx tmp0, tmp1, tmp2;
19831 int regno2;
19833 /* Find the register to pair with this one. */
19834 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19835 regno2++)
19838 reg1 = gen_rtx_REG (SImode, regno);
19839 reg2 = gen_rtx_REG (SImode, regno2);
19841 if (first)
19843 rtx insn;
19845 first = false;
19846 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19847 stack_pointer_rtx,
19848 -4 * num_regs));
19849 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19850 stack_pointer_rtx,
19851 -4 * (num_regs - 1)));
19852 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19853 plus_constant (Pmode, stack_pointer_rtx,
19854 -4 * (num_regs)));
19855 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19856 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19857 RTX_FRAME_RELATED_P (tmp0) = 1;
19858 RTX_FRAME_RELATED_P (tmp1) = 1;
19859 RTX_FRAME_RELATED_P (tmp2) = 1;
19860 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19861 XVECEXP (par, 0, 0) = tmp0;
19862 XVECEXP (par, 0, 1) = tmp1;
19863 XVECEXP (par, 0, 2) = tmp2;
19864 insn = emit_insn (par);
19865 RTX_FRAME_RELATED_P (insn) = 1;
19866 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19868 else
19870 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19871 stack_pointer_rtx,
19872 4 * i));
19873 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19874 stack_pointer_rtx,
19875 4 * (i + 1)));
19876 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19877 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19878 RTX_FRAME_RELATED_P (tmp1) = 1;
19879 RTX_FRAME_RELATED_P (tmp2) = 1;
19880 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19881 XVECEXP (par, 0, 0) = tmp1;
19882 XVECEXP (par, 0, 1) = tmp2;
19883 emit_insn (par);
19886 /* Create unwind information. This is an approximation. */
19887 tmp1 = gen_rtx_SET (VOIDmode,
19888 gen_frame_mem (Pmode,
19889 plus_constant (Pmode,
19890 stack_pointer_rtx,
19891 4 * i)),
19892 reg1);
19893 tmp2 = gen_rtx_SET (VOIDmode,
19894 gen_frame_mem (Pmode,
19895 plus_constant (Pmode,
19896 stack_pointer_rtx,
19897 4 * (i + 1))),
19898 reg2);
19900 RTX_FRAME_RELATED_P (tmp1) = 1;
19901 RTX_FRAME_RELATED_P (tmp2) = 1;
19902 XVECEXP (dwarf, 0, i + 1) = tmp1;
19903 XVECEXP (dwarf, 0, i + 2) = tmp2;
19904 i += 2;
19905 regno = regno2 + 1;
19907 else
19908 regno++;
19910 return;
19913 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19914 whenever possible, otherwise it emits single-word stores. The first store
19915 also allocates stack space for all saved registers, using writeback with
19916 post-addressing mode. All other stores use offset addressing. If no STRD
19917 can be emitted, this function emits a sequence of single-word stores,
19918 and not an STM as before, because single-word stores provide more
19919 scheduling freedom and can be turned into an STM by peephole optimizations. */
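/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r7},
   the function below emits

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   where the first store also allocates the full 12 bytes of stack.  */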
19920 static void
19921 arm_emit_strd_push (unsigned long saved_regs_mask)
19923 int num_regs = 0;
19924 int i, j, dwarf_index = 0;
19925 int offset = 0;
19926 rtx dwarf = NULL_RTX;
19927 rtx insn = NULL_RTX;
19928 rtx tmp, mem;
19930 /* TODO: More efficient code can be emitted by changing the
19931 layout, e.g., first push all pairs that can use STRD to keep the
19932 stack aligned, and then push all other registers. */
19933 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19934 if (saved_regs_mask & (1 << i))
19935 num_regs++;
19937 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19938 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19939 gcc_assert (num_regs > 0);
19941 /* Create sequence for DWARF info. */
19942 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19944 /* For dwarf info, we generate explicit stack update. */
19945 tmp = gen_rtx_SET (VOIDmode,
19946 stack_pointer_rtx,
19947 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19948 RTX_FRAME_RELATED_P (tmp) = 1;
19949 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19951 /* Save registers. */
19952 offset = - 4 * num_regs;
19953 j = 0;
19954 while (j <= LAST_ARM_REGNUM)
19955 if (saved_regs_mask & (1 << j))
19957 if ((j % 2 == 0)
19958 && (saved_regs_mask & (1 << (j + 1))))
19960 /* The current register and the next register form a register pair
19961 for which STRD can be generated. */
19962 if (offset < 0)
19964 /* Allocate stack space for all saved registers. */
19965 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19966 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19967 mem = gen_frame_mem (DImode, tmp);
19968 offset = 0;
19970 else if (offset > 0)
19971 mem = gen_frame_mem (DImode,
19972 plus_constant (Pmode,
19973 stack_pointer_rtx,
19974 offset));
19975 else
19976 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19978 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19979 RTX_FRAME_RELATED_P (tmp) = 1;
19980 tmp = emit_insn (tmp);
19982 /* Record the first store insn. */
19983 if (dwarf_index == 1)
19984 insn = tmp;
19986 /* Generate dwarf info. */
19987 mem = gen_frame_mem (SImode,
19988 plus_constant (Pmode,
19989 stack_pointer_rtx,
19990 offset));
19991 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19992 RTX_FRAME_RELATED_P (tmp) = 1;
19993 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19995 mem = gen_frame_mem (SImode,
19996 plus_constant (Pmode,
19997 stack_pointer_rtx,
19998 offset + 4));
19999 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20000 RTX_FRAME_RELATED_P (tmp) = 1;
20001 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20003 offset += 8;
20004 j += 2;
20006 else
20008 /* Emit a single word store. */
20009 if (offset < 0)
20011 /* Allocate stack space for all saved registers. */
20012 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20013 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20014 mem = gen_frame_mem (SImode, tmp);
20015 offset = 0;
20017 else if (offset > 0)
20018 mem = gen_frame_mem (SImode,
20019 plus_constant (Pmode,
20020 stack_pointer_rtx,
20021 offset));
20022 else
20023 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20025 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20026 RTX_FRAME_RELATED_P (tmp) = 1;
20027 tmp = emit_insn (tmp);
20029 /* Record the first store insn. */
20030 if (dwarf_index == 1)
20031 insn = tmp;
20033 /* Generate dwarf info. */
20034 mem = gen_frame_mem (SImode,
20035 plus_constant(Pmode,
20036 stack_pointer_rtx,
20037 offset));
20038 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20039 RTX_FRAME_RELATED_P (tmp) = 1;
20040 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20042 offset += 4;
20043 j += 1;
20046 else
20047 j++;
20049 /* Attach dwarf info to the first insn we generate. */
20050 gcc_assert (insn != NULL_RTX);
20051 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20052 RTX_FRAME_RELATED_P (insn) = 1;
20055 /* Generate and emit an insn that we will recognize as a push_multi.
20056 Unfortunately, since this insn does not reflect very well the actual
20057 semantics of the operation, we need to annotate the insn for the benefit
20058 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20059 MASK for registers that should be annotated for DWARF2 frame unwind
20060 information. */
20061 static rtx
20062 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20064 int num_regs = 0;
20065 int num_dwarf_regs = 0;
20066 int i, j;
20067 rtx par;
20068 rtx dwarf;
20069 int dwarf_par_index;
20070 rtx tmp, reg;
20072 /* We don't record the PC in the dwarf frame information. */
20073 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20077 if (mask & (1 << i))
20078 num_regs++;
20079 if (dwarf_regs_mask & (1 << i))
20080 num_dwarf_regs++;
20083 gcc_assert (num_regs && num_regs <= 16);
20084 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20086 /* For the body of the insn we are going to generate an UNSPEC in
20087 parallel with several USEs. This allows the insn to be recognized
20088 by the push_multi pattern in the arm.md file.
20090 The body of the insn looks something like this:
20092 (parallel [
20093 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20094 (const_int:SI <num>)))
20095 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20096 (use (reg:SI XX))
20097 (use (reg:SI YY))
20101 For the frame note however, we try to be more explicit and actually
20102 show each register being stored into the stack frame, plus a (single)
20103 decrement of the stack pointer. We do it this way in order to be
20104 friendly to the stack unwinding code, which only wants to see a single
20105 stack decrement per instruction. The RTL we generate for the note looks
20106 something like this:
20108 (sequence [
20109 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20110 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20111 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20112 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20116 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20117 instead we'd have a parallel expression detailing all
20118 the stores to the various memory addresses so that debug
20119 information is more up-to-date. Remember however while writing
20120 this to take care of the constraints with the push instruction.
20122 Note also that this has to be taken care of for the VFP registers.
20124 For more see PR43399. */
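  /* As a concrete (hypothetical) instance, for MASK = {r4, r5, lr} the
     body built below is

       (parallel [
	  (set (mem:BLK (pre_modify:SI (reg:SI sp) (const_int:SI -12)))
	       (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	  (use (reg:SI r5))
	  (use (reg:SI lr))])

     the attached note describes a single decrement of sp by 12 plus the
     three individual stores, and the insn assembles to "push {r4, r5, lr}".  */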
20126 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20127 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20128 dwarf_par_index = 1;
20130 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20132 if (mask & (1 << i))
20134 reg = gen_rtx_REG (SImode, i);
20136 XVECEXP (par, 0, 0)
20137 = gen_rtx_SET (VOIDmode,
20138 gen_frame_mem
20139 (BLKmode,
20140 gen_rtx_PRE_MODIFY (Pmode,
20141 stack_pointer_rtx,
20142 plus_constant
20143 (Pmode, stack_pointer_rtx,
20144 -4 * num_regs))
20146 gen_rtx_UNSPEC (BLKmode,
20147 gen_rtvec (1, reg),
20148 UNSPEC_PUSH_MULT));
20150 if (dwarf_regs_mask & (1 << i))
20152 tmp = gen_rtx_SET (VOIDmode,
20153 gen_frame_mem (SImode, stack_pointer_rtx),
20154 reg);
20155 RTX_FRAME_RELATED_P (tmp) = 1;
20156 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20159 break;
20163 for (j = 1, i++; j < num_regs; i++)
20165 if (mask & (1 << i))
20167 reg = gen_rtx_REG (SImode, i);
20169 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20171 if (dwarf_regs_mask & (1 << i))
20174 = gen_rtx_SET (VOIDmode,
20175 gen_frame_mem
20176 (SImode,
20177 plus_constant (Pmode, stack_pointer_rtx,
20178 4 * j)),
20179 reg);
20180 RTX_FRAME_RELATED_P (tmp) = 1;
20181 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20184 j++;
20188 par = emit_insn (par);
20190 tmp = gen_rtx_SET (VOIDmode,
20191 stack_pointer_rtx,
20192 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20193 RTX_FRAME_RELATED_P (tmp) = 1;
20194 XVECEXP (dwarf, 0, 0) = tmp;
20196 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20198 return par;
20201 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20202 SIZE is the offset to be adjusted.
20203 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20204 static void
20205 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20207 rtx dwarf;
20209 RTX_FRAME_RELATED_P (insn) = 1;
20210 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20211 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20214 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20215 SAVED_REGS_MASK shows which registers need to be restored.
20217 Unfortunately, since this insn does not reflect very well the actual
20218 semantics of the operation, we need to annotate the insn for the benefit
20219 of DWARF2 frame unwind information. */
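/* For example (hypothetical mask): SAVED_REGS_MASK covering {r4, r5, pc}
   yields the parallel

       [(return)
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
	(set (reg:SI r4) (mem:SI (reg:SI sp)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
	(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))]

   which is recognized as a pop_multi and assembles to "pop {r4, r5, pc}",
   performing the function return as well.  */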
20220 static void
20221 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20223 int num_regs = 0;
20224 int i, j;
20225 rtx par;
20226 rtx dwarf = NULL_RTX;
20227 rtx tmp, reg;
20228 bool return_in_pc;
20229 int offset_adj;
20230 int emit_update;
20232 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20233 offset_adj = return_in_pc ? 1 : 0;
20234 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20235 if (saved_regs_mask & (1 << i))
20236 num_regs++;
20238 gcc_assert (num_regs && num_regs <= 16);
20240 /* If SP is in reglist, then we don't emit SP update insn. */
20241 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20247 if (return_in_pc)
20249 tmp = ret_rtx;
20250 XVECEXP (par, 0, 0) = tmp;
20253 if (emit_update)
20255 /* Increment the stack pointer, based on there being
20256 num_regs 4-byte registers to restore. */
20257 tmp = gen_rtx_SET (VOIDmode,
20258 stack_pointer_rtx,
20259 plus_constant (Pmode,
20260 stack_pointer_rtx,
20261 4 * num_regs));
20262 RTX_FRAME_RELATED_P (tmp) = 1;
20263 XVECEXP (par, 0, offset_adj) = tmp;
20266 /* Now restore every reg, which may include PC. */
20267 for (j = 0, i = 0; j < num_regs; i++)
20268 if (saved_regs_mask & (1 << i))
20270 reg = gen_rtx_REG (SImode, i);
20271 if ((num_regs == 1) && emit_update && !return_in_pc)
20273 /* Emit single load with writeback. */
20274 tmp = gen_frame_mem (SImode,
20275 gen_rtx_POST_INC (Pmode,
20276 stack_pointer_rtx));
20277 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20278 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20279 return;
20282 tmp = gen_rtx_SET (VOIDmode,
20283 reg,
20284 gen_frame_mem
20285 (SImode,
20286 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20287 RTX_FRAME_RELATED_P (tmp) = 1;
20288 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20290 /* We need to maintain a sequence for DWARF info too. As dwarf info
20291 should not have PC, skip PC. */
20292 if (i != PC_REGNUM)
20293 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20295 j++;
20298 if (return_in_pc)
20299 par = emit_jump_insn (par);
20300 else
20301 par = emit_insn (par);
20303 REG_NOTES (par) = dwarf;
20304 if (!return_in_pc)
20305 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20306 stack_pointer_rtx, stack_pointer_rtx);
20309 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20310 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20312 Unfortunately, since this insn does not reflect very well the actual
20313 semantics of the operation, we need to annotate the insn for the benefit
20314 of DWARF2 frame unwind information. */
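/* For example (hypothetical values): popping four D-registers, say d8-d11,
   with BASE_REG = sp builds one parallel that restores them from
   [sp] .. [sp, #24] and advances sp by 32, i.e. roughly the effect of a
   single "vldm sp!, {d8-d11}".  */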
20315 static void
20316 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20318 int i, j;
20319 rtx par;
20320 rtx dwarf = NULL_RTX;
20321 rtx tmp, reg;
20323 gcc_assert (num_regs && num_regs <= 32);
20325 /* Workaround ARM10 VFPr1 bug. */
20326 if (num_regs == 2 && !arm_arch6)
20328 if (first_reg == 15)
20329 first_reg--;
20331 num_regs++;
20334 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20335 there could be up to 32 D-registers to restore.
20336 If there are more than 16 D-registers, make two recursive calls,
20337 each of which emits one pop_multi instruction. */
20338 if (num_regs > 16)
20340 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20341 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20342 return;
20345 /* The parallel needs to hold num_regs SETs
20346 and one SET for the stack update. */
20347 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20349 /* Increment the stack pointer, based on there being
20350 num_regs 8-byte registers to restore. */
20351 tmp = gen_rtx_SET (VOIDmode,
20352 base_reg,
20353 plus_constant (Pmode, base_reg, 8 * num_regs));
20354 RTX_FRAME_RELATED_P (tmp) = 1;
20355 XVECEXP (par, 0, 0) = tmp;
20357 /* Now show every reg that will be restored, using a SET for each. */
20358 for (j = 0, i=first_reg; j < num_regs; i += 2)
20360 reg = gen_rtx_REG (DFmode, i);
20362 tmp = gen_rtx_SET (VOIDmode,
20363 reg,
20364 gen_frame_mem
20365 (DFmode,
20366 plus_constant (Pmode, base_reg, 8 * j)));
20367 RTX_FRAME_RELATED_P (tmp) = 1;
20368 XVECEXP (par, 0, j + 1) = tmp;
20370 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20372 j++;
20375 par = emit_insn (par);
20376 REG_NOTES (par) = dwarf;
20378 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20379 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20381 RTX_FRAME_RELATED_P (par) = 1;
20382 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20384 else
20385 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20386 base_reg, base_reg);
20389 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20390 even number of registers is being popped, multiple LDRD patterns are created
20391 for all register pairs. If an odd number of registers is popped, the last
20392 register is loaded using an LDR pattern. */
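/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r6}
   and there is no return in PC, the function below emits

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the odd register handled by the final LDR with post-increment.  */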
20393 static void
20394 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20396 int num_regs = 0;
20397 int i, j;
20398 rtx par = NULL_RTX;
20399 rtx dwarf = NULL_RTX;
20400 rtx tmp, reg, tmp1;
20401 bool return_in_pc;
20403 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20404 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20405 if (saved_regs_mask & (1 << i))
20406 num_regs++;
20408 gcc_assert (num_regs && num_regs <= 16);
20410 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20411 to be popped. So, if num_regs is even, now it will become odd,
20412 and we can generate pop with PC. If num_regs is odd, it will be
20413 even now, and ldr with return can be generated for PC. */
20414 if (return_in_pc)
20415 num_regs--;
20417 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20419 /* Var j iterates over all the registers to gather all the registers in
20420 saved_regs_mask. Var i gives index of saved registers in stack frame.
20421 A PARALLEL RTX of register-pair is created here, so that pattern for
20422 LDRD can be matched. As PC is always the last register to be popped, and
20423 we have already decremented num_regs if PC is in the mask, we don't have
20424 to worry about PC in this loop. */
20425 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20426 if (saved_regs_mask & (1 << j))
20428 /* Create RTX for memory load. */
20429 reg = gen_rtx_REG (SImode, j);
20430 tmp = gen_rtx_SET (SImode,
20431 reg,
20432 gen_frame_mem (SImode,
20433 plus_constant (Pmode,
20434 stack_pointer_rtx, 4 * i)));
20435 RTX_FRAME_RELATED_P (tmp) = 1;
20437 if (i % 2 == 0)
20439 /* When saved-register index (i) is even, the RTX to be emitted is
20440 yet to be created. Hence create it first. The LDRD pattern we
20441 are generating is :
20442 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20443 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20444 where target registers need not be consecutive. */
20445 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20446 dwarf = NULL_RTX;
20449 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20450 added as 0th element and if i is odd, reg_i is added as 1st element
20451 of LDRD pattern shown above. */
20452 XVECEXP (par, 0, (i % 2)) = tmp;
20453 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20455 if ((i % 2) == 1)
20457 /* When saved-register index (i) is odd, RTXs for both the registers
20458 to be loaded are generated in above given LDRD pattern, and the
20459 pattern can be emitted now. */
20460 par = emit_insn (par);
20461 REG_NOTES (par) = dwarf;
20462 RTX_FRAME_RELATED_P (par) = 1;
20465 i++;
20468 /* If the number of registers pushed is odd and return_in_pc is false, or
20469 the number of registers is even and return_in_pc is true, the last register
20470 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20471 and then use LDR with post increment. */
20473 /* Increment the stack pointer, based on there being
20474 num_regs 4-byte registers to restore. */
20475 tmp = gen_rtx_SET (VOIDmode,
20476 stack_pointer_rtx,
20477 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20478 RTX_FRAME_RELATED_P (tmp) = 1;
20479 tmp = emit_insn (tmp);
20480 if (!return_in_pc)
20482 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20483 stack_pointer_rtx, stack_pointer_rtx);
20486 dwarf = NULL_RTX;
20488 if (((num_regs % 2) == 1 && !return_in_pc)
20489 || ((num_regs % 2) == 0 && return_in_pc))
20491 /* Scan for the single register to be popped. Skip until the saved
20492 register is found. */
20493 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20495 /* Gen LDR with post increment here. */
20496 tmp1 = gen_rtx_MEM (SImode,
20497 gen_rtx_POST_INC (SImode,
20498 stack_pointer_rtx));
20499 set_mem_alias_set (tmp1, get_frame_alias_set ());
20501 reg = gen_rtx_REG (SImode, j);
20502 tmp = gen_rtx_SET (SImode, reg, tmp1);
20503 RTX_FRAME_RELATED_P (tmp) = 1;
20504 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20506 if (return_in_pc)
20508 /* If return_in_pc, j must be PC_REGNUM. */
20509 gcc_assert (j == PC_REGNUM);
20510 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20511 XVECEXP (par, 0, 0) = ret_rtx;
20512 XVECEXP (par, 0, 1) = tmp;
20513 par = emit_jump_insn (par);
20515 else
20517 par = emit_insn (tmp);
20518 REG_NOTES (par) = dwarf;
20519 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20520 stack_pointer_rtx, stack_pointer_rtx);
20524 else if ((num_regs % 2) == 1 && return_in_pc)
20526 /* There are 2 registers to be popped. So, generate the pattern
20527 pop_multiple_with_stack_update_and_return to pop in PC. */
20528 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20531 return;
20534 /* LDRD in ARM mode needs consecutive registers as operands. This function
20535 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20536 offset addressing and then generates one separate stack update. This provides
20537 more scheduling freedom, compared to writeback on every load. However,
20538 if the function returns by loading directly into PC
20539 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20540 before the last load. TODO: Add a peephole optimization to recognize
20541 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20542 peephole optimization to merge the load at stack-offset zero
20543 with the stack update instruction using load with writeback
20544 in post-index addressing mode. */
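/* For example (hypothetical mask): if SAVED_REGS_MASK covers {r4, r5, r6},
   the function below emits

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   with a single stack update after all the loads.  */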
20545 static void
20546 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20548 int j = 0;
20549 int offset = 0;
20550 rtx par = NULL_RTX;
20551 rtx dwarf = NULL_RTX;
20552 rtx tmp, mem;
20554 /* Restore saved registers. */
20555 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20556 j = 0;
20557 while (j <= LAST_ARM_REGNUM)
20558 if (saved_regs_mask & (1 << j))
20560 if ((j % 2) == 0
20561 && (saved_regs_mask & (1 << (j + 1)))
20562 && (j + 1) != PC_REGNUM)
20564 /* Current register and next register form register pair for which
20565 LDRD can be generated. PC is always the last register popped, and
20566 we handle it separately. */
20567 if (offset > 0)
20568 mem = gen_frame_mem (DImode,
20569 plus_constant (Pmode,
20570 stack_pointer_rtx,
20571 offset));
20572 else
20573 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20575 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20576 tmp = emit_insn (tmp);
20577 RTX_FRAME_RELATED_P (tmp) = 1;
20579 /* Generate dwarf info. */
20581 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20582 gen_rtx_REG (SImode, j),
20583 NULL_RTX);
20584 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20585 gen_rtx_REG (SImode, j + 1),
20586 dwarf);
20588 REG_NOTES (tmp) = dwarf;
20590 offset += 8;
20591 j += 2;
20593 else if (j != PC_REGNUM)
20595 /* Emit a single word load. */
20596 if (offset > 0)
20597 mem = gen_frame_mem (SImode,
20598 plus_constant (Pmode,
20599 stack_pointer_rtx,
20600 offset));
20601 else
20602 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20604 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20605 tmp = emit_insn (tmp);
20606 RTX_FRAME_RELATED_P (tmp) = 1;
20608 /* Generate dwarf info. */
20609 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20610 gen_rtx_REG (SImode, j),
20611 NULL_RTX);
20613 offset += 4;
20614 j += 1;
20616 else /* j == PC_REGNUM */
20617 j++;
20619 else
20620 j++;
20622 /* Update the stack. */
20623 if (offset > 0)
20625 tmp = gen_rtx_SET (Pmode,
20626 stack_pointer_rtx,
20627 plus_constant (Pmode,
20628 stack_pointer_rtx,
20629 offset));
20630 tmp = emit_insn (tmp);
20631 arm_add_cfa_adjust_cfa_note (tmp, offset,
20632 stack_pointer_rtx, stack_pointer_rtx);
20633 offset = 0;
20636 if (saved_regs_mask & (1 << PC_REGNUM))
20638 /* Only PC is to be popped. */
20639 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20640 XVECEXP (par, 0, 0) = ret_rtx;
20641 tmp = gen_rtx_SET (SImode,
20642 gen_rtx_REG (SImode, PC_REGNUM),
20643 gen_frame_mem (SImode,
20644 gen_rtx_POST_INC (SImode,
20645 stack_pointer_rtx)));
20646 RTX_FRAME_RELATED_P (tmp) = 1;
20647 XVECEXP (par, 0, 1) = tmp;
20648 par = emit_jump_insn (par);
20650 /* Generate dwarf info. */
20651 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20652 gen_rtx_REG (SImode, PC_REGNUM),
20653 NULL_RTX);
20654 REG_NOTES (par) = dwarf;
20655 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20656 stack_pointer_rtx, stack_pointer_rtx);
20660 /* Calculate the size of the return value that is passed in registers. */
20661 static unsigned
20662 arm_size_return_regs (void)
20664 machine_mode mode;
20666 if (crtl->return_rtx != 0)
20667 mode = GET_MODE (crtl->return_rtx);
20668 else
20669 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20671 return GET_MODE_SIZE (mode);
20674 /* Return true if the current function needs to save/restore LR. */
20675 static bool
20676 thumb_force_lr_save (void)
20678 return !cfun->machine->lr_save_eliminated
20679 && (!leaf_function_p ()
20680 || thumb_far_jump_used_p ()
20681 || df_regs_ever_live_p (LR_REGNUM));
20684 /* Return true if CALL is an indirect tail call; in that case we do
20685 not know whether r3 will be available, so we must assume that it
20686 could be used. */
20687 static bool
20688 is_indirect_tailcall_p (rtx call)
20690 rtx pat = PATTERN (call);
20692 /* Indirect tail call. */
20693 pat = XVECEXP (pat, 0, 0);
20694 if (GET_CODE (pat) == SET)
20695 pat = SET_SRC (pat);
20697 pat = XEXP (XEXP (pat, 0), 0);
20698 return REG_P (pat);
20701 /* Return true if r3 is used by any of the tail call insns in the
20702 current function. */
20703 static bool
20704 any_sibcall_could_use_r3 (void)
20706 edge_iterator ei;
20707 edge e;
20709 if (!crtl->tail_call_emit)
20710 return false;
20711 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20712 if (e->flags & EDGE_SIBCALL)
20714 rtx call = BB_END (e->src);
20715 if (!CALL_P (call))
20716 call = prev_nonnote_nondebug_insn (call);
20717 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20718 if (find_regno_fusage (call, USE, 3)
20719 || is_indirect_tailcall_p (call))
20720 return true;
20722 return false;
20726 /* Compute the distance from register FROM to register TO.
20727 These can be the arg pointer (26), the soft frame pointer (25),
20728 the stack pointer (13) or the hard frame pointer (11).
20729 In thumb mode r7 is used as the soft frame pointer, if needed.
20730 Typical stack layout looks like this:
20732 old stack pointer -> | |
20733 ----
20734 | | \
20735 | | saved arguments for
20736 | | vararg functions
20737 | | /
20739 hard FP & arg pointer -> | | \
20740 | | stack
20741 | | frame
20742 | | /
20744 | | \
20745 | | call saved
20746 | | registers
20747 soft frame pointer -> | | /
20749 | | \
20750 | | local
20751 | | variables
20752 locals base pointer -> | | /
20754 | | \
20755 | | outgoing
20756 | | arguments
20757 current stack pointer -> | | /
20760 For a given function some or all of these stack components
20761 may not be needed, giving rise to the possibility of
20762 eliminating some of the registers.
20764 The values returned by this function must reflect the behavior
20765 of arm_expand_prologue() and arm_compute_save_reg_mask().
20767 The sign of the number returned reflects the direction of stack
20768 growth, so the values are positive for all eliminations except
20769 from the soft frame pointer to the hard frame pointer.
20771 SFP may point just inside the local variables block to ensure correct
20772 alignment. */
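/* As a hypothetical worked example (ARM state, doubleword-aligned stack,
   no frame pointer, no static chain, no caller-interworking slot, no
   VFP/iWMMXt saves): a function with no pretend args that saves
   {r4, r5, lr} (12 bytes), has 8 bytes of locals and no outgoing
   arguments gets

       saved_args    = 0
       saved_regs    = 12
       soft_frame    = 16	(12 rounded up for doubleword alignment)
       locals_base   = 24
       outgoing_args = 24

   so eliminating ARG_POINTER into STACK_POINTER yields 24 - 4 = 20 and
   FRAME_POINTER into STACK_POINTER yields 24 - 16 = 8.  */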
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
20779 static arm_stack_offsets *
20780 arm_get_frame_offsets (void)
20782 struct arm_stack_offsets *offsets;
20783 unsigned long func_type;
20784 int leaf;
20785 int saved;
20786 int core_saved;
20787 HOST_WIDE_INT frame_size;
20788 int i;
20790 offsets = &cfun->machine->stack_offsets;
20792 /* We need to know if we are a leaf function. Unfortunately, it
20793 is possible to be called after start_sequence has been called,
20794 which causes get_insns to return the insns for the sequence,
20795 not the function, which will cause leaf_function_p to return
20796 the incorrect result. Fortunately, we only need
20798 to know about leaf functions once reload has completed, and the
20799 frame size cannot be changed after that time, so we can safely
20800 use the cached value. */
20802 if (reload_completed)
20803 return offsets;
20805 /* Initially this is the size of the local variables. It will be translated
20806 into an offset once we have determined the size of preceding data. */
20807 frame_size = ROUND_UP_WORD (get_frame_size ());
20809 leaf = leaf_function_p ();
20811 /* Space for variadic functions. */
20812 offsets->saved_args = crtl->args.pretend_args_size;
20814 /* In Thumb mode this is incorrect, but never used. */
20815 offsets->frame
20816 = (offsets->saved_args
20817 + arm_compute_static_chain_stack_bytes ()
20818 + (frame_pointer_needed ? 4 : 0));
20820 if (TARGET_32BIT)
20822 unsigned int regno;
20824 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20825 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20826 saved = core_saved;
20828 /* We know that SP will be doubleword aligned on entry, and we must
20829 preserve that condition at any subroutine call. We also require the
20830 soft frame pointer to be doubleword aligned. */
20832 if (TARGET_REALLY_IWMMXT)
20834 /* Check for the call-saved iWMMXt registers. */
20835 for (regno = FIRST_IWMMXT_REGNUM;
20836 regno <= LAST_IWMMXT_REGNUM;
20837 regno++)
20838 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20839 saved += 8;
20842 func_type = arm_current_func_type ();
20843 /* Space for saved VFP registers. */
20844 if (! IS_VOLATILE (func_type)
20845 && TARGET_HARD_FLOAT && TARGET_VFP)
20846 saved += arm_get_vfp_saved_size ();
20848 else /* TARGET_THUMB1 */
20850 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20851 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20852 saved = core_saved;
20853 if (TARGET_BACKTRACE)
20854 saved += 16;
20857 /* Saved registers include the stack frame. */
20858 offsets->saved_regs
20859 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20860 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20862 /* A leaf function does not need any stack alignment if it has nothing
20863 on the stack. */
20864 if (leaf && frame_size == 0
20865 /* However if it calls alloca(), we have a dynamically allocated
20866 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20867 && ! cfun->calls_alloca)
20869 offsets->outgoing_args = offsets->soft_frame;
20870 offsets->locals_base = offsets->soft_frame;
20871 return offsets;
20874 /* Ensure SFP has the correct alignment. */
20875 if (ARM_DOUBLEWORD_ALIGN
20876 && (offsets->soft_frame & 7))
20878 offsets->soft_frame += 4;
20879 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20880 when there is a stack frame as the alignment will be rolled into
20881 the normal stack adjustment. */
20882 if (frame_size + crtl->outgoing_args_size == 0)
20884 int reg = -1;
20886 /* Register r3 is caller-saved. Normally it does not need to be
20887 saved on entry by the prologue. However if we choose to save
20888 it for padding then we may confuse the compiler into thinking
20889 a prologue sequence is required when in fact it is not. This
20890 will occur when shrink-wrapping if r3 is used as a scratch
20891 register and there are no other callee-saved writes.
20893 This situation can be avoided when other callee-saved registers
20894 are available and r3 is not mandatory if we choose a callee-saved
20895 register for padding. */
20896 bool prefer_callee_reg_p = false;
20898 /* If it is safe to use r3, then do so. This sometimes
20899 generates better code on Thumb-2 by avoiding the need to
20900 use 32-bit push/pop instructions. */
20901 if (! any_sibcall_could_use_r3 ()
20902 && arm_size_return_regs () <= 12
20903 && (offsets->saved_regs_mask & (1 << 3)) == 0
20904 && (TARGET_THUMB2
20905 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20907 reg = 3;
20908 if (!TARGET_THUMB2)
20909 prefer_callee_reg_p = true;
20911 if (reg == -1
20912 || prefer_callee_reg_p)
20914 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20916 /* Avoid fixed registers; they may be changed at
20917 arbitrary times so it's unsafe to restore them
20918 during the epilogue. */
20919 if (!fixed_regs[i]
20920 && (offsets->saved_regs_mask & (1 << i)) == 0)
20922 reg = i;
20923 break;
20928 if (reg != -1)
20930 offsets->saved_regs += 4;
20931 offsets->saved_regs_mask |= (1 << reg);
20936 offsets->locals_base = offsets->soft_frame + frame_size;
20937 offsets->outgoing_args = (offsets->locals_base
20938 + crtl->outgoing_args_size);
20940 if (ARM_DOUBLEWORD_ALIGN)
20942 /* Ensure SP remains doubleword aligned. */
20943 if (offsets->outgoing_args & 7)
20944 offsets->outgoing_args += 4;
20945 gcc_assert (!(offsets->outgoing_args & 7));
20948 return offsets;
20952 /* Calculate the relative offsets for the different stack pointers. Positive
20953 offsets are in the direction of stack growth. */
20955 HOST_WIDE_INT
20956 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20958 arm_stack_offsets *offsets;
20960 offsets = arm_get_frame_offsets ();
20962 /* OK, now we have enough information to compute the distances.
20963 There must be an entry in these switch tables for each pair
20964 of registers in ELIMINABLE_REGS, even if some of the entries
20965 seem to be redundant or useless. */
20966 switch (from)
20968 case ARG_POINTER_REGNUM:
20969 switch (to)
20971 case THUMB_HARD_FRAME_POINTER_REGNUM:
20972 return 0;
20974 case FRAME_POINTER_REGNUM:
20975 /* This is the reverse of the soft frame pointer
20976 to hard frame pointer elimination below. */
20977 return offsets->soft_frame - offsets->saved_args;
20979 case ARM_HARD_FRAME_POINTER_REGNUM:
20980 /* This is only non-zero in the case where the static chain register
20981 is stored above the frame. */
20982 return offsets->frame - offsets->saved_args - 4;
20984 case STACK_POINTER_REGNUM:
20985 /* If nothing has been pushed on the stack at all
20986 then this will return -4. This *is* correct! */
20987 return offsets->outgoing_args - (offsets->saved_args + 4);
20989 default:
20990 gcc_unreachable ();
20992 gcc_unreachable ();
20994 case FRAME_POINTER_REGNUM:
20995 switch (to)
20997 case THUMB_HARD_FRAME_POINTER_REGNUM:
20998 return 0;
21000 case ARM_HARD_FRAME_POINTER_REGNUM:
21001 /* The hard frame pointer points to the top entry in the
21002 stack frame. The soft frame pointer to the bottom entry
21003 in the stack frame. If there is no stack frame at all,
21004 then they are identical. */
21006 return offsets->frame - offsets->soft_frame;
21008 case STACK_POINTER_REGNUM:
21009 return offsets->outgoing_args - offsets->soft_frame;
21011 default:
21012 gcc_unreachable ();
21014 gcc_unreachable ();
21016 default:
21017 /* You cannot eliminate from the stack pointer.
21018 In theory you could eliminate from the hard frame
21019 pointer to the stack pointer, but this will never
21020 happen, since if a stack frame is not needed the
21021 hard frame pointer will never be used. */
21022 gcc_unreachable ();
21026 /* Given FROM and TO register numbers, say whether this elimination is
21027 allowed. Frame pointer elimination is automatically handled.
21029 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21030 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21031 pointer, we must eliminate FRAME_POINTER_REGNUM into
21032 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21033 ARG_POINTER_REGNUM. */
21035 bool
21036 arm_can_eliminate (const int from, const int to)
21038 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21039 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21040 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21041 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21042 true);
21045 /* Emit RTL to save coprocessor registers on function entry. Returns the
21046 number of bytes pushed. */
21048 static int
21049 arm_save_coproc_regs(void)
21051 int saved_size = 0;
21052 unsigned reg;
21053 unsigned start_reg;
21054 rtx insn;
21056 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21057 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21059 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21060 insn = gen_rtx_MEM (V2SImode, insn);
21061 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21062 RTX_FRAME_RELATED_P (insn) = 1;
21063 saved_size += 8;
21066 if (TARGET_HARD_FLOAT && TARGET_VFP)
21068 start_reg = FIRST_VFP_REGNUM;
21070 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21072 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21073 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21075 if (start_reg != reg)
21076 saved_size += vfp_emit_fstmd (start_reg,
21077 (reg - start_reg) / 2);
21078 start_reg = reg + 2;
21081 if (start_reg != reg)
21082 saved_size += vfp_emit_fstmd (start_reg,
21083 (reg - start_reg) / 2);
21085 return saved_size;
21089 /* Set the Thumb frame pointer from the stack pointer. */
21091 static void
21092 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21094 HOST_WIDE_INT amount;
21095 rtx insn, dwarf;
21097 amount = offsets->outgoing_args - offsets->locals_base;
21098 if (amount < 1024)
21099 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21100 stack_pointer_rtx, GEN_INT (amount)));
21101 else
21103 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21104 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21105 expects the first two operands to be the same. */
21106 if (TARGET_THUMB2)
21108 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21109 stack_pointer_rtx,
21110 hard_frame_pointer_rtx));
21112 else
21114 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21115 hard_frame_pointer_rtx,
21116 stack_pointer_rtx));
21118 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21119 plus_constant (Pmode, stack_pointer_rtx, amount));
21120 RTX_FRAME_RELATED_P (dwarf) = 1;
21121 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21124 RTX_FRAME_RELATED_P (insn) = 1;
21127 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21128 function. */
21129 void
21130 arm_expand_prologue (void)
21132 rtx amount;
21133 rtx insn;
21134 rtx ip_rtx;
21135 unsigned long live_regs_mask;
21136 unsigned long func_type;
21137 int fp_offset = 0;
21138 int saved_pretend_args = 0;
21139 int saved_regs = 0;
21140 unsigned HOST_WIDE_INT args_to_push;
21141 arm_stack_offsets *offsets;
21143 func_type = arm_current_func_type ();
21145 /* Naked functions don't have prologues. */
21146 if (IS_NAKED (func_type))
21147 return;
21149 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21150 args_to_push = crtl->args.pretend_args_size;
21152 /* Compute which register we will have to save onto the stack. */
21153 offsets = arm_get_frame_offsets ();
21154 live_regs_mask = offsets->saved_regs_mask;
21156 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21158 if (IS_STACKALIGN (func_type))
21160 rtx r0, r1;
21162 /* Handle a word-aligned stack pointer. We generate the following:
21164 mov r0, sp
21165 bic r1, r0, #7
21166 mov sp, r1
21167 <save and restore r0 in normal prologue/epilogue>
21168 mov sp, r0
21169 bx lr
21171 The unwinder doesn't need to know about the stack realignment.
21172 Just tell it we saved SP in r0. */
21173 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21175 r0 = gen_rtx_REG (SImode, 0);
21176 r1 = gen_rtx_REG (SImode, 1);
21178 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21179 RTX_FRAME_RELATED_P (insn) = 1;
21180 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21182 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21184 /* ??? The CFA changes here, which may cause GDB to conclude that it
21185 has entered a different function. That said, the unwind info is
21186 correct, individually, before and after this instruction because
21187 we've described the save of SP, which will override the default
21188 handling of SP as restoring from the CFA. */
21189 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21192 /* For APCS frames, if IP register is clobbered
21193 when creating frame, save that register in a special
21194 way. */
21195 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21197 if (IS_INTERRUPT (func_type))
21199 /* Interrupt functions must not corrupt any registers.
21200 Creating a frame pointer however, corrupts the IP
21201 register, so we must push it first. */
21202 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21204 /* Do not set RTX_FRAME_RELATED_P on this insn.
21205 The dwarf stack unwinding code only wants to see one
21206 stack decrement per function, and this is not it. If
21207 this instruction is labeled as being part of the frame
21208 creation sequence then dwarf2out_frame_debug_expr will
21209 die when it encounters the assignment of IP to FP
21210 later on, since the use of SP here establishes SP as
21211 the CFA register and not IP.
21213 Anyway this instruction is not really part of the stack
21214 frame creation although it is part of the prologue. */
21216 else if (IS_NESTED (func_type))
21218 /* The static chain register is the same as the IP register
21219 used as a scratch register during stack frame creation.
21220 To get around this need to find somewhere to store IP
21221 whilst the frame is being created. We try the following
21222 places in order:
21224 1. The last argument register r3 if it is available.
21225 2. A slot on the stack above the frame if there are no
21226 arguments to push onto the stack.
21227 3. Register r3 again, after pushing the argument registers
21228 onto the stack, if this is a varargs function.
21229 4. The last slot on the stack created for the arguments to
21230 push, if this isn't a varargs function.
21232 Note - we only need to tell the dwarf2 backend about the SP
21233 adjustment in the second variant; the static chain register
21234 doesn't need to be unwound, as it doesn't contain a value
21235 inherited from the caller. */
21237 if (!arm_r3_live_at_start_p ())
21238 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21239 else if (args_to_push == 0)
21241 rtx addr, dwarf;
21243 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21244 saved_regs += 4;
21246 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21247 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21248 fp_offset = 4;
21250 /* Just tell the dwarf backend that we adjusted SP. */
21251 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21252 plus_constant (Pmode, stack_pointer_rtx,
21253 -fp_offset));
21254 RTX_FRAME_RELATED_P (insn) = 1;
21255 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21257 else
21259 /* Store the args on the stack. */
21260 if (cfun->machine->uses_anonymous_args)
21262 insn
21263 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21264 (0xf0 >> (args_to_push / 4)) & 0xf);
21265 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21266 saved_pretend_args = 1;
21268 else
21270 rtx addr, dwarf;
21272 if (args_to_push == 4)
21273 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21274 else
21275 addr
21276 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21277 plus_constant (Pmode,
21278 stack_pointer_rtx,
21279 -args_to_push));
21281 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21283 /* Just tell the dwarf backend that we adjusted SP. */
21284 dwarf
21285 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21286 plus_constant (Pmode, stack_pointer_rtx,
21287 -args_to_push));
21288 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21291 RTX_FRAME_RELATED_P (insn) = 1;
21292 fp_offset = args_to_push;
21293 args_to_push = 0;
21297 insn = emit_set_insn (ip_rtx,
21298 plus_constant (Pmode, stack_pointer_rtx,
21299 fp_offset));
21300 RTX_FRAME_RELATED_P (insn) = 1;
21303 if (args_to_push)
21305 /* Push the argument registers, or reserve space for them. */
21306 if (cfun->machine->uses_anonymous_args)
21307 insn = emit_multi_reg_push
21308 ((0xf0 >> (args_to_push / 4)) & 0xf,
21309 (0xf0 >> (args_to_push / 4)) & 0xf);
21310 else
21311 insn = emit_insn
21312 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21313 GEN_INT (- args_to_push)));
21314 RTX_FRAME_RELATED_P (insn) = 1;
21317 /* If this is an interrupt service routine, and the link register
21318 is going to be pushed, and we're not generating extra
21319 push of IP (needed when a frame is needed and the frame layout is APCS),
21320 subtracting four from LR now will mean that the function return
21321 can be done with a single instruction. */
21322 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21323 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21324 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21325 && TARGET_ARM)
21327 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21329 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21332 if (live_regs_mask)
21334 unsigned long dwarf_regs_mask = live_regs_mask;
21336 saved_regs += bit_count (live_regs_mask) * 4;
21337 if (optimize_size && !frame_pointer_needed
21338 && saved_regs == offsets->saved_regs - offsets->saved_args)
21340 /* If no coprocessor registers are being pushed and we don't have
21341 to worry about a frame pointer then push extra registers to
21342 create the stack frame. This is done in a way that does not
21343 alter the frame layout, so is independent of the epilogue. */
21344 int n;
21345 int frame;
21346 n = 0;
21347 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21348 n++;
21349 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21350 if (frame && n * 4 >= frame)
21352 n = frame / 4;
21353 live_regs_mask |= (1 << n) - 1;
21354 saved_regs += frame;
21358 if (TARGET_LDRD
21359 && current_tune->prefer_ldrd_strd
21360 && !optimize_function_for_size_p (cfun))
21362 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21363 if (TARGET_THUMB2)
21364 thumb2_emit_strd_push (live_regs_mask);
21365 else if (TARGET_ARM
21366 && !TARGET_APCS_FRAME
21367 && !IS_INTERRUPT (func_type))
21368 arm_emit_strd_push (live_regs_mask);
21369 else
21371 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21372 RTX_FRAME_RELATED_P (insn) = 1;
21375 else
21377 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21378 RTX_FRAME_RELATED_P (insn) = 1;
21382 if (! IS_VOLATILE (func_type))
21383 saved_regs += arm_save_coproc_regs ();
21385 if (frame_pointer_needed && TARGET_ARM)
21387 /* Create the new frame pointer. */
21388 if (TARGET_APCS_FRAME)
21390 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21391 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21392 RTX_FRAME_RELATED_P (insn) = 1;
21394 if (IS_NESTED (func_type))
21396 /* Recover the static chain register. */
21397 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21398 insn = gen_rtx_REG (SImode, 3);
21399 else
21401 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21402 insn = gen_frame_mem (SImode, insn);
21404 emit_set_insn (ip_rtx, insn);
21405 /* Add a USE to stop propagate_one_insn() from barfing. */
21406 emit_insn (gen_force_register_use (ip_rtx));
21409 else
21411 insn = GEN_INT (saved_regs - 4);
21412 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21413 stack_pointer_rtx, insn));
21414 RTX_FRAME_RELATED_P (insn) = 1;
21418 if (flag_stack_usage_info)
21419 current_function_static_stack_size
21420 = offsets->outgoing_args - offsets->saved_args;
21422 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21424 /* This add can produce multiple insns for a large constant, so we
21425 need to get tricky. */
21426 rtx_insn *last = get_last_insn ();
21428 amount = GEN_INT (offsets->saved_args + saved_regs
21429 - offsets->outgoing_args);
21431 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21432 amount));
21435 last = last ? NEXT_INSN (last) : get_insns ();
21436 RTX_FRAME_RELATED_P (last) = 1;
21438 while (last != insn);
21440 /* If the frame pointer is needed, emit a special barrier that
21441 will prevent the scheduler from moving stores to the frame
21442 before the stack adjustment. */
21443 if (frame_pointer_needed)
21444 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21445 hard_frame_pointer_rtx));
21449 if (frame_pointer_needed && TARGET_THUMB2)
21450 thumb_set_frame_pointer (offsets);
21452 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21454 unsigned long mask;
21456 mask = live_regs_mask;
21457 mask &= THUMB2_WORK_REGS;
21458 if (!IS_NESTED (func_type))
21459 mask |= (1 << IP_REGNUM);
21460 arm_load_pic_register (mask);
21463 /* If we are profiling, make sure no instructions are scheduled before
21464 the call to mcount. Similarly if the user has requested no
21465 scheduling in the prolog. Similarly if we want non-call exceptions
21466 using the EABI unwinder, to prevent faulting instructions from being
21467 swapped with a stack adjustment. */
21468 if (crtl->profile || !TARGET_SCHED_PROLOG
21469 || (arm_except_unwind_info (&global_options) == UI_TARGET
21470 && cfun->can_throw_non_call_exceptions))
21471 emit_insn (gen_blockage ());
21473 /* If the link register is being kept alive, with the return address in it,
21474 then make sure that it does not get reused by the ce2 pass. */
21475 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21476 cfun->machine->lr_save_eliminated = 1;
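/* Illustrative sketch (assumed typical output, not taken from this
   file): for an APCS-frame function the code above usually produces a
   prologue of roughly this shape, with the exact register list and
   offsets depending on the live registers and frame size:

        mov     ip, sp
        stmfd   sp!, {fp, ip, lr, pc}
        sub     fp, ip, #4
        sub     sp, sp, #LOCALS        @ LOCALS is a placeholder

   IP snapshots the incoming stack pointer, the frame registers are
   pushed, the new frame pointer is derived from IP, and the stack is
   then dropped for outgoing arguments and locals.  */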
21479 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21480 static void
21481 arm_print_condition (FILE *stream)
21483 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21485 /* Branch conversion is not implemented for Thumb-2. */
21486 if (TARGET_THUMB)
21488 output_operand_lossage ("predicated Thumb instruction");
21489 return;
21491 if (current_insn_predicate != NULL)
21493 output_operand_lossage
21494 ("predicated instruction in conditional sequence");
21495 return;
21498 fputs (arm_condition_codes[arm_current_cc], stream);
21500 else if (current_insn_predicate)
21502 enum arm_cond_code code;
21504 if (TARGET_THUMB1)
21506 output_operand_lossage ("predicated Thumb instruction");
21507 return;
21510 code = get_arm_condition_code (current_insn_predicate);
21511 fputs (arm_condition_codes[code], stream);
21516 /* Globally reserved letters: acln
21517 Punctuation letters currently used: @_|?().!#
21518 Lower case letters currently used: bcdefhimpqtvwxyz
21519 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21520 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21522 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21524 If CODE is 'd', then the X is a condition operand and the instruction
21525 should only be executed if the condition is true.
21526 If CODE is 'D', then the X is a condition operand and the instruction
21527 should only be executed if the condition is false: however, if the mode
21528 of the comparison is CCFPEmode, then always execute the instruction -- we
21529 do this because in these circumstances !GE does not necessarily imply LT;
21530 in these cases the instruction pattern will take care to make sure that
21531 an instruction containing %d will follow, thereby undoing the effects of
21532 doing this instruction unconditionally.
21533 If CODE is 'N' then X is a floating point operand that must be negated
21534 before output.
21535 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21536 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
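/* Worked example for a few of the codes handled by arm_print_operand
   below (the output template fragment is hypothetical): with operand 1
   being the CONST_INT 5, "#%B1" prints "#-6" (the bitwise inverse of 5,
   sign-extended), "#%L1" prints "#5" (the low 16 bits), and "%?" prints
   the current condition code, or nothing when the instruction is
   unconditional.  */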
21537 static void
21538 arm_print_operand (FILE *stream, rtx x, int code)
21540 switch (code)
21542 case '@':
21543 fputs (ASM_COMMENT_START, stream);
21544 return;
21546 case '_':
21547 fputs (user_label_prefix, stream);
21548 return;
21550 case '|':
21551 fputs (REGISTER_PREFIX, stream);
21552 return;
21554 case '?':
21555 arm_print_condition (stream);
21556 return;
21558 case '(':
21559 /* Nothing in unified syntax, otherwise the current condition code. */
21560 if (!TARGET_UNIFIED_ASM)
21561 arm_print_condition (stream);
21562 break;
21564 case ')':
21565 /* The current condition code in unified syntax, otherwise nothing. */
21566 if (TARGET_UNIFIED_ASM)
21567 arm_print_condition (stream);
21568 break;
21570 case '.':
21571 /* The current condition code for a condition code setting instruction.
21572 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21573 if (TARGET_UNIFIED_ASM)
21575 fputc('s', stream);
21576 arm_print_condition (stream);
21578 else
21580 arm_print_condition (stream);
21581 fputc('s', stream);
21583 return;
21585 case '!':
21586 /* If the instruction is conditionally executed then print
21587 the current condition code, otherwise print 's'. */
21588 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21589 if (current_insn_predicate)
21590 arm_print_condition (stream);
21591 else
21592 fputc('s', stream);
21593 break;
21595 /* %# is a "break" sequence. It doesn't output anything, but is used to
21596 separate e.g. operand numbers from following text, if that text consists
21597 of further digits which we don't want to be part of the operand
21598 number. */
21599 case '#':
21600 return;
21602 case 'N':
21604 REAL_VALUE_TYPE r;
21605 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21606 r = real_value_negate (&r);
21607 fprintf (stream, "%s", fp_const_from_val (&r));
21609 return;
21611 /* An integer or symbol address without a preceding # sign. */
21612 case 'c':
21613 switch (GET_CODE (x))
21615 case CONST_INT:
21616 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21617 break;
21619 case SYMBOL_REF:
21620 output_addr_const (stream, x);
21621 break;
21623 case CONST:
21624 if (GET_CODE (XEXP (x, 0)) == PLUS
21625 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21627 output_addr_const (stream, x);
21628 break;
21630 /* Fall through. */
21632 default:
21633 output_operand_lossage ("Unsupported operand for code '%c'", code);
21635 return;
21637 /* An integer that we want to print in HEX. */
21638 case 'x':
21639 switch (GET_CODE (x))
21641 case CONST_INT:
21642 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21643 break;
21645 default:
21646 output_operand_lossage ("Unsupported operand for code '%c'", code);
21648 return;
21650 case 'B':
21651 if (CONST_INT_P (x))
21653 HOST_WIDE_INT val;
21654 val = ARM_SIGN_EXTEND (~INTVAL (x));
21655 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21657 else
21659 putc ('~', stream);
21660 output_addr_const (stream, x);
21662 return;
21664 case 'b':
21665 /* Print the log2 of a CONST_INT. */
21667 HOST_WIDE_INT val;
21669 if (!CONST_INT_P (x)
21670 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21671 output_operand_lossage ("Unsupported operand for code '%c'", code);
21672 else
21673 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21675 return;
21677 case 'L':
21678 /* The low 16 bits of an immediate constant. */
21679 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21680 return;
21682 case 'i':
21683 fprintf (stream, "%s", arithmetic_instr (x, 1));
21684 return;
21686 case 'I':
21687 fprintf (stream, "%s", arithmetic_instr (x, 0));
21688 return;
21690 case 'S':
21692 HOST_WIDE_INT val;
21693 const char *shift;
21695 shift = shift_op (x, &val);
21697 if (shift)
21699 fprintf (stream, ", %s ", shift);
21700 if (val == -1)
21701 arm_print_operand (stream, XEXP (x, 1), 0);
21702 else
21703 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21706 return;
21708 /* An explanation of the 'Q', 'R' and 'H' register operands:
21710 In a pair of registers containing a DI or DF value the 'Q'
21711 operand returns the register number of the register containing
21712 the least significant part of the value. The 'R' operand returns
21713 the register number of the register containing the most
21714 significant part of the value.
21716 The 'H' operand returns the higher of the two register numbers.
21717 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21718 same as the 'Q' operand, since the most significant part of the
21719 value is held in the lower number register. The reverse is true
21720 on systems where WORDS_BIG_ENDIAN is false.
21722 The purpose of these operands is to distinguish between cases
21723 where the endian-ness of the values is important (for example
21724 when they are added together), and cases where the endian-ness
21725 is irrelevant, but the order of register operations is important.
21726 For example when loading a value from memory into a register
21727 pair, the endian-ness does not matter. Provided that the value
21728 from the lower memory address is put into the lower numbered
21729 register, and the value from the higher address is put into the
21730 higher numbered register, the load will work regardless of whether
21731 the value being loaded is big-wordian or little-wordian. The
21732 order of the two register loads can matter however, if the address
21733 of the memory location is actually held in one of the registers
21734 being overwritten by the load.
21736 The 'Q' and 'R' constraints are also available for 64-bit
21737 constants. */
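/* Worked example (illustrative): for a DImode value held in the
   register pair r0/r1 on a little-endian target, %Q prints r0 (the
   least significant half), %R prints r1 (the most significant half)
   and %H prints r1 (the higher register number).  When
   WORDS_BIG_ENDIAN is true, %Q prints r1 and %R prints r0, while %H
   still prints r1.  */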
21738 case 'Q':
21739 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21741 rtx part = gen_lowpart (SImode, x);
21742 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21743 return;
21746 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21748 output_operand_lossage ("invalid operand for code '%c'", code);
21749 return;
21752 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21753 return;
21755 case 'R':
21756 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21758 machine_mode mode = GET_MODE (x);
21759 rtx part;
21761 if (mode == VOIDmode)
21762 mode = DImode;
21763 part = gen_highpart_mode (SImode, mode, x);
21764 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21765 return;
21768 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21770 output_operand_lossage ("invalid operand for code '%c'", code);
21771 return;
21774 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21775 return;
21777 case 'H':
21778 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21780 output_operand_lossage ("invalid operand for code '%c'", code);
21781 return;
21784 asm_fprintf (stream, "%r", REGNO (x) + 1);
21785 return;
21787 case 'J':
21788 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21790 output_operand_lossage ("invalid operand for code '%c'", code);
21791 return;
21794 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21795 return;
21797 case 'K':
21798 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21800 output_operand_lossage ("invalid operand for code '%c'", code);
21801 return;
21804 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21805 return;
21807 case 'm':
21808 asm_fprintf (stream, "%r",
21809 REG_P (XEXP (x, 0))
21810 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21811 return;
21813 case 'M':
21814 asm_fprintf (stream, "{%r-%r}",
21815 REGNO (x),
21816 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21817 return;
21819 /* Like 'M', but writing doubleword vector registers, for use by Neon
21820 insns. */
21821 case 'h':
21823 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21824 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21825 if (numregs == 1)
21826 asm_fprintf (stream, "{d%d}", regno);
21827 else
21828 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21830 return;
21832 case 'd':
21833 /* CONST_TRUE_RTX means always -- that's the default. */
21834 if (x == const_true_rtx)
21835 return;
21837 if (!COMPARISON_P (x))
21839 output_operand_lossage ("invalid operand for code '%c'", code);
21840 return;
21843 fputs (arm_condition_codes[get_arm_condition_code (x)],
21844 stream);
21845 return;
21847 case 'D':
21848 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21849 want to do that. */
21850 if (x == const_true_rtx)
21852 output_operand_lossage ("instruction never executed");
21853 return;
21855 if (!COMPARISON_P (x))
21857 output_operand_lossage ("invalid operand for code '%c'", code);
21858 return;
21861 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21862 (get_arm_condition_code (x))],
21863 stream);
21864 return;
21866 case 's':
21867 case 'V':
21868 case 'W':
21869 case 'X':
21870 case 'Y':
21871 case 'Z':
21872 /* Former Maverick support, removed after GCC-4.7. */
21873 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21874 return;
21876 case 'U':
21877 if (!REG_P (x)
21878 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21879 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21880 /* Bad value for wCG register number. */
21882 output_operand_lossage ("invalid operand for code '%c'", code);
21883 return;
21886 else
21887 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21888 return;
21890 /* Print an iWMMXt control register name. */
21891 case 'w':
21892 if (!CONST_INT_P (x)
21893 || INTVAL (x) < 0
21894 || INTVAL (x) >= 16)
21895 /* Bad value for wC register number. */
21897 output_operand_lossage ("invalid operand for code '%c'", code);
21898 return;
21901 else
21903 static const char * wc_reg_names [16] =
21905 "wCID", "wCon", "wCSSF", "wCASF",
21906 "wC4", "wC5", "wC6", "wC7",
21907 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21908 "wC12", "wC13", "wC14", "wC15"
21911 fputs (wc_reg_names [INTVAL (x)], stream);
21913 return;
21915 /* Print the high single-precision register of a VFP double-precision
21916 register. */
21917 case 'p':
21919 machine_mode mode = GET_MODE (x);
21920 int regno;
21922 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21924 output_operand_lossage ("invalid operand for code '%c'", code);
21925 return;
21928 regno = REGNO (x);
21929 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21931 output_operand_lossage ("invalid operand for code '%c'", code);
21932 return;
21935 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21937 return;
21939 /* Print a VFP/Neon double precision or quad precision register name. */
21940 case 'P':
21941 case 'q':
21943 machine_mode mode = GET_MODE (x);
21944 int is_quad = (code == 'q');
21945 int regno;
21947 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21949 output_operand_lossage ("invalid operand for code '%c'", code);
21950 return;
21953 if (!REG_P (x)
21954 || !IS_VFP_REGNUM (REGNO (x)))
21956 output_operand_lossage ("invalid operand for code '%c'", code);
21957 return;
21960 regno = REGNO (x);
21961 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21962 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21964 output_operand_lossage ("invalid operand for code '%c'", code);
21965 return;
21968 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21969 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21971 return;
21973 /* These two codes print the low/high doubleword register of a Neon quad
21974 register, respectively. For pair-structure types, can also print
21975 low/high quadword registers. */
21976 case 'e':
21977 case 'f':
21979 machine_mode mode = GET_MODE (x);
21980 int regno;
21982 if ((GET_MODE_SIZE (mode) != 16
21983 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21985 output_operand_lossage ("invalid operand for code '%c'", code);
21986 return;
21989 regno = REGNO (x);
21990 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21992 output_operand_lossage ("invalid operand for code '%c'", code);
21993 return;
21996 if (GET_MODE_SIZE (mode) == 16)
21997 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21998 + (code == 'f' ? 1 : 0));
21999 else
22000 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22001 + (code == 'f' ? 1 : 0));
22003 return;
22005 /* Print a VFPv3 floating-point constant, represented as an integer
22006 index. */
22007 case 'G':
22009 int index = vfp3_const_double_index (x);
22010 gcc_assert (index != -1);
22011 fprintf (stream, "%d", index);
22013 return;
22015 /* Print bits representing opcode features for Neon.
22017 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22018 and polynomials as unsigned.
22020 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22022 Bit 2 is 1 for rounding functions, 0 otherwise. */
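/* Worked example (illustrative): an operand with INTVAL 5
   (binary 101: signed, integer, rounding) makes %T and %t print 's',
   %F print 'i' and %O print 'r'; an operand with INTVAL 2
   (polynomial) makes %T and %F print 'p' while %t prints 'u' and
   %O prints nothing.  */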
22024 /* Identify the type as 's', 'u', 'p' or 'f'. */
22025 case 'T':
22027 HOST_WIDE_INT bits = INTVAL (x);
22028 fputc ("uspf"[bits & 3], stream);
22030 return;
22032 /* Likewise, but signed and unsigned integers are both 'i'. */
22033 case 'F':
22035 HOST_WIDE_INT bits = INTVAL (x);
22036 fputc ("iipf"[bits & 3], stream);
22038 return;
22040 /* As for 'T', but emit 'u' instead of 'p'. */
22041 case 't':
22043 HOST_WIDE_INT bits = INTVAL (x);
22044 fputc ("usuf"[bits & 3], stream);
22046 return;
22048 /* Bit 2: rounding (vs none). */
22049 case 'O':
22051 HOST_WIDE_INT bits = INTVAL (x);
22052 fputs ((bits & 4) != 0 ? "r" : "", stream);
22054 return;
22056 /* Memory operand for vld1/vst1 instruction. */
22057 case 'A':
22059 rtx addr;
22060 bool postinc = FALSE;
22061 rtx postinc_reg = NULL;
22062 unsigned align, memsize, align_bits;
22064 gcc_assert (MEM_P (x));
22065 addr = XEXP (x, 0);
22066 if (GET_CODE (addr) == POST_INC)
22068 postinc = 1;
22069 addr = XEXP (addr, 0);
22071 if (GET_CODE (addr) == POST_MODIFY)
22073 postinc_reg = XEXP( XEXP (addr, 1), 1);
22074 addr = XEXP (addr, 0);
22076 asm_fprintf (stream, "[%r", REGNO (addr));
22078 /* We know the alignment of this access, so we can emit a hint in the
22079 instruction (for some alignments) as an aid to the memory subsystem
22080 of the target. */
22081 align = MEM_ALIGN (x) >> 3;
22082 memsize = MEM_SIZE (x);
22084 /* Only certain alignment specifiers are supported by the hardware. */
22085 if (memsize == 32 && (align % 32) == 0)
22086 align_bits = 256;
22087 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22088 align_bits = 128;
22089 else if (memsize >= 8 && (align % 8) == 0)
22090 align_bits = 64;
22091 else
22092 align_bits = 0;
22094 if (align_bits != 0)
22095 asm_fprintf (stream, ":%d", align_bits);
22097 asm_fprintf (stream, "]");
22099 if (postinc)
22100 fputs("!", stream);
22101 if (postinc_reg)
22102 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22104 return;
22106 case 'C':
22108 rtx addr;
22110 gcc_assert (MEM_P (x));
22111 addr = XEXP (x, 0);
22112 gcc_assert (REG_P (addr));
22113 asm_fprintf (stream, "[%r]", REGNO (addr));
22115 return;
22117 /* Translate an S register number into a D register number and element index. */
22118 case 'y':
22120 machine_mode mode = GET_MODE (x);
22121 int regno;
22123 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22125 output_operand_lossage ("invalid operand for code '%c'", code);
22126 return;
22129 regno = REGNO (x);
22130 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22132 output_operand_lossage ("invalid operand for code '%c'", code);
22133 return;
22136 regno = regno - FIRST_VFP_REGNUM;
22137 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22139 return;
22141 case 'v':
22142 gcc_assert (CONST_DOUBLE_P (x));
22143 int result;
22144 result = vfp3_const_double_for_fract_bits (x);
22145 if (result == 0)
22146 result = vfp3_const_double_for_bits (x);
22147 fprintf (stream, "#%d", result);
22148 return;
22150 /* Register specifier for vld1.16/vst1.16. Translate the S register
22151 number into a D register number and element index. */
22152 case 'z':
22154 machine_mode mode = GET_MODE (x);
22155 int regno;
22157 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22159 output_operand_lossage ("invalid operand for code '%c'", code);
22160 return;
22163 regno = REGNO (x);
22164 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22166 output_operand_lossage ("invalid operand for code '%c'", code);
22167 return;
22170 regno = regno - FIRST_VFP_REGNUM;
22171 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22173 return;
22175 default:
22176 if (x == 0)
22178 output_operand_lossage ("missing operand");
22179 return;
22182 switch (GET_CODE (x))
22184 case REG:
22185 asm_fprintf (stream, "%r", REGNO (x));
22186 break;
22188 case MEM:
22189 output_memory_reference_mode = GET_MODE (x);
22190 output_address (XEXP (x, 0));
22191 break;
22193 case CONST_DOUBLE:
22195 char fpstr[20];
22196 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22197 sizeof (fpstr), 0, 1);
22198 fprintf (stream, "#%s", fpstr);
22200 break;
22202 default:
22203 gcc_assert (GET_CODE (x) != NEG);
22204 fputc ('#', stream);
22205 if (GET_CODE (x) == HIGH)
22207 fputs (":lower16:", stream);
22208 x = XEXP (x, 0);
22211 output_addr_const (stream, x);
22212 break;
22217 /* Target hook for printing a memory address. */
22218 static void
22219 arm_print_operand_address (FILE *stream, rtx x)
22221 if (TARGET_32BIT)
22223 int is_minus = GET_CODE (x) == MINUS;
22225 if (REG_P (x))
22226 asm_fprintf (stream, "[%r]", REGNO (x));
22227 else if (GET_CODE (x) == PLUS || is_minus)
22229 rtx base = XEXP (x, 0);
22230 rtx index = XEXP (x, 1);
22231 HOST_WIDE_INT offset = 0;
22232 if (!REG_P (base)
22233 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22235 /* Ensure that BASE is a register. */
22236 /* (one of them must be). */
22237 /* Also ensure the SP is not used as an index register. */
22238 rtx temp = base;
22239 base = index;
22240 index = temp;
22242 switch (GET_CODE (index))
22244 case CONST_INT:
22245 offset = INTVAL (index);
22246 if (is_minus)
22247 offset = -offset;
22248 asm_fprintf (stream, "[%r, #%wd]",
22249 REGNO (base), offset);
22250 break;
22252 case REG:
22253 asm_fprintf (stream, "[%r, %s%r]",
22254 REGNO (base), is_minus ? "-" : "",
22255 REGNO (index));
22256 break;
22258 case MULT:
22259 case ASHIFTRT:
22260 case LSHIFTRT:
22261 case ASHIFT:
22262 case ROTATERT:
22264 asm_fprintf (stream, "[%r, %s%r",
22265 REGNO (base), is_minus ? "-" : "",
22266 REGNO (XEXP (index, 0)));
22267 arm_print_operand (stream, index, 'S');
22268 fputs ("]", stream);
22269 break;
22272 default:
22273 gcc_unreachable ();
22276 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22277 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22279 extern machine_mode output_memory_reference_mode;
22281 gcc_assert (REG_P (XEXP (x, 0)));
22283 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22284 asm_fprintf (stream, "[%r, #%s%d]!",
22285 REGNO (XEXP (x, 0)),
22286 GET_CODE (x) == PRE_DEC ? "-" : "",
22287 GET_MODE_SIZE (output_memory_reference_mode));
22288 else
22289 asm_fprintf (stream, "[%r], #%s%d",
22290 REGNO (XEXP (x, 0)),
22291 GET_CODE (x) == POST_DEC ? "-" : "",
22292 GET_MODE_SIZE (output_memory_reference_mode));
22294 else if (GET_CODE (x) == PRE_MODIFY)
22296 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22297 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22298 asm_fprintf (stream, "#%wd]!",
22299 INTVAL (XEXP (XEXP (x, 1), 1)));
22300 else
22301 asm_fprintf (stream, "%r]!",
22302 REGNO (XEXP (XEXP (x, 1), 1)));
22304 else if (GET_CODE (x) == POST_MODIFY)
22306 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22307 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22308 asm_fprintf (stream, "#%wd",
22309 INTVAL (XEXP (XEXP (x, 1), 1)));
22310 else
22311 asm_fprintf (stream, "%r",
22312 REGNO (XEXP (XEXP (x, 1), 1)));
22314 else output_addr_const (stream, x);
22316 else
22318 if (REG_P (x))
22319 asm_fprintf (stream, "[%r]", REGNO (x));
22320 else if (GET_CODE (x) == POST_INC)
22321 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22322 else if (GET_CODE (x) == PLUS)
22324 gcc_assert (REG_P (XEXP (x, 0)));
22325 if (CONST_INT_P (XEXP (x, 1)))
22326 asm_fprintf (stream, "[%r, #%wd]",
22327 REGNO (XEXP (x, 0)),
22328 INTVAL (XEXP (x, 1)));
22329 else
22330 asm_fprintf (stream, "[%r, %r]",
22331 REGNO (XEXP (x, 0)),
22332 REGNO (XEXP (x, 1)));
22334 else
22335 output_addr_const (stream, x);
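/* A few illustrative mappings for the 32-bit path above (a sketch,
   not exhaustive; register numbers are arbitrary):
     (reg r4)                                      -> "[r4]"
     (plus (reg r4) (const_int 8))                 -> "[r4, #8]"
     (minus (reg r4) (reg r5))                     -> "[r4, -r5]"
     (post_inc (reg r4)), SImode access            -> "[r4], #4"
     (pre_modify (reg r4)
                 (plus (reg r4) (const_int -16)))  -> "[r4, #-16]!"  */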
22339 /* Target hook for indicating whether a punctuation character for
22340 TARGET_PRINT_OPERAND is valid. */
22341 static bool
22342 arm_print_operand_punct_valid_p (unsigned char code)
22344 return (code == '@' || code == '|' || code == '.'
22345 || code == '(' || code == ')' || code == '#'
22346 || (TARGET_32BIT && (code == '?'))
22347 || (TARGET_THUMB2 && (code == '!'))
22348 || (TARGET_THUMB && (code == '_')));
22351 /* Target hook for assembling integer objects. The ARM version needs to
22352 handle word-sized values specially. */
22353 static bool
22354 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22356 machine_mode mode;
22358 if (size == UNITS_PER_WORD && aligned_p)
22360 fputs ("\t.word\t", asm_out_file);
22361 output_addr_const (asm_out_file, x);
22363 /* Mark symbols as position independent. We only do this in the
22364 .text segment, not in the .data segment. */
22365 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22366 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22368 /* See legitimize_pic_address for an explanation of the
22369 TARGET_VXWORKS_RTP check. */
22370 if (!arm_pic_data_is_text_relative
22371 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22372 fputs ("(GOT)", asm_out_file);
22373 else
22374 fputs ("(GOTOFF)", asm_out_file);
22376 fputc ('\n', asm_out_file);
22377 return true;
22380 mode = GET_MODE (x);
22382 if (arm_vector_mode_supported_p (mode))
22384 int i, units;
22386 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22388 units = CONST_VECTOR_NUNITS (x);
22389 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22391 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22392 for (i = 0; i < units; i++)
22394 rtx elt = CONST_VECTOR_ELT (x, i);
22395 assemble_integer
22396 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22398 else
22399 for (i = 0; i < units; i++)
22401 rtx elt = CONST_VECTOR_ELT (x, i);
22402 REAL_VALUE_TYPE rval;
22404 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22406 assemble_real
22407 (rval, GET_MODE_INNER (mode),
22408 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22411 return true;
22414 return default_assemble_integer (x, size, aligned_p);
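/* Example of the word-sized case above (illustrative; "foo" is a
   hypothetical symbol): while emitting a PIC constant table, a local,
   text-relative SYMBOL_REF produces
        .word   foo(GOTOFF)
   whereas a non-local symbol, or non-text-relative data, produces
        .word   foo(GOT)
   Outside those conditions the plain ".word foo" form is used.  */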
22417 static void
22418 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22420 section *s;
22422 if (!TARGET_AAPCS_BASED)
22424 (is_ctor ?
22425 default_named_section_asm_out_constructor
22426 : default_named_section_asm_out_destructor) (symbol, priority);
22427 return;
22430 /* Put these in the .init_array section, using a special relocation. */
22431 if (priority != DEFAULT_INIT_PRIORITY)
22433 char buf[18];
22434 sprintf (buf, "%s.%.5u",
22435 is_ctor ? ".init_array" : ".fini_array",
22436 priority);
22437 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22439 else if (is_ctor)
22440 s = ctors_section;
22441 else
22442 s = dtors_section;
22444 switch_to_section (s);
22445 assemble_align (POINTER_SIZE);
22446 fputs ("\t.word\t", asm_out_file);
22447 output_addr_const (asm_out_file, symbol);
22448 fputs ("(target1)\n", asm_out_file);
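/* Illustration (a sketch): on an AAPCS-based target a constructor
   with priority 101 goes into a section named ".init_array.00101"
   and is emitted as
        .word   ctor_symbol(target1)
   where ctor_symbol stands for the constructor's assembler name;
   destructors with a non-default priority use ".fini_array.NNNNN"
   in the same way.  */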
22451 /* Add a function to the list of static constructors. */
22453 static void
22454 arm_elf_asm_constructor (rtx symbol, int priority)
22456 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22459 /* Add a function to the list of static destructors. */
22461 static void
22462 arm_elf_asm_destructor (rtx symbol, int priority)
22464 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22467 /* A finite state machine takes care of noticing whether or not instructions
22468 can be conditionally executed, thus decreasing execution time and code
22469 size by deleting branch instructions. The fsm is controlled by
22470 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22472 /* The states of the fsm controlling condition codes are:
22473 0: normal, do nothing special
22474 1: make ASM_OUTPUT_OPCODE not output this instruction
22475 2: make ASM_OUTPUT_OPCODE not output this instruction
22476 3: make instructions conditional
22477 4: make instructions conditional
22479 State transitions (state->state by whom under condition):
22480 0 -> 1 final_prescan_insn if the `target' is a label
22481 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22482 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22483 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22484 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22485 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22486 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22487 (the target insn is arm_target_insn).
22489 If the jump clobbers the conditions then we use states 2 and 4.
22491 A similar thing can be done with conditional return insns.
22493 XXX In case the `target' is an unconditional branch, this conditionalising
22494 of the instructions always reduces code size, but not always execution
22495 time. But then, I want to reduce the code size to somewhere near what
22496 /bin/cc produces. */
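/* Worked example (illustrative): a branch skipping two flag-preserving
   instructions, such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        mov     r2, #0
   .L1:

   is rewritten by this machinery into

        cmp     r0, #0
        addne   r1, r1, #1
        movne   r2, #0

   The branch is suppressed (states 1 and 3) and the skipped
   instructions execute under the inverse of the branch condition.  */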
22498 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22499 instructions. When a COND_EXEC instruction is seen the subsequent
22500 instructions are scanned so that multiple conditional instructions can be
22501 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22502 specify the length and true/false mask for the IT block. These will be
22503 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
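/* Worked example (illustrative): three consecutive COND_EXEC insns
   predicated on [eq, ne, eq] leave arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101, so thumb2_asm_output_opcode prints
   "itet eq" before the first instruction; each 't'/'e' letter mirrors
   a mask bit (1 = same condition, 0 = inverse condition).  */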
22505 /* Returns the index of the ARM condition code string in
22506 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22507 COMPARISON should be an rtx like `(eq (...) (...))'. */
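/* For example (illustrative): a comparison such as (geu (reg) (reg))
   evaluated in CCmode maps to ARM_CS, printed as the "cs" suffix,
   while the same GEU in CC_SWPmode (operands swapped) maps to ARM_LS
   instead.  */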
22509 enum arm_cond_code
22510 maybe_get_arm_condition_code (rtx comparison)
22512 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22513 enum arm_cond_code code;
22514 enum rtx_code comp_code = GET_CODE (comparison);
22516 if (GET_MODE_CLASS (mode) != MODE_CC)
22517 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22518 XEXP (comparison, 1));
22520 switch (mode)
22522 case CC_DNEmode: code = ARM_NE; goto dominance;
22523 case CC_DEQmode: code = ARM_EQ; goto dominance;
22524 case CC_DGEmode: code = ARM_GE; goto dominance;
22525 case CC_DGTmode: code = ARM_GT; goto dominance;
22526 case CC_DLEmode: code = ARM_LE; goto dominance;
22527 case CC_DLTmode: code = ARM_LT; goto dominance;
22528 case CC_DGEUmode: code = ARM_CS; goto dominance;
22529 case CC_DGTUmode: code = ARM_HI; goto dominance;
22530 case CC_DLEUmode: code = ARM_LS; goto dominance;
22531 case CC_DLTUmode: code = ARM_CC;
22533 dominance:
22534 if (comp_code == EQ)
22535 return ARM_INVERSE_CONDITION_CODE (code);
22536 if (comp_code == NE)
22537 return code;
22538 return ARM_NV;
22540 case CC_NOOVmode:
22541 switch (comp_code)
22543 case NE: return ARM_NE;
22544 case EQ: return ARM_EQ;
22545 case GE: return ARM_PL;
22546 case LT: return ARM_MI;
22547 default: return ARM_NV;
22550 case CC_Zmode:
22551 switch (comp_code)
22553 case NE: return ARM_NE;
22554 case EQ: return ARM_EQ;
22555 default: return ARM_NV;
22558 case CC_Nmode:
22559 switch (comp_code)
22561 case NE: return ARM_MI;
22562 case EQ: return ARM_PL;
22563 default: return ARM_NV;
22566 case CCFPEmode:
22567 case CCFPmode:
22568 /* We can handle all cases except UNEQ and LTGT. */
22569 switch (comp_code)
22571 case GE: return ARM_GE;
22572 case GT: return ARM_GT;
22573 case LE: return ARM_LS;
22574 case LT: return ARM_MI;
22575 case NE: return ARM_NE;
22576 case EQ: return ARM_EQ;
22577 case ORDERED: return ARM_VC;
22578 case UNORDERED: return ARM_VS;
22579 case UNLT: return ARM_LT;
22580 case UNLE: return ARM_LE;
22581 case UNGT: return ARM_HI;
22582 case UNGE: return ARM_PL;
22583 /* UNEQ and LTGT do not have a representation. */
22584 case UNEQ: /* Fall through. */
22585 case LTGT: /* Fall through. */
22586 default: return ARM_NV;
22589 case CC_SWPmode:
22590 switch (comp_code)
22592 case NE: return ARM_NE;
22593 case EQ: return ARM_EQ;
22594 case GE: return ARM_LE;
22595 case GT: return ARM_LT;
22596 case LE: return ARM_GE;
22597 case LT: return ARM_GT;
22598 case GEU: return ARM_LS;
22599 case GTU: return ARM_CC;
22600 case LEU: return ARM_CS;
22601 case LTU: return ARM_HI;
22602 default: return ARM_NV;
22605 case CC_Cmode:
22606 switch (comp_code)
22608 case LTU: return ARM_CS;
22609 case GEU: return ARM_CC;
22610 default: return ARM_NV;
22613 case CC_CZmode:
22614 switch (comp_code)
22616 case NE: return ARM_NE;
22617 case EQ: return ARM_EQ;
22618 case GEU: return ARM_CS;
22619 case GTU: return ARM_HI;
22620 case LEU: return ARM_LS;
22621 case LTU: return ARM_CC;
22622 default: return ARM_NV;
22625 case CC_NCVmode:
22626 switch (comp_code)
22628 case GE: return ARM_GE;
22629 case LT: return ARM_LT;
22630 case GEU: return ARM_CS;
22631 case LTU: return ARM_CC;
22632 default: return ARM_NV;
22635 case CCmode:
22636 switch (comp_code)
22638 case NE: return ARM_NE;
22639 case EQ: return ARM_EQ;
22640 case GE: return ARM_GE;
22641 case GT: return ARM_GT;
22642 case LE: return ARM_LE;
22643 case LT: return ARM_LT;
22644 case GEU: return ARM_CS;
22645 case GTU: return ARM_HI;
22646 case LEU: return ARM_LS;
22647 case LTU: return ARM_CC;
22648 default: return ARM_NV;
22651 default: gcc_unreachable ();
22655 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22656 static enum arm_cond_code
22657 get_arm_condition_code (rtx comparison)
22659 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22660 gcc_assert (code != ARM_NV);
22661 return code;
22664 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22665 instructions. */
22666 void
22667 thumb2_final_prescan_insn (rtx_insn *insn)
22669 rtx_insn *first_insn = insn;
22670 rtx body = PATTERN (insn);
22671 rtx predicate;
22672 enum arm_cond_code code;
22673 int n;
22674 int mask;
22675 int max;
22677 /* max_insns_skipped in the tune was already taken into account in the
22678 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22679 just emit the IT blocks as we can. It does not make sense to split
22680 the IT blocks. */
22681 max = MAX_INSN_PER_IT_BLOCK;
22683 /* Remove the previous insn from the count of insns to be output. */
22684 if (arm_condexec_count)
22685 arm_condexec_count--;
22687 /* Nothing to do if we are already inside a conditional block. */
22688 if (arm_condexec_count)
22689 return;
22691 if (GET_CODE (body) != COND_EXEC)
22692 return;
22694 /* Conditional jumps are implemented directly. */
22695 if (JUMP_P (insn))
22696 return;
22698 predicate = COND_EXEC_TEST (body);
22699 arm_current_cc = get_arm_condition_code (predicate);
22701 n = get_attr_ce_count (insn);
22702 arm_condexec_count = 1;
22703 arm_condexec_mask = (1 << n) - 1;
22704 arm_condexec_masklen = n;
22705 /* See if subsequent instructions can be combined into the same block. */
22706 for (;;)
22708 insn = next_nonnote_insn (insn);
22710 /* Jumping into the middle of an IT block is illegal, so a label or
22711 barrier terminates the block. */
22712 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22713 break;
22715 body = PATTERN (insn);
22716 /* USE and CLOBBER aren't really insns, so just skip them. */
22717 if (GET_CODE (body) == USE
22718 || GET_CODE (body) == CLOBBER)
22719 continue;
22721 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22722 if (GET_CODE (body) != COND_EXEC)
22723 break;
22724 /* Maximum number of conditionally executed instructions in a block. */
22725 n = get_attr_ce_count (insn);
22726 if (arm_condexec_masklen + n > max)
22727 break;
22729 predicate = COND_EXEC_TEST (body);
22730 code = get_arm_condition_code (predicate);
22731 mask = (1 << n) - 1;
22732 if (arm_current_cc == code)
22733 arm_condexec_mask |= (mask << arm_condexec_masklen);
22734 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22735 break;
22737 arm_condexec_count++;
22738 arm_condexec_masklen += n;
22740 /* A jump must be the last instruction in a conditional block. */
22741 if (JUMP_P (insn))
22742 break;
22744 /* Restore recog_data (getting the attributes of other insns can
22745 destroy this array, but final.c assumes that it remains intact
22746 across this call). */
22747 extract_constrain_insn_cached (first_insn);
22750 void
22751 arm_final_prescan_insn (rtx_insn *insn)
22753 /* BODY will hold the body of INSN. */
22754 rtx body = PATTERN (insn);
22756 /* This will be 1 if trying to repeat the trick, and things need to be
22757 reversed if it appears to fail. */
22758 int reverse = 0;
22760 /* If we start with a return insn, we only succeed if we find another one. */
22761 int seeking_return = 0;
22762 enum rtx_code return_code = UNKNOWN;
22764 /* START_INSN will hold the insn from where we start looking. This is the
22765 first insn after the following code_label if REVERSE is true. */
22766 rtx_insn *start_insn = insn;
22768 /* If in state 4, check if the target branch is reached, in order to
22769 change back to state 0. */
22770 if (arm_ccfsm_state == 4)
22772 if (insn == arm_target_insn)
22774 arm_target_insn = NULL;
22775 arm_ccfsm_state = 0;
22777 return;
22780 /* If in state 3, it is possible to repeat the trick, if this insn is an
22781 unconditional branch to a label, and immediately following this branch
22782 is the previous target label which is only used once, and the label this
22783 branch jumps to is not too far off. */
22784 if (arm_ccfsm_state == 3)
22786 if (simplejump_p (insn))
22788 start_insn = next_nonnote_insn (start_insn);
22789 if (BARRIER_P (start_insn))
22791 /* XXX Isn't this always a barrier? */
22792 start_insn = next_nonnote_insn (start_insn);
22794 if (LABEL_P (start_insn)
22795 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22796 && LABEL_NUSES (start_insn) == 1)
22797 reverse = TRUE;
22798 else
22799 return;
22801 else if (ANY_RETURN_P (body))
22803 start_insn = next_nonnote_insn (start_insn);
22804 if (BARRIER_P (start_insn))
22805 start_insn = next_nonnote_insn (start_insn);
22806 if (LABEL_P (start_insn)
22807 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22808 && LABEL_NUSES (start_insn) == 1)
22810 reverse = TRUE;
22811 seeking_return = 1;
22812 return_code = GET_CODE (body);
22814 else
22815 return;
22817 else
22818 return;
22821 gcc_assert (!arm_ccfsm_state || reverse);
22822 if (!JUMP_P (insn))
22823 return;
22825 /* This jump might be paralleled with a clobber of the condition codes;
22826 the jump should always come first. */
22827 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22828 body = XVECEXP (body, 0, 0);
22830 if (reverse
22831 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22832 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22834 int insns_skipped;
22835 int fail = FALSE, succeed = FALSE;
22836 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22837 int then_not_else = TRUE;
22838 rtx_insn *this_insn = start_insn;
22839 rtx label = 0;
22841 /* Register the insn jumped to. */
22842 if (reverse)
22844 if (!seeking_return)
22845 label = XEXP (SET_SRC (body), 0);
22847 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22848 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22849 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22851 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22852 then_not_else = FALSE;
22854 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22856 seeking_return = 1;
22857 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22859 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22861 seeking_return = 1;
22862 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22863 then_not_else = FALSE;
22865 else
22866 gcc_unreachable ();
22868 /* See how many insns this branch skips, and what kind of insns. If all
22869 insns are okay, and the label or unconditional branch to the same
22870 label is not too far away, succeed. */
22871 for (insns_skipped = 0;
22872 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22874 rtx scanbody;
22876 this_insn = next_nonnote_insn (this_insn);
22877 if (!this_insn)
22878 break;
22880 switch (GET_CODE (this_insn))
22882 case CODE_LABEL:
22883 /* Succeed if it is the target label, otherwise fail since
22884 control falls in from somewhere else. */
22885 if (this_insn == label)
22887 arm_ccfsm_state = 1;
22888 succeed = TRUE;
22890 else
22891 fail = TRUE;
22892 break;
22894 case BARRIER:
22895 /* Succeed if the following insn is the target label.
22896 Otherwise fail.
22897 If return insns are used then the last insn in a function
22898 will be a barrier. */
22899 this_insn = next_nonnote_insn (this_insn);
22900 if (this_insn && this_insn == label)
22902 arm_ccfsm_state = 1;
22903 succeed = TRUE;
22905 else
22906 fail = TRUE;
22907 break;
22909 case CALL_INSN:
22910 /* The AAPCS says that conditional calls should not be
22911 used since they make interworking inefficient (the
22912 linker can't transform BL<cond> into BLX). That's
22913 only a problem if the machine has BLX. */
22914 if (arm_arch5)
22916 fail = TRUE;
22917 break;
22920 /* Succeed if the following insn is the target label, or
22921 if the following two insns are a barrier and the
22922 target label. */
22923 this_insn = next_nonnote_insn (this_insn);
22924 if (this_insn && BARRIER_P (this_insn))
22925 this_insn = next_nonnote_insn (this_insn);
22927 if (this_insn && this_insn == label
22928 && insns_skipped < max_insns_skipped)
22930 arm_ccfsm_state = 1;
22931 succeed = TRUE;
22933 else
22934 fail = TRUE;
22935 break;
22937 case JUMP_INSN:
22938 /* If this is an unconditional branch to the same label, succeed.
22939 If it is to another label, do nothing. If it is conditional,
22940 fail. */
22941 /* XXX Probably, the tests for SET and the PC are
22942 unnecessary. */
22944 scanbody = PATTERN (this_insn);
22945 if (GET_CODE (scanbody) == SET
22946 && GET_CODE (SET_DEST (scanbody)) == PC)
22948 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22949 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22951 arm_ccfsm_state = 2;
22952 succeed = TRUE;
22954 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22955 fail = TRUE;
22957 /* Fail if a conditional return is undesirable (e.g. on a
22958 StrongARM), but still allow this if optimizing for size. */
22959 else if (GET_CODE (scanbody) == return_code
22960 && !use_return_insn (TRUE, NULL)
22961 && !optimize_size)
22962 fail = TRUE;
22963 else if (GET_CODE (scanbody) == return_code)
22965 arm_ccfsm_state = 2;
22966 succeed = TRUE;
22968 else if (GET_CODE (scanbody) == PARALLEL)
22970 switch (get_attr_conds (this_insn))
22972 case CONDS_NOCOND:
22973 break;
22974 default:
22975 fail = TRUE;
22976 break;
22979 else
22980 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22982 break;
22984 case INSN:
22985 /* Instructions using or affecting the condition codes make it
22986 fail. */
22987 scanbody = PATTERN (this_insn);
22988 if (!(GET_CODE (scanbody) == SET
22989 || GET_CODE (scanbody) == PARALLEL)
22990 || get_attr_conds (this_insn) != CONDS_NOCOND)
22991 fail = TRUE;
22992 break;
22994 default:
22995 break;
22998 if (succeed)
23000 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23001 arm_target_label = CODE_LABEL_NUMBER (label);
23002 else
23004 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23006 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23008 this_insn = next_nonnote_insn (this_insn);
23009 gcc_assert (!this_insn
23010 || (!BARRIER_P (this_insn)
23011 && !LABEL_P (this_insn)));
23013 if (!this_insn)
23015 /* Oh, dear!  We ran off the end... give up. */
23016 extract_constrain_insn_cached (insn);
23017 arm_ccfsm_state = 0;
23018 arm_target_insn = NULL;
23019 return;
23021 arm_target_insn = this_insn;
23024 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23025 what it was. */
23026 if (!reverse)
23027 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23029 if (reverse || then_not_else)
23030 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23033 /* Restore recog_data (getting the attributes of other insns can
23034 destroy this array, but final.c assumes that it remains intact
23035 across this call). */
23036 extract_constrain_insn_cached (insn);
23040 /* Output IT instructions. */
23041 void
23042 thumb2_asm_output_opcode (FILE * stream)
23044 char buff[5];
23045 int n;
23047 if (arm_condexec_mask)
23049 for (n = 0; n < arm_condexec_masklen; n++)
23050 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23051 buff[n] = 0;
23052 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23053 arm_condition_codes[arm_current_cc]);
23054 arm_condexec_mask = 0;
23058 /* Returns true if REGNO is a valid register
23059 for holding a quantity of type MODE. */
23061 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23063 if (GET_MODE_CLASS (mode) == MODE_CC)
23064 return (regno == CC_REGNUM
23065 || (TARGET_HARD_FLOAT && TARGET_VFP
23066 && regno == VFPCC_REGNUM));
23068 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23069 return false;
23071 if (TARGET_THUMB1)
23072 /* For the Thumb we only allow values bigger than SImode in
23073 registers 0 - 6, so that there is always a second low
23074 register available to hold the upper part of the value.
23075 We probably ought to ensure that the register is the
23076 start of an even numbered register pair. */
23077 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23079 if (TARGET_HARD_FLOAT && TARGET_VFP
23080 && IS_VFP_REGNUM (regno))
23082 if (mode == SFmode || mode == SImode)
23083 return VFP_REGNO_OK_FOR_SINGLE (regno);
23085 if (mode == DFmode)
23086 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23088 /* VFP registers can hold HFmode values, but there is no point in
23089 putting them there unless we have hardware conversion insns. */
23090 if (mode == HFmode)
23091 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23093 if (TARGET_NEON)
23094 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23095 || (VALID_NEON_QREG_MODE (mode)
23096 && NEON_REGNO_OK_FOR_QUAD (regno))
23097 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23098 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23099 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23100 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23101 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23103 return FALSE;
23106 if (TARGET_REALLY_IWMMXT)
23108 if (IS_IWMMXT_GR_REGNUM (regno))
23109 return mode == SImode;
23111 if (IS_IWMMXT_REGNUM (regno))
23112 return VALID_IWMMXT_REG_MODE (mode);
23115 /* We allow almost any value to be stored in the general registers.
23116 Restrict doubleword quantities to even register pairs in ARM state
23117 so that we can use ldrd. Do not allow very large Neon structure
23118 opaque modes in general registers; they would use too many. */
23119 if (regno <= LAST_ARM_REGNUM)
23121 if (ARM_NUM_REGS (mode) > 4)
23122 return FALSE;
23124 if (TARGET_THUMB2)
23125 return TRUE;
23127 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23130 if (regno == FRAME_POINTER_REGNUM
23131 || regno == ARG_POINTER_REGNUM)
23132 /* We only allow integers in the fake hard registers. */
23133 return GET_MODE_CLASS (mode) == MODE_INT;
23135 return FALSE;
23138 /* Implement MODES_TIEABLE_P. */
23140 bool
23141 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23143 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23144 return true;
23146 /* We specifically want to allow elements of "structure" modes to
23147 be tieable to the structure. This more general condition allows
23148 other rarer situations too. */
23149 if (TARGET_NEON
23150 && (VALID_NEON_DREG_MODE (mode1)
23151 || VALID_NEON_QREG_MODE (mode1)
23152 || VALID_NEON_STRUCT_MODE (mode1))
23153 && (VALID_NEON_DREG_MODE (mode2)
23154 || VALID_NEON_QREG_MODE (mode2)
23155 || VALID_NEON_STRUCT_MODE (mode2)))
23156 return true;
23158 return false;
23161 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23162 not used in arm mode. */
23164 enum reg_class
23165 arm_regno_class (int regno)
23167 if (regno == PC_REGNUM)
23168 return NO_REGS;
23170 if (TARGET_THUMB1)
23172 if (regno == STACK_POINTER_REGNUM)
23173 return STACK_REG;
23174 if (regno == CC_REGNUM)
23175 return CC_REG;
23176 if (regno < 8)
23177 return LO_REGS;
23178 return HI_REGS;
23181 if (TARGET_THUMB2 && regno < 8)
23182 return LO_REGS;
23184 if ( regno <= LAST_ARM_REGNUM
23185 || regno == FRAME_POINTER_REGNUM
23186 || regno == ARG_POINTER_REGNUM)
23187 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23189 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23190 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23192 if (IS_VFP_REGNUM (regno))
23194 if (regno <= D7_VFP_REGNUM)
23195 return VFP_D0_D7_REGS;
23196 else if (regno <= LAST_LO_VFP_REGNUM)
23197 return VFP_LO_REGS;
23198 else
23199 return VFP_HI_REGS;
23202 if (IS_IWMMXT_REGNUM (regno))
23203 return IWMMXT_REGS;
23205 if (IS_IWMMXT_GR_REGNUM (regno))
23206 return IWMMXT_GR_REGS;
23208 return NO_REGS;
23211 /* Handle a special case when computing the offset
23212 of an argument from the frame pointer. */
23214 arm_debugger_arg_offset (int value, rtx addr)
23216 rtx_insn *insn;
23218 /* We are only interested if dbxout_parms() failed to compute the offset. */
23219 if (value != 0)
23220 return 0;
23222 /* We can only cope with the case where the address is held in a register. */
23223 if (!REG_P (addr))
23224 return 0;
23226 /* If we are using the frame pointer to point at the argument, then
23227 an offset of 0 is correct. */
23228 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23229 return 0;
23231 /* If we are using the stack pointer to point at the
23232 argument, then an offset of 0 is correct. */
23233 /* ??? Check this is consistent with thumb2 frame layout. */
23234 if ((TARGET_THUMB || !frame_pointer_needed)
23235 && REGNO (addr) == SP_REGNUM)
23236 return 0;
23238 /* Oh dear. The argument is pointed to by a register rather
23239 than being held in a register, or being stored at a known
23240 offset from the frame pointer. Since GDB only understands
23241 those two kinds of argument we must translate the address
23242 held in the register into an offset from the frame pointer.
23243 We do this by searching through the insns for the function
23244 looking to see where this register gets its value. If the
23245 register is initialized from the frame pointer plus an offset
23246 then we are in luck and we can continue, otherwise we give up.
23248 This code is exercised by producing debugging information
23249 for a function with arguments like this:
23251 double func (double a, double b, int c, double d) {return d;}
23253 Without this code the stab for parameter 'd' will be set to
23254 an offset of 0 from the frame pointer, rather than 8. */
23256 /* The if() statement says:
23258 If the insn is a normal instruction
23259 and if the insn is setting the value in a register
23260 and if the register being set is the register holding the address of the argument
23261 and if the address is computed by an addition
23262 that involves adding to a register
23263 which is the frame pointer
23264 a constant integer
23266 then... */
23268 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23270 if ( NONJUMP_INSN_P (insn)
23271 && GET_CODE (PATTERN (insn)) == SET
23272 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23273 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23274 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23275 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23276 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23279 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23281 break;
23285 if (value == 0)
23287 debug_rtx (addr);
23288 warning (0, "unable to compute real location of stacked parameter");
23289 value = 8; /* XXX magic hack */
23292 return value;
23295 typedef enum {
23296 T_V8QI,
23297 T_V4HI,
23298 T_V4HF,
23299 T_V2SI,
23300 T_V2SF,
23301 T_DI,
23302 T_V16QI,
23303 T_V8HI,
23304 T_V4SI,
23305 T_V4SF,
23306 T_V2DI,
23307 T_TI,
23308 T_EI,
23309 T_OI,
23310 T_MAX /* Size of enum. Keep last. */
23311 } neon_builtin_type_mode;
23313 #define TYPE_MODE_BIT(X) (1 << (X))
23315 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23316 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23317 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23318 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23319 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23320 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23322 #define v8qi_UP T_V8QI
23323 #define v4hi_UP T_V4HI
23324 #define v4hf_UP T_V4HF
23325 #define v2si_UP T_V2SI
23326 #define v2sf_UP T_V2SF
23327 #define di_UP T_DI
23328 #define v16qi_UP T_V16QI
23329 #define v8hi_UP T_V8HI
23330 #define v4si_UP T_V4SI
23331 #define v4sf_UP T_V4SF
23332 #define v2di_UP T_V2DI
23333 #define ti_UP T_TI
23334 #define ei_UP T_EI
23335 #define oi_UP T_OI
23337 #define UP(X) X##_UP
23339 typedef enum {
23340 NEON_BINOP,
23341 NEON_TERNOP,
23342 NEON_UNOP,
23343 NEON_BSWAP,
23344 NEON_GETLANE,
23345 NEON_SETLANE,
23346 NEON_CREATE,
23347 NEON_RINT,
23348 NEON_COPYSIGNF,
23349 NEON_DUP,
23350 NEON_DUPLANE,
23351 NEON_COMBINE,
23352 NEON_SPLIT,
23353 NEON_LANEMUL,
23354 NEON_LANEMULL,
23355 NEON_LANEMULH,
23356 NEON_LANEMAC,
23357 NEON_SCALARMUL,
23358 NEON_SCALARMULL,
23359 NEON_SCALARMULH,
23360 NEON_SCALARMAC,
23361 NEON_CONVERT,
23362 NEON_FLOAT_WIDEN,
23363 NEON_FLOAT_NARROW,
23364 NEON_FIXCONV,
23365 NEON_SELECT,
23366 NEON_REINTERP,
23367 NEON_VTBL,
23368 NEON_VTBX,
23369 NEON_LOAD1,
23370 NEON_LOAD1LANE,
23371 NEON_STORE1,
23372 NEON_STORE1LANE,
23373 NEON_LOADSTRUCT,
23374 NEON_LOADSTRUCTLANE,
23375 NEON_STORESTRUCT,
23376 NEON_STORESTRUCTLANE,
23377 NEON_LOGICBINOP,
23378 NEON_SHIFTINSERT,
23379 NEON_SHIFTIMM,
23380 NEON_SHIFTACC
23381 } neon_itype;
23383 typedef struct {
23384 const char *name;
23385 const neon_itype itype;
23386 const neon_builtin_type_mode mode;
23387 const enum insn_code code;
23388 unsigned int fcode;
23389 } neon_builtin_datum;
23391 #define CF(N,X) CODE_FOR_neon_##N##X
23393 #define VAR1(T, N, A) \
23394 {#N, NEON_##T, UP (A), CF (N, A), 0}
23395 #define VAR2(T, N, A, B) \
23396 VAR1 (T, N, A), \
23397 {#N, NEON_##T, UP (B), CF (N, B), 0}
23398 #define VAR3(T, N, A, B, C) \
23399 VAR2 (T, N, A, B), \
23400 {#N, NEON_##T, UP (C), CF (N, C), 0}
23401 #define VAR4(T, N, A, B, C, D) \
23402 VAR3 (T, N, A, B, C), \
23403 {#N, NEON_##T, UP (D), CF (N, D), 0}
23404 #define VAR5(T, N, A, B, C, D, E) \
23405 VAR4 (T, N, A, B, C, D), \
23406 {#N, NEON_##T, UP (E), CF (N, E), 0}
23407 #define VAR6(T, N, A, B, C, D, E, F) \
23408 VAR5 (T, N, A, B, C, D, E), \
23409 {#N, NEON_##T, UP (F), CF (N, F), 0}
23410 #define VAR7(T, N, A, B, C, D, E, F, G) \
23411 VAR6 (T, N, A, B, C, D, E, F), \
23412 {#N, NEON_##T, UP (G), CF (N, G), 0}
23413 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23414 VAR7 (T, N, A, B, C, D, E, F, G), \
23415 {#N, NEON_##T, UP (H), CF (N, H), 0}
23416 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23417 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23418 {#N, NEON_##T, UP (I), CF (N, I), 0}
23419 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23420 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23421 {#N, NEON_##T, UP (J), CF (N, J), 0}
23423 /* The NEON builtin data can be found in arm_neon_builtins.def.
23424 The mode entries in the following table correspond to the "key" type of the
23425 instruction variant, i.e. equivalent to that which would be specified after
23426 the assembler mnemonic, which usually refers to the last vector operand.
23427 (Signed/unsigned/polynomial types are not differentiated between though, and
23428 are all mapped onto the same mode for a given element size.) The modes
23429 listed per instruction should be the same as those defined for that
23430 instruction's pattern in neon.md. */
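/* Expansion example (illustrative; the entry shown is hypothetical and
   need not appear in arm_neon_builtins.def):
     VAR2 (BINOP, vadd, v8qi, v16qi)
   expands to the two table entries
     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0},
   with the fcode field left as zero at this point.  */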
23432 static neon_builtin_datum neon_builtin_data[] =
23434 #include "arm_neon_builtins.def"
23437 #undef CF
23438 #undef VAR1
23439 #undef VAR2
23440 #undef VAR3
23441 #undef VAR4
23442 #undef VAR5
23443 #undef VAR6
23444 #undef VAR7
23445 #undef VAR8
23446 #undef VAR9
23447 #undef VAR10
23449 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23450 #define VAR1(T, N, A) \
23451 CF (N, A)
23452 #define VAR2(T, N, A, B) \
23453 VAR1 (T, N, A), \
23454 CF (N, B)
23455 #define VAR3(T, N, A, B, C) \
23456 VAR2 (T, N, A, B), \
23457 CF (N, C)
23458 #define VAR4(T, N, A, B, C, D) \
23459 VAR3 (T, N, A, B, C), \
23460 CF (N, D)
23461 #define VAR5(T, N, A, B, C, D, E) \
23462 VAR4 (T, N, A, B, C, D), \
23463 CF (N, E)
23464 #define VAR6(T, N, A, B, C, D, E, F) \
23465 VAR5 (T, N, A, B, C, D, E), \
23466 CF (N, F)
23467 #define VAR7(T, N, A, B, C, D, E, F, G) \
23468 VAR6 (T, N, A, B, C, D, E, F), \
23469 CF (N, G)
23470 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23471 VAR7 (T, N, A, B, C, D, E, F, G), \
23472 CF (N, H)
23473 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23474 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23475 CF (N, I)
23476 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23477 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23478 CF (N, J)
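/* With CF and the VARn macros redefined as above, the very same
   arm_neon_builtins.def entries now expand into enumerator names rather than
   table entries; the hypothetical VAR2 (BINOP, vadd, v8qi, v16qi) example
   would contribute ARM_BUILTIN_NEON_vaddv8qi and ARM_BUILTIN_NEON_vaddv16qi
   to the enum below.  */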
23479 enum arm_builtins
23481 ARM_BUILTIN_GETWCGR0,
23482 ARM_BUILTIN_GETWCGR1,
23483 ARM_BUILTIN_GETWCGR2,
23484 ARM_BUILTIN_GETWCGR3,
23486 ARM_BUILTIN_SETWCGR0,
23487 ARM_BUILTIN_SETWCGR1,
23488 ARM_BUILTIN_SETWCGR2,
23489 ARM_BUILTIN_SETWCGR3,
23491 ARM_BUILTIN_WZERO,
23493 ARM_BUILTIN_WAVG2BR,
23494 ARM_BUILTIN_WAVG2HR,
23495 ARM_BUILTIN_WAVG2B,
23496 ARM_BUILTIN_WAVG2H,
23498 ARM_BUILTIN_WACCB,
23499 ARM_BUILTIN_WACCH,
23500 ARM_BUILTIN_WACCW,
23502 ARM_BUILTIN_WMACS,
23503 ARM_BUILTIN_WMACSZ,
23504 ARM_BUILTIN_WMACU,
23505 ARM_BUILTIN_WMACUZ,
23507 ARM_BUILTIN_WSADB,
23508 ARM_BUILTIN_WSADBZ,
23509 ARM_BUILTIN_WSADH,
23510 ARM_BUILTIN_WSADHZ,
23512 ARM_BUILTIN_WALIGNI,
23513 ARM_BUILTIN_WALIGNR0,
23514 ARM_BUILTIN_WALIGNR1,
23515 ARM_BUILTIN_WALIGNR2,
23516 ARM_BUILTIN_WALIGNR3,
23518 ARM_BUILTIN_TMIA,
23519 ARM_BUILTIN_TMIAPH,
23520 ARM_BUILTIN_TMIABB,
23521 ARM_BUILTIN_TMIABT,
23522 ARM_BUILTIN_TMIATB,
23523 ARM_BUILTIN_TMIATT,
23525 ARM_BUILTIN_TMOVMSKB,
23526 ARM_BUILTIN_TMOVMSKH,
23527 ARM_BUILTIN_TMOVMSKW,
23529 ARM_BUILTIN_TBCSTB,
23530 ARM_BUILTIN_TBCSTH,
23531 ARM_BUILTIN_TBCSTW,
23533 ARM_BUILTIN_WMADDS,
23534 ARM_BUILTIN_WMADDU,
23536 ARM_BUILTIN_WPACKHSS,
23537 ARM_BUILTIN_WPACKWSS,
23538 ARM_BUILTIN_WPACKDSS,
23539 ARM_BUILTIN_WPACKHUS,
23540 ARM_BUILTIN_WPACKWUS,
23541 ARM_BUILTIN_WPACKDUS,
23543 ARM_BUILTIN_WADDB,
23544 ARM_BUILTIN_WADDH,
23545 ARM_BUILTIN_WADDW,
23546 ARM_BUILTIN_WADDSSB,
23547 ARM_BUILTIN_WADDSSH,
23548 ARM_BUILTIN_WADDSSW,
23549 ARM_BUILTIN_WADDUSB,
23550 ARM_BUILTIN_WADDUSH,
23551 ARM_BUILTIN_WADDUSW,
23552 ARM_BUILTIN_WSUBB,
23553 ARM_BUILTIN_WSUBH,
23554 ARM_BUILTIN_WSUBW,
23555 ARM_BUILTIN_WSUBSSB,
23556 ARM_BUILTIN_WSUBSSH,
23557 ARM_BUILTIN_WSUBSSW,
23558 ARM_BUILTIN_WSUBUSB,
23559 ARM_BUILTIN_WSUBUSH,
23560 ARM_BUILTIN_WSUBUSW,
23562 ARM_BUILTIN_WAND,
23563 ARM_BUILTIN_WANDN,
23564 ARM_BUILTIN_WOR,
23565 ARM_BUILTIN_WXOR,
23567 ARM_BUILTIN_WCMPEQB,
23568 ARM_BUILTIN_WCMPEQH,
23569 ARM_BUILTIN_WCMPEQW,
23570 ARM_BUILTIN_WCMPGTUB,
23571 ARM_BUILTIN_WCMPGTUH,
23572 ARM_BUILTIN_WCMPGTUW,
23573 ARM_BUILTIN_WCMPGTSB,
23574 ARM_BUILTIN_WCMPGTSH,
23575 ARM_BUILTIN_WCMPGTSW,
23577 ARM_BUILTIN_TEXTRMSB,
23578 ARM_BUILTIN_TEXTRMSH,
23579 ARM_BUILTIN_TEXTRMSW,
23580 ARM_BUILTIN_TEXTRMUB,
23581 ARM_BUILTIN_TEXTRMUH,
23582 ARM_BUILTIN_TEXTRMUW,
23583 ARM_BUILTIN_TINSRB,
23584 ARM_BUILTIN_TINSRH,
23585 ARM_BUILTIN_TINSRW,
23587 ARM_BUILTIN_WMAXSW,
23588 ARM_BUILTIN_WMAXSH,
23589 ARM_BUILTIN_WMAXSB,
23590 ARM_BUILTIN_WMAXUW,
23591 ARM_BUILTIN_WMAXUH,
23592 ARM_BUILTIN_WMAXUB,
23593 ARM_BUILTIN_WMINSW,
23594 ARM_BUILTIN_WMINSH,
23595 ARM_BUILTIN_WMINSB,
23596 ARM_BUILTIN_WMINUW,
23597 ARM_BUILTIN_WMINUH,
23598 ARM_BUILTIN_WMINUB,
23600 ARM_BUILTIN_WMULUM,
23601 ARM_BUILTIN_WMULSM,
23602 ARM_BUILTIN_WMULUL,
23604 ARM_BUILTIN_PSADBH,
23605 ARM_BUILTIN_WSHUFH,
23607 ARM_BUILTIN_WSLLH,
23608 ARM_BUILTIN_WSLLW,
23609 ARM_BUILTIN_WSLLD,
23610 ARM_BUILTIN_WSRAH,
23611 ARM_BUILTIN_WSRAW,
23612 ARM_BUILTIN_WSRAD,
23613 ARM_BUILTIN_WSRLH,
23614 ARM_BUILTIN_WSRLW,
23615 ARM_BUILTIN_WSRLD,
23616 ARM_BUILTIN_WRORH,
23617 ARM_BUILTIN_WRORW,
23618 ARM_BUILTIN_WRORD,
23619 ARM_BUILTIN_WSLLHI,
23620 ARM_BUILTIN_WSLLWI,
23621 ARM_BUILTIN_WSLLDI,
23622 ARM_BUILTIN_WSRAHI,
23623 ARM_BUILTIN_WSRAWI,
23624 ARM_BUILTIN_WSRADI,
23625 ARM_BUILTIN_WSRLHI,
23626 ARM_BUILTIN_WSRLWI,
23627 ARM_BUILTIN_WSRLDI,
23628 ARM_BUILTIN_WRORHI,
23629 ARM_BUILTIN_WRORWI,
23630 ARM_BUILTIN_WRORDI,
23632 ARM_BUILTIN_WUNPCKIHB,
23633 ARM_BUILTIN_WUNPCKIHH,
23634 ARM_BUILTIN_WUNPCKIHW,
23635 ARM_BUILTIN_WUNPCKILB,
23636 ARM_BUILTIN_WUNPCKILH,
23637 ARM_BUILTIN_WUNPCKILW,
23639 ARM_BUILTIN_WUNPCKEHSB,
23640 ARM_BUILTIN_WUNPCKEHSH,
23641 ARM_BUILTIN_WUNPCKEHSW,
23642 ARM_BUILTIN_WUNPCKEHUB,
23643 ARM_BUILTIN_WUNPCKEHUH,
23644 ARM_BUILTIN_WUNPCKEHUW,
23645 ARM_BUILTIN_WUNPCKELSB,
23646 ARM_BUILTIN_WUNPCKELSH,
23647 ARM_BUILTIN_WUNPCKELSW,
23648 ARM_BUILTIN_WUNPCKELUB,
23649 ARM_BUILTIN_WUNPCKELUH,
23650 ARM_BUILTIN_WUNPCKELUW,
23652 ARM_BUILTIN_WABSB,
23653 ARM_BUILTIN_WABSH,
23654 ARM_BUILTIN_WABSW,
23656 ARM_BUILTIN_WADDSUBHX,
23657 ARM_BUILTIN_WSUBADDHX,
23659 ARM_BUILTIN_WABSDIFFB,
23660 ARM_BUILTIN_WABSDIFFH,
23661 ARM_BUILTIN_WABSDIFFW,
23663 ARM_BUILTIN_WADDCH,
23664 ARM_BUILTIN_WADDCW,
23666 ARM_BUILTIN_WAVG4,
23667 ARM_BUILTIN_WAVG4R,
23669 ARM_BUILTIN_WMADDSX,
23670 ARM_BUILTIN_WMADDUX,
23672 ARM_BUILTIN_WMADDSN,
23673 ARM_BUILTIN_WMADDUN,
23675 ARM_BUILTIN_WMULWSM,
23676 ARM_BUILTIN_WMULWUM,
23678 ARM_BUILTIN_WMULWSMR,
23679 ARM_BUILTIN_WMULWUMR,
23681 ARM_BUILTIN_WMULWL,
23683 ARM_BUILTIN_WMULSMR,
23684 ARM_BUILTIN_WMULUMR,
23686 ARM_BUILTIN_WQMULM,
23687 ARM_BUILTIN_WQMULMR,
23689 ARM_BUILTIN_WQMULWM,
23690 ARM_BUILTIN_WQMULWMR,
23692 ARM_BUILTIN_WADDBHUSM,
23693 ARM_BUILTIN_WADDBHUSL,
23695 ARM_BUILTIN_WQMIABB,
23696 ARM_BUILTIN_WQMIABT,
23697 ARM_BUILTIN_WQMIATB,
23698 ARM_BUILTIN_WQMIATT,
23700 ARM_BUILTIN_WQMIABBN,
23701 ARM_BUILTIN_WQMIABTN,
23702 ARM_BUILTIN_WQMIATBN,
23703 ARM_BUILTIN_WQMIATTN,
23705 ARM_BUILTIN_WMIABB,
23706 ARM_BUILTIN_WMIABT,
23707 ARM_BUILTIN_WMIATB,
23708 ARM_BUILTIN_WMIATT,
23710 ARM_BUILTIN_WMIABBN,
23711 ARM_BUILTIN_WMIABTN,
23712 ARM_BUILTIN_WMIATBN,
23713 ARM_BUILTIN_WMIATTN,
23715 ARM_BUILTIN_WMIAWBB,
23716 ARM_BUILTIN_WMIAWBT,
23717 ARM_BUILTIN_WMIAWTB,
23718 ARM_BUILTIN_WMIAWTT,
23720 ARM_BUILTIN_WMIAWBBN,
23721 ARM_BUILTIN_WMIAWBTN,
23722 ARM_BUILTIN_WMIAWTBN,
23723 ARM_BUILTIN_WMIAWTTN,
23725 ARM_BUILTIN_WMERGE,
23727 ARM_BUILTIN_CRC32B,
23728 ARM_BUILTIN_CRC32H,
23729 ARM_BUILTIN_CRC32W,
23730 ARM_BUILTIN_CRC32CB,
23731 ARM_BUILTIN_CRC32CH,
23732 ARM_BUILTIN_CRC32CW,
23734 ARM_BUILTIN_GET_FPSCR,
23735 ARM_BUILTIN_SET_FPSCR,
23737 #undef CRYPTO1
23738 #undef CRYPTO2
23739 #undef CRYPTO3
23741 #define CRYPTO1(L, U, M1, M2) \
23742 ARM_BUILTIN_CRYPTO_##U,
23743 #define CRYPTO2(L, U, M1, M2, M3) \
23744 ARM_BUILTIN_CRYPTO_##U,
23745 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23746 ARM_BUILTIN_CRYPTO_##U,
23748 #include "crypto.def"
23750 #undef CRYPTO1
23751 #undef CRYPTO2
23752 #undef CRYPTO3
23754 #include "arm_neon_builtins.def"
23756 ,ARM_BUILTIN_MAX
23759 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
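/* Hence, if arm_neon_builtins.def yields N table entries, the NEON builtins
   occupy the last N codes of the enum: neon_builtin_data[i] is assigned the
   function code ARM_BUILTIN_NEON_BASE + i (see the fcode loop in
   arm_init_neon_builtins below).  */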
23761 #undef CF
23762 #undef VAR1
23763 #undef VAR2
23764 #undef VAR3
23765 #undef VAR4
23766 #undef VAR5
23767 #undef VAR6
23768 #undef VAR7
23769 #undef VAR8
23770 #undef VAR9
23771 #undef VAR10
23773 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23775 #define NUM_DREG_TYPES 5
23776 #define NUM_QREG_TYPES 6
23778 static void
23779 arm_init_neon_builtins (void)
23781 unsigned int i, fcode;
23782 tree decl;
23784 tree neon_intQI_type_node;
23785 tree neon_intHI_type_node;
23786 tree neon_floatHF_type_node;
23787 tree neon_polyQI_type_node;
23788 tree neon_polyHI_type_node;
23789 tree neon_intSI_type_node;
23790 tree neon_intDI_type_node;
23791 tree neon_intUTI_type_node;
23792 tree neon_float_type_node;
23794 tree intQI_pointer_node;
23795 tree intHI_pointer_node;
23796 tree intSI_pointer_node;
23797 tree intDI_pointer_node;
23798 tree float_pointer_node;
23800 tree const_intQI_node;
23801 tree const_intHI_node;
23802 tree const_intSI_node;
23803 tree const_intDI_node;
23804 tree const_float_node;
23806 tree const_intQI_pointer_node;
23807 tree const_intHI_pointer_node;
23808 tree const_intSI_pointer_node;
23809 tree const_intDI_pointer_node;
23810 tree const_float_pointer_node;
23812 tree V8QI_type_node;
23813 tree V4HI_type_node;
23814 tree V4UHI_type_node;
23815 tree V4HF_type_node;
23816 tree V2SI_type_node;
23817 tree V2USI_type_node;
23818 tree V2SF_type_node;
23819 tree V16QI_type_node;
23820 tree V8HI_type_node;
23821 tree V8UHI_type_node;
23822 tree V4SI_type_node;
23823 tree V4USI_type_node;
23824 tree V4SF_type_node;
23825 tree V2DI_type_node;
23826 tree V2UDI_type_node;
23828 tree intUQI_type_node;
23829 tree intUHI_type_node;
23830 tree intUSI_type_node;
23831 tree intUDI_type_node;
23833 tree intEI_type_node;
23834 tree intOI_type_node;
23835 tree intCI_type_node;
23836 tree intXI_type_node;
23838 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23839 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23840 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23842 /* Create distinguished type nodes for NEON vector element types,
23843 and pointers to values of such types, so we can detect them later. */
23844 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23845 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23846 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23847 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23848 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23849 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23850 neon_float_type_node = make_node (REAL_TYPE);
23851 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23852 layout_type (neon_float_type_node);
23853 neon_floatHF_type_node = make_node (REAL_TYPE);
23854 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23855 layout_type (neon_floatHF_type_node);
23857 /* Define typedefs which exactly correspond to the modes we are basing vector
23858 types on. If you change these names you'll need to change
23859 the table used by arm_mangle_type too. */
23860 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23861 "__builtin_neon_qi");
23862 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23863 "__builtin_neon_hi");
23864 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23865 "__builtin_neon_hf");
23866 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23867 "__builtin_neon_si");
23868 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23869 "__builtin_neon_sf");
23870 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23871 "__builtin_neon_di");
23872 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23873 "__builtin_neon_poly8");
23874 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23875 "__builtin_neon_poly16");
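/* These __builtin_neon_* names are the element types from which arm_neon.h
   builds its user-visible vector types; as a rough sketch (the exact
   typedefs live in arm_neon.h, not here), something along the lines of

       typedef __builtin_neon_qi int8x8_t
           __attribute__ ((__vector_size__ (8)));

   which is why renaming any of these entries also means updating
   arm_mangle_type, as noted above.  */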
23877 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23878 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23879 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23880 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23881 float_pointer_node = build_pointer_type (neon_float_type_node);
23883 /* Next create constant-qualified versions of the above types. */
23884 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23885 TYPE_QUAL_CONST);
23886 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23887 TYPE_QUAL_CONST);
23888 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23889 TYPE_QUAL_CONST);
23890 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23891 TYPE_QUAL_CONST);
23892 const_float_node = build_qualified_type (neon_float_type_node,
23893 TYPE_QUAL_CONST);
23895 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23896 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23897 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23898 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23899 const_float_pointer_node = build_pointer_type (const_float_node);
23901 /* Unsigned integer types for various mode sizes. */
23902 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23903 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23904 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23905 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23906 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23907 /* Now create vector types based on our NEON element types. */
23908 /* 64-bit vectors. */
23909 V8QI_type_node =
23910 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23911 V4HI_type_node =
23912 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23913 V4UHI_type_node =
23914 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23915 V4HF_type_node =
23916 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23917 V2SI_type_node =
23918 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23919 V2USI_type_node =
23920 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23921 V2SF_type_node =
23922 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23923 /* 128-bit vectors. */
23924 V16QI_type_node =
23925 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23926 V8HI_type_node =
23927 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23928 V8UHI_type_node =
23929 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23930 V4SI_type_node =
23931 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23932 V4USI_type_node =
23933 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23934 V4SF_type_node =
23935 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23936 V2DI_type_node =
23937 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23938 V2UDI_type_node =
23939 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23942 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23943 "__builtin_neon_uqi");
23944 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23945 "__builtin_neon_uhi");
23946 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23947 "__builtin_neon_usi");
23948 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23949 "__builtin_neon_udi");
23950 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23951 "__builtin_neon_poly64");
23952 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23953 "__builtin_neon_poly128");
23955 /* Opaque integer types for structures of vectors. */
23956 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23957 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23958 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23959 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23961 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23962 "__builtin_neon_ti");
23963 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23964 "__builtin_neon_ei");
23965 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23966 "__builtin_neon_oi");
23967 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23968 "__builtin_neon_ci");
23969 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23970 "__builtin_neon_xi");
23972 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23975 tree V16UQI_type_node =
23976 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23978 tree v16uqi_ftype_v16uqi
23979 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23981 tree v16uqi_ftype_v16uqi_v16uqi
23982 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23983 V16UQI_type_node, NULL_TREE);
23985 tree v4usi_ftype_v4usi
23986 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23988 tree v4usi_ftype_v4usi_v4usi
23989 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23990 V4USI_type_node, NULL_TREE);
23992 tree v4usi_ftype_v4usi_v4usi_v4usi
23993 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23994 V4USI_type_node, V4USI_type_node, NULL_TREE);
23996 tree uti_ftype_udi_udi
23997 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23998 intUDI_type_node, NULL_TREE);
24000 #undef CRYPTO1
24001 #undef CRYPTO2
24002 #undef CRYPTO3
24003 #undef C
24004 #undef N
24005 #undef CF
24006 #undef FT1
24007 #undef FT2
24008 #undef FT3
24010 #define C(U) \
24011 ARM_BUILTIN_CRYPTO_##U
24012 #define N(L) \
24013 "__builtin_arm_crypto_"#L
24014 #define FT1(R, A) \
24015 R##_ftype_##A
24016 #define FT2(R, A1, A2) \
24017 R##_ftype_##A1##_##A2
24018 #define FT3(R, A1, A2, A3) \
24019 R##_ftype_##A1##_##A2##_##A3
24020 #define CRYPTO1(L, U, R, A) \
24021 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
24022 C (U), BUILT_IN_MD, \
24023 NULL, NULL_TREE);
24024 #define CRYPTO2(L, U, R, A1, A2) \
24025 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
24026 C (U), BUILT_IN_MD, \
24027 NULL, NULL_TREE);
24029 #define CRYPTO3(L, U, R, A1, A2, A3) \
24030 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
24031 C (U), BUILT_IN_MD, \
24032 NULL, NULL_TREE);
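/* As an illustration (the entry shown is hypothetical; the real ones are in
   crypto.def), a two-operand entry such as

       CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   expands through the C, N and FT2 macros above into

       arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
         = add_builtin_function ("__builtin_arm_crypto_aesd",
                                 v16uqi_ftype_v16uqi_v16uqi,
                                 ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                                 NULL, NULL_TREE);  */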
24033 #include "crypto.def"
24035 #undef CRYPTO1
24036 #undef CRYPTO2
24037 #undef CRYPTO3
24038 #undef C
24039 #undef N
24040 #undef FT1
24041 #undef FT2
24042 #undef FT3
24044 dreg_types[0] = V8QI_type_node;
24045 dreg_types[1] = V4HI_type_node;
24046 dreg_types[2] = V2SI_type_node;
24047 dreg_types[3] = V2SF_type_node;
24048 dreg_types[4] = neon_intDI_type_node;
24050 qreg_types[0] = V16QI_type_node;
24051 qreg_types[1] = V8HI_type_node;
24052 qreg_types[2] = V4SI_type_node;
24053 qreg_types[3] = V4SF_type_node;
24054 qreg_types[4] = V2DI_type_node;
24055 qreg_types[5] = neon_intUTI_type_node;
24057 for (i = 0; i < NUM_QREG_TYPES; i++)
24059 int j;
24060 for (j = 0; j < NUM_QREG_TYPES; j++)
24062 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
24063 reinterp_ftype_dreg[i][j]
24064 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
24066 reinterp_ftype_qreg[i][j]
24067 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
24071 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
24072 i < ARRAY_SIZE (neon_builtin_data);
24073 i++, fcode++)
24075 neon_builtin_datum *d = &neon_builtin_data[i];
24077 const char* const modenames[] = {
24078 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
24079 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
24080 "ti", "ei", "oi"
24082 char namebuf[60];
24083 tree ftype = NULL;
24084 int is_load = 0, is_store = 0;
24086 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
24088 d->fcode = fcode;
24090 switch (d->itype)
24092 case NEON_LOAD1:
24093 case NEON_LOAD1LANE:
24094 case NEON_LOADSTRUCT:
24095 case NEON_LOADSTRUCTLANE:
24096 is_load = 1;
24097 /* Fall through. */
24098 case NEON_STORE1:
24099 case NEON_STORE1LANE:
24100 case NEON_STORESTRUCT:
24101 case NEON_STORESTRUCTLANE:
24102 if (!is_load)
24103 is_store = 1;
24104 /* Fall through. */
24105 case NEON_UNOP:
24106 case NEON_RINT:
24107 case NEON_BINOP:
24108 case NEON_LOGICBINOP:
24109 case NEON_SHIFTINSERT:
24110 case NEON_TERNOP:
24111 case NEON_GETLANE:
24112 case NEON_SETLANE:
24113 case NEON_CREATE:
24114 case NEON_DUP:
24115 case NEON_DUPLANE:
24116 case NEON_SHIFTIMM:
24117 case NEON_SHIFTACC:
24118 case NEON_COMBINE:
24119 case NEON_SPLIT:
24120 case NEON_CONVERT:
24121 case NEON_FIXCONV:
24122 case NEON_LANEMUL:
24123 case NEON_LANEMULL:
24124 case NEON_LANEMULH:
24125 case NEON_LANEMAC:
24126 case NEON_SCALARMUL:
24127 case NEON_SCALARMULL:
24128 case NEON_SCALARMULH:
24129 case NEON_SCALARMAC:
24130 case NEON_SELECT:
24131 case NEON_VTBL:
24132 case NEON_VTBX:
24134 int k;
24135 tree return_type = void_type_node, args = void_list_node;
24137 /* Build a function type directly from the insn_data for
24138 this builtin. The build_function_type() function takes
24139 care of removing duplicates for us. */
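/* As a sketch: for a hypothetical binary operation whose pattern has three
   V8QImode generator operands, the loop walks k = 2, 1, 0; operands 2 and 1
   are pushed onto ARGS as V8QI_type_node and operand 0 becomes the return
   type, i.e. the equivalent of v8qi_ftype_v8qi_v8qi.  Loads and stores
   instead map their memory operand (operand 1 or 0 respectively) onto an
   element pointer type (const-qualified for loads).  */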
24140 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24142 tree eltype;
24144 if (is_load && k == 1)
24146 /* Neon load patterns always have the memory
24147 operand in the operand 1 position. */
24148 gcc_assert (insn_data[d->code].operand[k].predicate
24149 == neon_struct_operand);
24151 switch (d->mode)
24153 case T_V8QI:
24154 case T_V16QI:
24155 eltype = const_intQI_pointer_node;
24156 break;
24158 case T_V4HI:
24159 case T_V8HI:
24160 eltype = const_intHI_pointer_node;
24161 break;
24163 case T_V2SI:
24164 case T_V4SI:
24165 eltype = const_intSI_pointer_node;
24166 break;
24168 case T_V2SF:
24169 case T_V4SF:
24170 eltype = const_float_pointer_node;
24171 break;
24173 case T_DI:
24174 case T_V2DI:
24175 eltype = const_intDI_pointer_node;
24176 break;
24178 default: gcc_unreachable ();
24181 else if (is_store && k == 0)
24183 /* Similarly, Neon store patterns use operand 0 as
24184 the memory location to store to. */
24185 gcc_assert (insn_data[d->code].operand[k].predicate
24186 == neon_struct_operand);
24188 switch (d->mode)
24190 case T_V8QI:
24191 case T_V16QI:
24192 eltype = intQI_pointer_node;
24193 break;
24195 case T_V4HI:
24196 case T_V8HI:
24197 eltype = intHI_pointer_node;
24198 break;
24200 case T_V2SI:
24201 case T_V4SI:
24202 eltype = intSI_pointer_node;
24203 break;
24205 case T_V2SF:
24206 case T_V4SF:
24207 eltype = float_pointer_node;
24208 break;
24210 case T_DI:
24211 case T_V2DI:
24212 eltype = intDI_pointer_node;
24213 break;
24215 default: gcc_unreachable ();
24218 else
24220 switch (insn_data[d->code].operand[k].mode)
24222 case VOIDmode: eltype = void_type_node; break;
24223 /* Scalars. */
24224 case QImode: eltype = neon_intQI_type_node; break;
24225 case HImode: eltype = neon_intHI_type_node; break;
24226 case SImode: eltype = neon_intSI_type_node; break;
24227 case SFmode: eltype = neon_float_type_node; break;
24228 case DImode: eltype = neon_intDI_type_node; break;
24229 case TImode: eltype = intTI_type_node; break;
24230 case EImode: eltype = intEI_type_node; break;
24231 case OImode: eltype = intOI_type_node; break;
24232 case CImode: eltype = intCI_type_node; break;
24233 case XImode: eltype = intXI_type_node; break;
24234 /* 64-bit vectors. */
24235 case V8QImode: eltype = V8QI_type_node; break;
24236 case V4HImode: eltype = V4HI_type_node; break;
24237 case V2SImode: eltype = V2SI_type_node; break;
24238 case V2SFmode: eltype = V2SF_type_node; break;
24239 /* 128-bit vectors. */
24240 case V16QImode: eltype = V16QI_type_node; break;
24241 case V8HImode: eltype = V8HI_type_node; break;
24242 case V4SImode: eltype = V4SI_type_node; break;
24243 case V4SFmode: eltype = V4SF_type_node; break;
24244 case V2DImode: eltype = V2DI_type_node; break;
24245 default: gcc_unreachable ();
24249 if (k == 0 && !is_store)
24250 return_type = eltype;
24251 else
24252 args = tree_cons (NULL_TREE, eltype, args);
24255 ftype = build_function_type (return_type, args);
24257 break;
24259 case NEON_REINTERP:
24261 /* We iterate over NUM_DREG_TYPES doubleword types,
24262 then NUM_QREG_TYPES quadword types.
24263 V4HF is not a type used in reinterpret, so we translate
24264 d->mode to the correct index in reinterp_ftype_dreg. */
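/* Worked example, assuming the neon_builtin_type_mode enumerators follow the
   same order as the modenames[] table above: for d->mode == T_V2SI (3),
   qreg_p is false and T_V2SI > T_V4HF, so rhs = (3 - 1) % NUM_QREG_TYPES
   == 2, i.e. dreg_types[2] (V2SI); for d->mode == T_TI (11), qreg_p is
   true, so rhs = 11 % NUM_QREG_TYPES == 5, i.e. qreg_types[5] (TI).  */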
24265 bool qreg_p
24266 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24267 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24268 % NUM_QREG_TYPES;
24269 switch (insn_data[d->code].operand[0].mode)
24271 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24272 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24273 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24274 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24275 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24276 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24277 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24278 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24279 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24280 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24281 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24282 default: gcc_unreachable ();
24285 break;
24286 case NEON_FLOAT_WIDEN:
24288 tree eltype = NULL_TREE;
24289 tree return_type = NULL_TREE;
24291 switch (insn_data[d->code].operand[1].mode)
24293 case V4HFmode:
24294 eltype = V4HF_type_node;
24295 return_type = V4SF_type_node;
24296 break;
24297 default: gcc_unreachable ();
24299 ftype = build_function_type_list (return_type, eltype, NULL);
24300 break;
24302 case NEON_FLOAT_NARROW:
24304 tree eltype = NULL_TREE;
24305 tree return_type = NULL_TREE;
24307 switch (insn_data[d->code].operand[1].mode)
24309 case V4SFmode:
24310 eltype = V4SF_type_node;
24311 return_type = V4HF_type_node;
24312 break;
24313 default: gcc_unreachable ();
24315 ftype = build_function_type_list (return_type, eltype, NULL);
24316 break;
24318 case NEON_BSWAP:
24320 tree eltype = NULL_TREE;
24321 switch (insn_data[d->code].operand[1].mode)
24323 case V4HImode:
24324 eltype = V4UHI_type_node;
24325 break;
24326 case V8HImode:
24327 eltype = V8UHI_type_node;
24328 break;
24329 case V2SImode:
24330 eltype = V2USI_type_node;
24331 break;
24332 case V4SImode:
24333 eltype = V4USI_type_node;
24334 break;
24335 case V2DImode:
24336 eltype = V2UDI_type_node;
24337 break;
24338 default: gcc_unreachable ();
24340 ftype = build_function_type_list (eltype, eltype, NULL);
24341 break;
24343 case NEON_COPYSIGNF:
24345 tree eltype = NULL_TREE;
24346 switch (insn_data[d->code].operand[1].mode)
24348 case V2SFmode:
24349 eltype = V2SF_type_node;
24350 break;
24351 case V4SFmode:
24352 eltype = V4SF_type_node;
24353 break;
24354 default: gcc_unreachable ();
24356 ftype = build_function_type_list (eltype, eltype, NULL);
24357 break;
24359 default:
24360 gcc_unreachable ();
24363 gcc_assert (ftype != NULL);
24365 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24367 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24368 NULL_TREE);
24369 arm_builtin_decls[fcode] = decl;
24373 #undef NUM_DREG_TYPES
24374 #undef NUM_QREG_TYPES
24376 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24377 do \
24379 if ((MASK) & insn_flags) \
24381 tree bdecl; \
24382 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24383 BUILT_IN_MD, NULL, NULL_TREE); \
24384 arm_builtin_decls[CODE] = bdecl; \
24387 while (0)
24389 struct builtin_description
24391 const unsigned int mask;
24392 const enum insn_code icode;
24393 const char * const name;
24394 const enum arm_builtins code;
24395 const enum rtx_code comparison;
24396 const unsigned int flag;
24399 static const struct builtin_description bdesc_2arg[] =
24401 #define IWMMXT_BUILTIN(code, string, builtin) \
24402 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24403 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24405 #define IWMMXT2_BUILTIN(code, string, builtin) \
24406 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24407 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24409 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24410 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24411 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24412 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24413 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24414 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24415 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24416 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24417 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24418 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24419 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24420 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24421 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24422 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24423 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24424 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24425 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24426 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24427 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24428 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24429 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24430 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24431 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24432 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24433 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24434 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24435 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24436 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24437 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24438 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24439 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24440 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24441 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24442 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24443 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24444 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24445 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24446 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24447 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24448 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24449 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24450 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24451 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24452 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24453 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24454 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24455 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24456 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24457 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24458 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24459 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24460 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24461 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24462 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24463 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24464 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24465 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24466 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24467 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24468 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24469 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24470 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24471 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24472 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24473 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24474 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24475 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24476 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24477 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24478 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24479 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24480 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24481 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24482 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24483 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24484 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24485 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24486 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24488 #define IWMMXT_BUILTIN2(code, builtin) \
24489 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24491 #define IWMMXT2_BUILTIN2(code, builtin) \
24492 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24494 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24495 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24496 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24497 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24498 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24499 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24500 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24501 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24502 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24503 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24506 #define FP_BUILTIN(L, U) \
24507 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24508 UNKNOWN, 0},
24510 FP_BUILTIN (get_fpscr, GET_FPSCR)
24511 FP_BUILTIN (set_fpscr, SET_FPSCR)
24512 #undef FP_BUILTIN
24514 #define CRC32_BUILTIN(L, U) \
24515 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24516 UNKNOWN, 0},
24517 CRC32_BUILTIN (crc32b, CRC32B)
24518 CRC32_BUILTIN (crc32h, CRC32H)
24519 CRC32_BUILTIN (crc32w, CRC32W)
24520 CRC32_BUILTIN (crc32cb, CRC32CB)
24521 CRC32_BUILTIN (crc32ch, CRC32CH)
24522 CRC32_BUILTIN (crc32cw, CRC32CW)
24523 #undef CRC32_BUILTIN
24526 #define CRYPTO_BUILTIN(L, U) \
24527 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24528 UNKNOWN, 0},
24529 #undef CRYPTO1
24530 #undef CRYPTO2
24531 #undef CRYPTO3
24532 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24533 #define CRYPTO1(L, U, R, A)
24534 #define CRYPTO3(L, U, R, A1, A2, A3)
24535 #include "crypto.def"
24536 #undef CRYPTO1
24537 #undef CRYPTO2
24538 #undef CRYPTO3
24542 static const struct builtin_description bdesc_1arg[] =
24544 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24545 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24546 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24547 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24548 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24549 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24550 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24551 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24552 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24553 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24554 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24555 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24556 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24557 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24558 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24559 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24560 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24561 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24562 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24563 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24564 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24565 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24566 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24567 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24569 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24570 #define CRYPTO2(L, U, R, A1, A2)
24571 #define CRYPTO3(L, U, R, A1, A2, A3)
24572 #include "crypto.def"
24573 #undef CRYPTO1
24574 #undef CRYPTO2
24575 #undef CRYPTO3
24578 static const struct builtin_description bdesc_3arg[] =
24580 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24581 #define CRYPTO1(L, U, R, A)
24582 #define CRYPTO2(L, U, R, A1, A2)
24583 #include "crypto.def"
24584 #undef CRYPTO1
24585 #undef CRYPTO2
24586 #undef CRYPTO3
24588 #undef CRYPTO_BUILTIN
24590 /* Set up all the iWMMXt builtins.  This is only called when
24591 TARGET_REALLY_IWMMXT is nonzero.  */
24593 static void
24594 arm_init_iwmmxt_builtins (void)
24596 const struct builtin_description * d;
24597 size_t i;
24599 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24600 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24601 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24603 tree v8qi_ftype_v8qi_v8qi_int
24604 = build_function_type_list (V8QI_type_node,
24605 V8QI_type_node, V8QI_type_node,
24606 integer_type_node, NULL_TREE);
24607 tree v4hi_ftype_v4hi_int
24608 = build_function_type_list (V4HI_type_node,
24609 V4HI_type_node, integer_type_node, NULL_TREE);
24610 tree v2si_ftype_v2si_int
24611 = build_function_type_list (V2SI_type_node,
24612 V2SI_type_node, integer_type_node, NULL_TREE);
24613 tree v2si_ftype_di_di
24614 = build_function_type_list (V2SI_type_node,
24615 long_long_integer_type_node,
24616 long_long_integer_type_node,
24617 NULL_TREE);
24618 tree di_ftype_di_int
24619 = build_function_type_list (long_long_integer_type_node,
24620 long_long_integer_type_node,
24621 integer_type_node, NULL_TREE);
24622 tree di_ftype_di_int_int
24623 = build_function_type_list (long_long_integer_type_node,
24624 long_long_integer_type_node,
24625 integer_type_node,
24626 integer_type_node, NULL_TREE);
24627 tree int_ftype_v8qi
24628 = build_function_type_list (integer_type_node,
24629 V8QI_type_node, NULL_TREE);
24630 tree int_ftype_v4hi
24631 = build_function_type_list (integer_type_node,
24632 V4HI_type_node, NULL_TREE);
24633 tree int_ftype_v2si
24634 = build_function_type_list (integer_type_node,
24635 V2SI_type_node, NULL_TREE);
24636 tree int_ftype_v8qi_int
24637 = build_function_type_list (integer_type_node,
24638 V8QI_type_node, integer_type_node, NULL_TREE);
24639 tree int_ftype_v4hi_int
24640 = build_function_type_list (integer_type_node,
24641 V4HI_type_node, integer_type_node, NULL_TREE);
24642 tree int_ftype_v2si_int
24643 = build_function_type_list (integer_type_node,
24644 V2SI_type_node, integer_type_node, NULL_TREE);
24645 tree v8qi_ftype_v8qi_int_int
24646 = build_function_type_list (V8QI_type_node,
24647 V8QI_type_node, integer_type_node,
24648 integer_type_node, NULL_TREE);
24649 tree v4hi_ftype_v4hi_int_int
24650 = build_function_type_list (V4HI_type_node,
24651 V4HI_type_node, integer_type_node,
24652 integer_type_node, NULL_TREE);
24653 tree v2si_ftype_v2si_int_int
24654 = build_function_type_list (V2SI_type_node,
24655 V2SI_type_node, integer_type_node,
24656 integer_type_node, NULL_TREE);
24657 /* Miscellaneous. */
24658 tree v8qi_ftype_v4hi_v4hi
24659 = build_function_type_list (V8QI_type_node,
24660 V4HI_type_node, V4HI_type_node, NULL_TREE);
24661 tree v4hi_ftype_v2si_v2si
24662 = build_function_type_list (V4HI_type_node,
24663 V2SI_type_node, V2SI_type_node, NULL_TREE);
24664 tree v8qi_ftype_v4hi_v8qi
24665 = build_function_type_list (V8QI_type_node,
24666 V4HI_type_node, V8QI_type_node, NULL_TREE);
24667 tree v2si_ftype_v4hi_v4hi
24668 = build_function_type_list (V2SI_type_node,
24669 V4HI_type_node, V4HI_type_node, NULL_TREE);
24670 tree v2si_ftype_v8qi_v8qi
24671 = build_function_type_list (V2SI_type_node,
24672 V8QI_type_node, V8QI_type_node, NULL_TREE);
24673 tree v4hi_ftype_v4hi_di
24674 = build_function_type_list (V4HI_type_node,
24675 V4HI_type_node, long_long_integer_type_node,
24676 NULL_TREE);
24677 tree v2si_ftype_v2si_di
24678 = build_function_type_list (V2SI_type_node,
24679 V2SI_type_node, long_long_integer_type_node,
24680 NULL_TREE);
24681 tree di_ftype_void
24682 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24683 tree int_ftype_void
24684 = build_function_type_list (integer_type_node, NULL_TREE);
24685 tree di_ftype_v8qi
24686 = build_function_type_list (long_long_integer_type_node,
24687 V8QI_type_node, NULL_TREE);
24688 tree di_ftype_v4hi
24689 = build_function_type_list (long_long_integer_type_node,
24690 V4HI_type_node, NULL_TREE);
24691 tree di_ftype_v2si
24692 = build_function_type_list (long_long_integer_type_node,
24693 V2SI_type_node, NULL_TREE);
24694 tree v2si_ftype_v4hi
24695 = build_function_type_list (V2SI_type_node,
24696 V4HI_type_node, NULL_TREE);
24697 tree v4hi_ftype_v8qi
24698 = build_function_type_list (V4HI_type_node,
24699 V8QI_type_node, NULL_TREE);
24700 tree v8qi_ftype_v8qi
24701 = build_function_type_list (V8QI_type_node,
24702 V8QI_type_node, NULL_TREE);
24703 tree v4hi_ftype_v4hi
24704 = build_function_type_list (V4HI_type_node,
24705 V4HI_type_node, NULL_TREE);
24706 tree v2si_ftype_v2si
24707 = build_function_type_list (V2SI_type_node,
24708 V2SI_type_node, NULL_TREE);
24710 tree di_ftype_di_v4hi_v4hi
24711 = build_function_type_list (long_long_unsigned_type_node,
24712 long_long_unsigned_type_node,
24713 V4HI_type_node, V4HI_type_node,
24714 NULL_TREE);
24716 tree di_ftype_v4hi_v4hi
24717 = build_function_type_list (long_long_unsigned_type_node,
24718 V4HI_type_node,V4HI_type_node,
24719 NULL_TREE);
24721 tree v2si_ftype_v2si_v4hi_v4hi
24722 = build_function_type_list (V2SI_type_node,
24723 V2SI_type_node, V4HI_type_node,
24724 V4HI_type_node, NULL_TREE);
24726 tree v2si_ftype_v2si_v8qi_v8qi
24727 = build_function_type_list (V2SI_type_node,
24728 V2SI_type_node, V8QI_type_node,
24729 V8QI_type_node, NULL_TREE);
24731 tree di_ftype_di_v2si_v2si
24732 = build_function_type_list (long_long_unsigned_type_node,
24733 long_long_unsigned_type_node,
24734 V2SI_type_node, V2SI_type_node,
24735 NULL_TREE);
24737 tree di_ftype_di_di_int
24738 = build_function_type_list (long_long_unsigned_type_node,
24739 long_long_unsigned_type_node,
24740 long_long_unsigned_type_node,
24741 integer_type_node, NULL_TREE);
24743 tree void_ftype_int
24744 = build_function_type_list (void_type_node,
24745 integer_type_node, NULL_TREE);
24747 tree v8qi_ftype_char
24748 = build_function_type_list (V8QI_type_node,
24749 signed_char_type_node, NULL_TREE);
24751 tree v4hi_ftype_short
24752 = build_function_type_list (V4HI_type_node,
24753 short_integer_type_node, NULL_TREE);
24755 tree v2si_ftype_int
24756 = build_function_type_list (V2SI_type_node,
24757 integer_type_node, NULL_TREE);
24759 /* Normal vector binops. */
24760 tree v8qi_ftype_v8qi_v8qi
24761 = build_function_type_list (V8QI_type_node,
24762 V8QI_type_node, V8QI_type_node, NULL_TREE);
24763 tree v4hi_ftype_v4hi_v4hi
24764 = build_function_type_list (V4HI_type_node,
24765 V4HI_type_node,V4HI_type_node, NULL_TREE);
24766 tree v2si_ftype_v2si_v2si
24767 = build_function_type_list (V2SI_type_node,
24768 V2SI_type_node, V2SI_type_node, NULL_TREE);
24769 tree di_ftype_di_di
24770 = build_function_type_list (long_long_unsigned_type_node,
24771 long_long_unsigned_type_node,
24772 long_long_unsigned_type_node,
24773 NULL_TREE);
24775 /* Add all builtins that are more or less simple operations on two
24776 operands. */
24777 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24779 /* Use one of the operands; the target can have a different mode for
24780 mask-generating compares. */
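/* For example, the addv8qi3 entry above ("waddb") has V8QImode source
   operands, so it ends up registered as __builtin_arm_waddb with type
   v8qi_ftype_v8qi_v8qi.  */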
24781 machine_mode mode;
24782 tree type;
24784 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24785 continue;
24787 mode = insn_data[d->icode].operand[1].mode;
24789 switch (mode)
24791 case V8QImode:
24792 type = v8qi_ftype_v8qi_v8qi;
24793 break;
24794 case V4HImode:
24795 type = v4hi_ftype_v4hi_v4hi;
24796 break;
24797 case V2SImode:
24798 type = v2si_ftype_v2si_v2si;
24799 break;
24800 case DImode:
24801 type = di_ftype_di_di;
24802 break;
24804 default:
24805 gcc_unreachable ();
24808 def_mbuiltin (d->mask, d->name, type, d->code);
24811 /* Add the remaining MMX insns with somewhat more complicated types. */
24812 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24814 ARM_BUILTIN_ ## CODE)
24816 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24817 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24818 ARM_BUILTIN_ ## CODE)
24820 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24821 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24822 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24823 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24824 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24825 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24826 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24827 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24828 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24830 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24831 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24832 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24833 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24834 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24835 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24837 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24838 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24839 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24840 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24841 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24842 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24844 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24845 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24846 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24847 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24848 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24849 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24851 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24852 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24853 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24854 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24855 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24856 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24858 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24860 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24861 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24862 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24863 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24864 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24865 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24866 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24867 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24868 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24869 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24871 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24872 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24873 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24874 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24875 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24876 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24877 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24878 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24879 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24881 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24882 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24883 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24885 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24886 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24887 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24889 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24890 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24892 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24893 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24894 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24895 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24896 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24897 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24899 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24900 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24901 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24902 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24903 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24904 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24905 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24906 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24907 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24908 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24909 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24910 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24912 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24913 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24914 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24915 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24917 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24918 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24919 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24920 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24921 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24922 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24923 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24925 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24926 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24927 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24929 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24930 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24931 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24932 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24934 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24935 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24936 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24937 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24939 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24940 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24941 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24942 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24944 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24945 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24946 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24947 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24949 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24950 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24951 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24952 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24954 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24955 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24956 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24957 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24959 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24961 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24962 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24963 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24965 #undef iwmmx_mbuiltin
24966 #undef iwmmx2_mbuiltin
24969 static void
24970 arm_init_fp16_builtins (void)
24972 tree fp16_type = make_node (REAL_TYPE);
24973 TYPE_PRECISION (fp16_type) = 16;
24974 layout_type (fp16_type);
24975 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
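/* From here on "__fp16" is usable as a scalar type in C whenever a
   half-precision format has been selected (e.g. -mfp16-format=ieee), as in
   "__fp16 h = 1.0f;"; arithmetic on it is done after promotion to float,
   see arm_promoted_type below.  */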
24978 static void
24979 arm_init_crc32_builtins ()
24981 tree si_ftype_si_qi
24982 = build_function_type_list (unsigned_intSI_type_node,
24983 unsigned_intSI_type_node,
24984 unsigned_intQI_type_node, NULL_TREE);
24985 tree si_ftype_si_hi
24986 = build_function_type_list (unsigned_intSI_type_node,
24987 unsigned_intSI_type_node,
24988 unsigned_intHI_type_node, NULL_TREE);
24989 tree si_ftype_si_si
24990 = build_function_type_list (unsigned_intSI_type_node,
24991 unsigned_intSI_type_node,
24992 unsigned_intSI_type_node, NULL_TREE);
24994 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24995 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24996 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24997 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24998 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24999 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
25000 arm_builtin_decls[ARM_BUILTIN_CRC32W]
25001 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
25002 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
25003 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
25004 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
25005 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
25006 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
25007 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
25008 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
25009 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
25010 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
25011 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
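/* A small usage sketch (assumes the target actually has the CRC32
   extension, e.g. -march=armv8-a+crc):

       unsigned int next = __builtin_arm_crc32b (crc, byte);

   where CRC is the running 32-bit value and BYTE the next input byte.  */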
25014 static void
25015 arm_init_builtins (void)
25017 if (TARGET_REALLY_IWMMXT)
25018 arm_init_iwmmxt_builtins ();
25020 if (TARGET_NEON)
25021 arm_init_neon_builtins ();
25023 if (arm_fp16_format)
25024 arm_init_fp16_builtins ();
25026 if (TARGET_CRC32)
25027 arm_init_crc32_builtins ();
25029 if (TARGET_VFP && TARGET_HARD_FLOAT)
25031 tree ftype_set_fpscr
25032 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
25033 tree ftype_get_fpscr
25034 = build_function_type_list (unsigned_type_node, NULL);
25036 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
25037 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
25038 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25039 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
25040 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
25041 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
25045 /* Return the ARM builtin for CODE. */
25047 static tree
25048 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25050 if (code >= ARM_BUILTIN_MAX)
25051 return error_mark_node;
25053 return arm_builtin_decls[code];
25056 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
25058 static const char *
25059 arm_invalid_parameter_type (const_tree t)
25061 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25062 return N_("function parameters cannot have __fp16 type");
25063 return NULL;
25066 /* Implement TARGET_INVALID_RETURN_TYPE. */
25068 static const char *
25069 arm_invalid_return_type (const_tree t)
25071 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25072 return N_("functions cannot return __fp16 type");
25073 return NULL;
25076 /* Implement TARGET_PROMOTED_TYPE. */
25078 static tree
25079 arm_promoted_type (const_tree t)
25081 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
25082 return float_type_node;
25083 return NULL_TREE;
25086 /* Implement TARGET_CONVERT_TO_TYPE.
25087 Specifically, this hook implements the peculiarity of the ARM
25088 half-precision floating-point C semantics that requires conversions between
25089 __fp16 and double to go through an intermediate conversion to float. */
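/* Concretely: with H of type __fp16 and D of type double, (double) H is
   rewritten here as (double) (float) H, and (__fp16) D as (__fp16) (float) D;
   conversions involving only float (precision 32) fall through to the
   default handling, since this hook returns NULL_TREE for them.  */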
25091 static tree
25092 arm_convert_to_type (tree type, tree expr)
25094 tree fromtype = TREE_TYPE (expr);
25095 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
25096 return NULL_TREE;
25097 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
25098 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
25099 return convert (type, convert (float_type_node, expr));
25100 return NULL_TREE;
25103 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25104 This simply adds HFmode as a supported mode; even though we don't
25105 implement arithmetic on this type directly, it's supported by
25106 optabs conversions, much the way the double-word arithmetic is
25107 special-cased in the default hook. */
25109 static bool
25110 arm_scalar_mode_supported_p (machine_mode mode)
25112 if (mode == HFmode)
25113 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25114 else if (ALL_FIXED_POINT_MODE_P (mode))
25115 return true;
25116 else
25117 return default_scalar_mode_supported_p (mode);
25120 /* Errors in the source file can cause expand_expr to return const0_rtx
25121 where we expect a vector. To avoid crashing, use one of the vector
25122 clear instructions. */
25124 static rtx
25125 safe_vector_operand (rtx x, machine_mode mode)
25127 if (x != const0_rtx)
25128 return x;
25129 x = gen_reg_rtx (mode);
25131 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25132 : gen_rtx_SUBREG (DImode, x, 0)));
25133 return x;
25136 /* Function to expand ternary builtins. */
25137 static rtx
25138 arm_expand_ternop_builtin (enum insn_code icode,
25139 tree exp, rtx target)
25141 rtx pat;
25142 tree arg0 = CALL_EXPR_ARG (exp, 0);
25143 tree arg1 = CALL_EXPR_ARG (exp, 1);
25144 tree arg2 = CALL_EXPR_ARG (exp, 2);
25146 rtx op0 = expand_normal (arg0);
25147 rtx op1 = expand_normal (arg1);
25148 rtx op2 = expand_normal (arg2);
25149 rtx op3 = NULL_RTX;
25151 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25152 lane operand depending on endianness. */
25153 bool builtin_sha1cpm_p = false;
25155 if (insn_data[icode].n_operands == 5)
25157 gcc_assert (icode == CODE_FOR_crypto_sha1c
25158 || icode == CODE_FOR_crypto_sha1p
25159 || icode == CODE_FOR_crypto_sha1m);
25160 builtin_sha1cpm_p = true;
25162 machine_mode tmode = insn_data[icode].operand[0].mode;
25163 machine_mode mode0 = insn_data[icode].operand[1].mode;
25164 machine_mode mode1 = insn_data[icode].operand[2].mode;
25165 machine_mode mode2 = insn_data[icode].operand[3].mode;
25168 if (VECTOR_MODE_P (mode0))
25169 op0 = safe_vector_operand (op0, mode0);
25170 if (VECTOR_MODE_P (mode1))
25171 op1 = safe_vector_operand (op1, mode1);
25172 if (VECTOR_MODE_P (mode2))
25173 op2 = safe_vector_operand (op2, mode2);
25175 if (! target
25176 || GET_MODE (target) != tmode
25177 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25178 target = gen_reg_rtx (tmode);
25180 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25181 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25182 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25184 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25185 op0 = copy_to_mode_reg (mode0, op0);
25186 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25187 op1 = copy_to_mode_reg (mode1, op1);
25188 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25189 op2 = copy_to_mode_reg (mode2, op2);
25190 if (builtin_sha1cpm_p)
25191 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25193 if (builtin_sha1cpm_p)
25194 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25195 else
25196 pat = GEN_FCN (icode) (target, op0, op1, op2);
25197 if (! pat)
25198 return 0;
25199 emit_insn (pat);
25200 return target;
25203 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25205 static rtx
25206 arm_expand_binop_builtin (enum insn_code icode,
25207 tree exp, rtx target)
25209 rtx pat;
25210 tree arg0 = CALL_EXPR_ARG (exp, 0);
25211 tree arg1 = CALL_EXPR_ARG (exp, 1);
25212 rtx op0 = expand_normal (arg0);
25213 rtx op1 = expand_normal (arg1);
25214 machine_mode tmode = insn_data[icode].operand[0].mode;
25215 machine_mode mode0 = insn_data[icode].operand[1].mode;
25216 machine_mode mode1 = insn_data[icode].operand[2].mode;
25218 if (VECTOR_MODE_P (mode0))
25219 op0 = safe_vector_operand (op0, mode0);
25220 if (VECTOR_MODE_P (mode1))
25221 op1 = safe_vector_operand (op1, mode1);
25223 if (! target
25224 || GET_MODE (target) != tmode
25225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25226 target = gen_reg_rtx (tmode);
25228 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25229 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25231 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25232 op0 = copy_to_mode_reg (mode0, op0);
25233 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25234 op1 = copy_to_mode_reg (mode1, op1);
25236 pat = GEN_FCN (icode) (target, op0, op1);
25237 if (! pat)
25238 return 0;
25239 emit_insn (pat);
25240 return target;
25243 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25245 static rtx
25246 arm_expand_unop_builtin (enum insn_code icode,
25247 tree exp, rtx target, int do_load)
25249 rtx pat;
25250 tree arg0 = CALL_EXPR_ARG (exp, 0);
25251 rtx op0 = expand_normal (arg0);
25252 rtx op1 = NULL_RTX;
25253 machine_mode tmode = insn_data[icode].operand[0].mode;
25254 machine_mode mode0 = insn_data[icode].operand[1].mode;
25255 bool builtin_sha1h_p = false;
25257 if (insn_data[icode].n_operands == 3)
25259 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25260 builtin_sha1h_p = true;
25263 if (! target
25264 || GET_MODE (target) != tmode
25265 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25266 target = gen_reg_rtx (tmode);
25267 if (do_load)
25268 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25269 else
25271 if (VECTOR_MODE_P (mode0))
25272 op0 = safe_vector_operand (op0, mode0);
25274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25275 op0 = copy_to_mode_reg (mode0, op0);
25277 if (builtin_sha1h_p)
25278 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25280 if (builtin_sha1h_p)
25281 pat = GEN_FCN (icode) (target, op0, op1);
25282 else
25283 pat = GEN_FCN (icode) (target, op0);
25284 if (! pat)
25285 return 0;
25286 emit_insn (pat);
25287 return target;
25290 typedef enum {
25291 NEON_ARG_COPY_TO_REG,
25292 NEON_ARG_CONSTANT,
25293 NEON_ARG_MEMORY,
25294 NEON_ARG_STOP
25295 } builtin_arg;
25297 #define NEON_MAX_BUILTIN_ARGS 5
25299 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25300 and return an expression for the accessed memory.
25302 The intrinsic function operates on a block of registers that has
25303 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25304 function references the memory at EXP of type TYPE and in mode
25305 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25306 available. */
25308 static tree
25309 neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
25310 machine_mode reg_mode,
25311 neon_builtin_type_mode type_mode)
25313 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25314 tree elem_type, upper_bound, array_type;
25316 /* Work out the size of the register block in bytes. */
25317 reg_size = GET_MODE_SIZE (reg_mode);
25319 /* Work out the size of each vector in bytes. */
25320 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25321 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25323 /* Work out how many vectors there are. */
25324 gcc_assert (reg_size % vector_size == 0);
25325 nvectors = reg_size / vector_size;
25327 /* Work out the type of each element. */
25328 gcc_assert (POINTER_TYPE_P (type));
25329 elem_type = TREE_TYPE (type);
25331 /* Work out how many elements are being loaded or stored.
25332 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25333 and memory elements; anything else implies a lane load or store. */
25334 if (mem_mode == reg_mode)
25335 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25336 else
25337 nelems = nvectors;
25339 /* Create a type that describes the full access. */
25340 upper_bound = build_int_cst (size_type_node, nelems - 1);
25341 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25343 /* Dereference EXP using that type. */
25344 return fold_build2 (MEM_REF, array_type, exp,
25345 build_int_cst (build_pointer_type (array_type), 0));
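/* For illustration, a worked example of the computation above, assuming an
   intrinsic such as vld3q_s32 from arm_neon.h: REG_MODE describes a block of
   three quad vectors, so reg_size = 48 and vector_size = 16, giving
   nvectors = 3.  The whole block comes from memory, so MEM_MODE == REG_MODE
   and nelems = 16 * 3 / 4 = 12, i.e. the access is described as an
   int32_t[12] array.  For a lane access such as vld3q_lane_s32 the modes
   differ and nelems is just nvectors = 3.  */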
25348 /* Expand a Neon builtin. */
25349 static rtx
25350 arm_expand_neon_args (rtx target, int icode, int have_retval,
25351 neon_builtin_type_mode type_mode,
25352 tree exp, int fcode, ...)
25354 va_list ap;
25355 rtx pat;
25356 tree arg[NEON_MAX_BUILTIN_ARGS];
25357 rtx op[NEON_MAX_BUILTIN_ARGS];
25358 tree arg_type;
25359 tree formals;
25360 machine_mode tmode = insn_data[icode].operand[0].mode;
25361 machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25362 machine_mode other_mode;
25363 int argc = 0;
25364 int opno;
25366 if (have_retval
25367 && (!target
25368 || GET_MODE (target) != tmode
25369 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25370 target = gen_reg_rtx (tmode);
25372 va_start (ap, fcode);
25374 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25376 for (;;)
25378 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25380 if (thisarg == NEON_ARG_STOP)
25381 break;
25382 else
25384 opno = argc + have_retval;
25385 mode[argc] = insn_data[icode].operand[opno].mode;
25386 arg[argc] = CALL_EXPR_ARG (exp, argc);
25387 arg_type = TREE_VALUE (formals);
25388 if (thisarg == NEON_ARG_MEMORY)
25390 other_mode = insn_data[icode].operand[1 - opno].mode;
25391 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25392 mode[argc], other_mode,
25393 type_mode);
25396 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM
25397 rtx is returned. */
25398 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25399 (thisarg == NEON_ARG_MEMORY
25400 ? EXPAND_MEMORY : EXPAND_NORMAL));
25402 switch (thisarg)
25404 case NEON_ARG_COPY_TO_REG:
25405 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25406 if (!(*insn_data[icode].operand[opno].predicate)
25407 (op[argc], mode[argc]))
25408 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25409 break;
25411 case NEON_ARG_CONSTANT:
25412 /* FIXME: This error message is somewhat unhelpful. */
25413 if (!(*insn_data[icode].operand[opno].predicate)
25414 (op[argc], mode[argc]))
25415 error ("argument must be a constant");
25416 break;
25418 case NEON_ARG_MEMORY:
25419 /* Check if expand failed. */
25420 if (op[argc] == const0_rtx)
25421 return 0;
25422 gcc_assert (MEM_P (op[argc]));
25423 PUT_MODE (op[argc], mode[argc]);
25424 /* ??? arm_neon.h uses the same built-in functions for signed
25425 and unsigned accesses, casting where necessary. This isn't
25426 alias safe. */
25427 set_mem_alias_set (op[argc], 0);
25428 if (!(*insn_data[icode].operand[opno].predicate)
25429 (op[argc], mode[argc]))
25430 op[argc] = (replace_equiv_address
25431 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25432 break;
25434 case NEON_ARG_STOP:
25435 gcc_unreachable ();
25438 argc++;
25439 formals = TREE_CHAIN (formals);
25443 va_end (ap);
25445 if (have_retval)
25446 switch (argc)
25448 case 1:
25449 pat = GEN_FCN (icode) (target, op[0]);
25450 break;
25452 case 2:
25453 pat = GEN_FCN (icode) (target, op[0], op[1]);
25454 break;
25456 case 3:
25457 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25458 break;
25460 case 4:
25461 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25462 break;
25464 case 5:
25465 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25466 break;
25468 default:
25469 gcc_unreachable ();
25471 else
25472 switch (argc)
25474 case 1:
25475 pat = GEN_FCN (icode) (op[0]);
25476 break;
25478 case 2:
25479 pat = GEN_FCN (icode) (op[0], op[1]);
25480 break;
25482 case 3:
25483 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25484 break;
25486 case 4:
25487 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25488 break;
25490 case 5:
25491 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25492 break;
25494 default:
25495 gcc_unreachable ();
25498 if (!pat)
25499 return 0;
25501 emit_insn (pat);
25503 return target;
25506 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25507 constants defined per-instruction or per instruction-variant. Instead, the
25508 required info is looked up in the table neon_builtin_data. */
25509 static rtx
25510 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25512 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25513 neon_itype itype = d->itype;
25514 enum insn_code icode = d->code;
25515 neon_builtin_type_mode type_mode = d->mode;
25517 switch (itype)
25519 case NEON_UNOP:
25520 case NEON_CONVERT:
25521 case NEON_DUPLANE:
25522 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25523 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25525 case NEON_BINOP:
25526 case NEON_SETLANE:
25527 case NEON_SCALARMUL:
25528 case NEON_SCALARMULL:
25529 case NEON_SCALARMULH:
25530 case NEON_SHIFTINSERT:
25531 case NEON_LOGICBINOP:
25532 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25533 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25534 NEON_ARG_STOP);
25536 case NEON_TERNOP:
25537 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25538 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25539 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25541 case NEON_GETLANE:
25542 case NEON_FIXCONV:
25543 case NEON_SHIFTIMM:
25544 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25545 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25546 NEON_ARG_STOP);
25548 case NEON_CREATE:
25549 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25550 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25552 case NEON_DUP:
25553 case NEON_RINT:
25554 case NEON_SPLIT:
25555 case NEON_FLOAT_WIDEN:
25556 case NEON_FLOAT_NARROW:
25557 case NEON_BSWAP:
25558 case NEON_REINTERP:
25559 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25560 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25562 case NEON_COPYSIGNF:
25563 case NEON_COMBINE:
25564 case NEON_VTBL:
25565 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25566 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25568 case NEON_LANEMUL:
25569 case NEON_LANEMULL:
25570 case NEON_LANEMULH:
25571 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25572 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25573 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25575 case NEON_LANEMAC:
25576 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25577 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25578 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25580 case NEON_SHIFTACC:
25581 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25582 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25583 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25585 case NEON_SCALARMAC:
25586 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25587 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25588 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25590 case NEON_SELECT:
25591 case NEON_VTBX:
25592 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25593 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25594 NEON_ARG_STOP);
25596 case NEON_LOAD1:
25597 case NEON_LOADSTRUCT:
25598 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25599 NEON_ARG_MEMORY, NEON_ARG_STOP);
25601 case NEON_LOAD1LANE:
25602 case NEON_LOADSTRUCTLANE:
25603 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25604 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25605 NEON_ARG_STOP);
25607 case NEON_STORE1:
25608 case NEON_STORESTRUCT:
25609 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25610 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25612 case NEON_STORE1LANE:
25613 case NEON_STORESTRUCTLANE:
25614 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25615 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25616 NEON_ARG_STOP);
25619 gcc_unreachable ();
25622 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25623 void
25624 neon_reinterpret (rtx dest, rtx src)
25626 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
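/* For illustration: this is the mechanism behind the vreinterpret*
   intrinsics, e.g.

       float32x4_t f = ...;
       int32x4_t   i = vreinterpretq_s32_f32 (f);

   the 128 bits are unchanged; only the type (and mode) differs.  */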
25629 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25630 not to early-clobber SRC registers in the process.
25632 We assume that the operands described by SRC and DEST represent a
25633 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25634 number of components into which the copy has been decomposed. */
25635 void
25636 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25638 unsigned int i;
25640 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25641 || REGNO (operands[0]) < REGNO (operands[1]))
25643 for (i = 0; i < count; i++)
25645 operands[2 * i] = dest[i];
25646 operands[2 * i + 1] = src[i];
25649 else
25651 for (i = 0; i < count; i++)
25653 operands[2 * i] = dest[count - i - 1];
25654 operands[2 * i + 1] = src[count - i - 1];
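/* For illustration, why the ordering above matters: suppose the copy
   decomposes into dest = {d1, d2} and src = {d0, d1}.  Emitting the moves in
   ascending order would give

       d1 = d0;                 clobbers d1
       d2 = d1;                 reads the clobbered value

   Because the destination overlaps the source and starts at a higher
   register number, the else branch emits the moves in reverse order
   (d2 = d1, then d1 = d0), so no source register is overwritten before it
   is read.  */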
25659 /* Split operands into moves from op[1] + op[2] into op[0]. */
25661 void
25662 neon_split_vcombine (rtx operands[3])
25664 unsigned int dest = REGNO (operands[0]);
25665 unsigned int src1 = REGNO (operands[1]);
25666 unsigned int src2 = REGNO (operands[2]);
25667 machine_mode halfmode = GET_MODE (operands[1]);
25668 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25669 rtx destlo, desthi;
25671 if (src1 == dest && src2 == dest + halfregs)
25673 /* No-op move. Can't split to nothing; emit something. */
25674 emit_note (NOTE_INSN_DELETED);
25675 return;
25678 /* Preserve register attributes for variable tracking. */
25679 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25680 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25681 GET_MODE_SIZE (halfmode));
25683 /* Special case of reversed high/low parts. Use VSWP. */
25684 if (src2 == dest && src1 == dest + halfregs)
25686 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25687 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25688 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25689 return;
25692 if (!reg_overlap_mentioned_p (operands[2], destlo))
25694 /* Try to avoid unnecessary moves if part of the result
25695 is in the right place already. */
25696 if (src1 != dest)
25697 emit_move_insn (destlo, operands[1]);
25698 if (src2 != dest + halfregs)
25699 emit_move_insn (desthi, operands[2]);
25701 else
25703 if (src2 != dest + halfregs)
25704 emit_move_insn (desthi, operands[2]);
25705 if (src1 != dest)
25706 emit_move_insn (destlo, operands[1]);
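/* For illustration, the cases above in terms of a vcombine whose destination
   Q register overlaps {d0, d1}:

       q0 = vcombine (d0, d1)   no-op: only a deleted-insn note is emitted
       q0 = vcombine (d1, d0)   reversed halves: emitted as a single VSWP
       q0 = vcombine (d2, d3)   two plain moves, ordered so that no source
                                is clobbered before it is read  */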
25710 /* Expand an expression EXP that calls a built-in function,
25711 with result going to TARGET if that's convenient
25712 (and in mode MODE if that's convenient).
25713 SUBTARGET may be used as the target for computing one of EXP's operands.
25714 IGNORE is nonzero if the value is to be ignored. */
25716 static rtx
25717 arm_expand_builtin (tree exp,
25718 rtx target,
25719 rtx subtarget ATTRIBUTE_UNUSED,
25720 machine_mode mode ATTRIBUTE_UNUSED,
25721 int ignore ATTRIBUTE_UNUSED)
25723 const struct builtin_description * d;
25724 enum insn_code icode;
25725 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25726 tree arg0;
25727 tree arg1;
25728 tree arg2;
25729 rtx op0;
25730 rtx op1;
25731 rtx op2;
25732 rtx pat;
25733 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25734 size_t i;
25735 machine_mode tmode;
25736 machine_mode mode0;
25737 machine_mode mode1;
25738 machine_mode mode2;
25739 int opint;
25740 int selector;
25741 int mask;
25742 int imm;
25744 if (fcode >= ARM_BUILTIN_NEON_BASE)
25745 return arm_expand_neon_builtin (fcode, exp, target);
25747 switch (fcode)
25749 case ARM_BUILTIN_GET_FPSCR:
25750 case ARM_BUILTIN_SET_FPSCR:
25751 if (fcode == ARM_BUILTIN_GET_FPSCR)
25753 icode = CODE_FOR_get_fpscr;
25754 target = gen_reg_rtx (SImode);
25755 pat = GEN_FCN (icode) (target);
25757 else
25759 target = NULL_RTX;
25760 icode = CODE_FOR_set_fpscr;
25761 arg0 = CALL_EXPR_ARG (exp, 0);
25762 op0 = expand_normal (arg0);
25763 pat = GEN_FCN (icode) (op0);
25765 emit_insn (pat);
25766 return target;
25768 case ARM_BUILTIN_TEXTRMSB:
25769 case ARM_BUILTIN_TEXTRMUB:
25770 case ARM_BUILTIN_TEXTRMSH:
25771 case ARM_BUILTIN_TEXTRMUH:
25772 case ARM_BUILTIN_TEXTRMSW:
25773 case ARM_BUILTIN_TEXTRMUW:
25774 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25775 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25776 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25777 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25778 : CODE_FOR_iwmmxt_textrmw);
25780 arg0 = CALL_EXPR_ARG (exp, 0);
25781 arg1 = CALL_EXPR_ARG (exp, 1);
25782 op0 = expand_normal (arg0);
25783 op1 = expand_normal (arg1);
25784 tmode = insn_data[icode].operand[0].mode;
25785 mode0 = insn_data[icode].operand[1].mode;
25786 mode1 = insn_data[icode].operand[2].mode;
25788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25789 op0 = copy_to_mode_reg (mode0, op0);
25790 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25792 /* @@@ better error message */
25793 error ("selector must be an immediate");
25794 return gen_reg_rtx (tmode);
25797 opint = INTVAL (op1);
25798 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25800 if (opint > 7 || opint < 0)
25801 error ("the range of selector should be in 0 to 7");
25803 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25805 if (opint > 3 || opint < 0)
25806 error ("the range of selector should be in 0 to 3");
25808 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25810 if (opint > 1 || opint < 0)
25811 error ("the range of selector should be in 0 to 1");
25814 if (target == 0
25815 || GET_MODE (target) != tmode
25816 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25817 target = gen_reg_rtx (tmode);
25818 pat = GEN_FCN (icode) (target, op0, op1);
25819 if (! pat)
25820 return 0;
25821 emit_insn (pat);
25822 return target;
25824 case ARM_BUILTIN_WALIGNI:
25825 /* If op2 is an immediate, call waligni, else call walignr. */
25826 arg0 = CALL_EXPR_ARG (exp, 0);
25827 arg1 = CALL_EXPR_ARG (exp, 1);
25828 arg2 = CALL_EXPR_ARG (exp, 2);
25829 op0 = expand_normal (arg0);
25830 op1 = expand_normal (arg1);
25831 op2 = expand_normal (arg2);
25832 if (CONST_INT_P (op2))
25834 icode = CODE_FOR_iwmmxt_waligni;
25835 tmode = insn_data[icode].operand[0].mode;
25836 mode0 = insn_data[icode].operand[1].mode;
25837 mode1 = insn_data[icode].operand[2].mode;
25838 mode2 = insn_data[icode].operand[3].mode;
25839 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25840 op0 = copy_to_mode_reg (mode0, op0);
25841 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25842 op1 = copy_to_mode_reg (mode1, op1);
25843 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25844 selector = INTVAL (op2);
25845 if (selector > 7 || selector < 0)
25846 error ("the range of selector should be in 0 to 7");
25848 else
25850 icode = CODE_FOR_iwmmxt_walignr;
25851 tmode = insn_data[icode].operand[0].mode;
25852 mode0 = insn_data[icode].operand[1].mode;
25853 mode1 = insn_data[icode].operand[2].mode;
25854 mode2 = insn_data[icode].operand[3].mode;
25855 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25856 op0 = copy_to_mode_reg (mode0, op0);
25857 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25858 op1 = copy_to_mode_reg (mode1, op1);
25859 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25860 op2 = copy_to_mode_reg (mode2, op2);
25862 if (target == 0
25863 || GET_MODE (target) != tmode
25864 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25865 target = gen_reg_rtx (tmode);
25866 pat = GEN_FCN (icode) (target, op0, op1, op2);
25867 if (!pat)
25868 return 0;
25869 emit_insn (pat);
25870 return target;
25872 case ARM_BUILTIN_TINSRB:
25873 case ARM_BUILTIN_TINSRH:
25874 case ARM_BUILTIN_TINSRW:
25875 case ARM_BUILTIN_WMERGE:
25876 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25877 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25878 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25879 : CODE_FOR_iwmmxt_tinsrw);
25880 arg0 = CALL_EXPR_ARG (exp, 0);
25881 arg1 = CALL_EXPR_ARG (exp, 1);
25882 arg2 = CALL_EXPR_ARG (exp, 2);
25883 op0 = expand_normal (arg0);
25884 op1 = expand_normal (arg1);
25885 op2 = expand_normal (arg2);
25886 tmode = insn_data[icode].operand[0].mode;
25887 mode0 = insn_data[icode].operand[1].mode;
25888 mode1 = insn_data[icode].operand[2].mode;
25889 mode2 = insn_data[icode].operand[3].mode;
25891 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25892 op0 = copy_to_mode_reg (mode0, op0);
25893 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25894 op1 = copy_to_mode_reg (mode1, op1);
25895 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25897 error ("selector must be an immediate");
25898 return const0_rtx;
25900 if (icode == CODE_FOR_iwmmxt_wmerge)
25902 selector = INTVAL (op2);
25903 if (selector > 7 || selector < 0)
25904 error ("the range of selector should be in 0 to 7");
25906 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25907 || (icode == CODE_FOR_iwmmxt_tinsrh)
25908 || (icode == CODE_FOR_iwmmxt_tinsrw))
25910 mask = 0x01;
25911 selector = INTVAL (op2);
25912 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25913 error ("the range of selector should be in 0 to 7");
25914 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25915 error ("the range of selector should be in 0 to 3");
25916 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25917 error ("the range of selector should be in 0 to 1");
25918 mask <<= selector;
25919 op2 = GEN_INT (mask);
25921 if (target == 0
25922 || GET_MODE (target) != tmode
25923 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25924 target = gen_reg_rtx (tmode);
25925 pat = GEN_FCN (icode) (target, op0, op1, op2);
25926 if (! pat)
25927 return 0;
25928 emit_insn (pat);
25929 return target;
25931 case ARM_BUILTIN_SETWCGR0:
25932 case ARM_BUILTIN_SETWCGR1:
25933 case ARM_BUILTIN_SETWCGR2:
25934 case ARM_BUILTIN_SETWCGR3:
25935 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25936 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25937 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25938 : CODE_FOR_iwmmxt_setwcgr3);
25939 arg0 = CALL_EXPR_ARG (exp, 0);
25940 op0 = expand_normal (arg0);
25941 mode0 = insn_data[icode].operand[0].mode;
25942 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25943 op0 = copy_to_mode_reg (mode0, op0);
25944 pat = GEN_FCN (icode) (op0);
25945 if (!pat)
25946 return 0;
25947 emit_insn (pat);
25948 return 0;
25950 case ARM_BUILTIN_GETWCGR0:
25951 case ARM_BUILTIN_GETWCGR1:
25952 case ARM_BUILTIN_GETWCGR2:
25953 case ARM_BUILTIN_GETWCGR3:
25954 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25955 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25956 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25957 : CODE_FOR_iwmmxt_getwcgr3);
25958 tmode = insn_data[icode].operand[0].mode;
25959 if (target == 0
25960 || GET_MODE (target) != tmode
25961 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25962 target = gen_reg_rtx (tmode);
25963 pat = GEN_FCN (icode) (target);
25964 if (!pat)
25965 return 0;
25966 emit_insn (pat);
25967 return target;
25969 case ARM_BUILTIN_WSHUFH:
25970 icode = CODE_FOR_iwmmxt_wshufh;
25971 arg0 = CALL_EXPR_ARG (exp, 0);
25972 arg1 = CALL_EXPR_ARG (exp, 1);
25973 op0 = expand_normal (arg0);
25974 op1 = expand_normal (arg1);
25975 tmode = insn_data[icode].operand[0].mode;
25976 mode1 = insn_data[icode].operand[1].mode;
25977 mode2 = insn_data[icode].operand[2].mode;
25979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25980 op0 = copy_to_mode_reg (mode1, op0);
25981 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25983 error ("mask must be an immediate");
25984 return const0_rtx;
25986 selector = INTVAL (op1);
25987 if (selector < 0 || selector > 255)
25988 error ("the range of mask should be in 0 to 255");
25989 if (target == 0
25990 || GET_MODE (target) != tmode
25991 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25992 target = gen_reg_rtx (tmode);
25993 pat = GEN_FCN (icode) (target, op0, op1);
25994 if (! pat)
25995 return 0;
25996 emit_insn (pat);
25997 return target;
25999 case ARM_BUILTIN_WMADDS:
26000 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
26001 case ARM_BUILTIN_WMADDSX:
26002 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
26003 case ARM_BUILTIN_WMADDSN:
26004 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
26005 case ARM_BUILTIN_WMADDU:
26006 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
26007 case ARM_BUILTIN_WMADDUX:
26008 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
26009 case ARM_BUILTIN_WMADDUN:
26010 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
26011 case ARM_BUILTIN_WSADBZ:
26012 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
26013 case ARM_BUILTIN_WSADHZ:
26014 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
26016 /* Several three-argument builtins. */
26017 case ARM_BUILTIN_WMACS:
26018 case ARM_BUILTIN_WMACU:
26019 case ARM_BUILTIN_TMIA:
26020 case ARM_BUILTIN_TMIAPH:
26021 case ARM_BUILTIN_TMIATT:
26022 case ARM_BUILTIN_TMIATB:
26023 case ARM_BUILTIN_TMIABT:
26024 case ARM_BUILTIN_TMIABB:
26025 case ARM_BUILTIN_WQMIABB:
26026 case ARM_BUILTIN_WQMIABT:
26027 case ARM_BUILTIN_WQMIATB:
26028 case ARM_BUILTIN_WQMIATT:
26029 case ARM_BUILTIN_WQMIABBN:
26030 case ARM_BUILTIN_WQMIABTN:
26031 case ARM_BUILTIN_WQMIATBN:
26032 case ARM_BUILTIN_WQMIATTN:
26033 case ARM_BUILTIN_WMIABB:
26034 case ARM_BUILTIN_WMIABT:
26035 case ARM_BUILTIN_WMIATB:
26036 case ARM_BUILTIN_WMIATT:
26037 case ARM_BUILTIN_WMIABBN:
26038 case ARM_BUILTIN_WMIABTN:
26039 case ARM_BUILTIN_WMIATBN:
26040 case ARM_BUILTIN_WMIATTN:
26041 case ARM_BUILTIN_WMIAWBB:
26042 case ARM_BUILTIN_WMIAWBT:
26043 case ARM_BUILTIN_WMIAWTB:
26044 case ARM_BUILTIN_WMIAWTT:
26045 case ARM_BUILTIN_WMIAWBBN:
26046 case ARM_BUILTIN_WMIAWBTN:
26047 case ARM_BUILTIN_WMIAWTBN:
26048 case ARM_BUILTIN_WMIAWTTN:
26049 case ARM_BUILTIN_WSADB:
26050 case ARM_BUILTIN_WSADH:
26051 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
26052 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
26053 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
26054 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
26055 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
26056 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
26057 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
26058 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
26059 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
26060 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
26061 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
26062 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
26063 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
26064 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
26065 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
26066 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
26067 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
26068 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
26069 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
26070 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
26071 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
26072 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
26073 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
26074 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
26075 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
26076 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
26077 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
26078 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
26079 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
26080 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
26081 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
26082 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
26083 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
26084 : CODE_FOR_iwmmxt_wsadh);
26085 arg0 = CALL_EXPR_ARG (exp, 0);
26086 arg1 = CALL_EXPR_ARG (exp, 1);
26087 arg2 = CALL_EXPR_ARG (exp, 2);
26088 op0 = expand_normal (arg0);
26089 op1 = expand_normal (arg1);
26090 op2 = expand_normal (arg2);
26091 tmode = insn_data[icode].operand[0].mode;
26092 mode0 = insn_data[icode].operand[1].mode;
26093 mode1 = insn_data[icode].operand[2].mode;
26094 mode2 = insn_data[icode].operand[3].mode;
26096 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
26097 op0 = copy_to_mode_reg (mode0, op0);
26098 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
26099 op1 = copy_to_mode_reg (mode1, op1);
26100 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
26101 op2 = copy_to_mode_reg (mode2, op2);
26102 if (target == 0
26103 || GET_MODE (target) != tmode
26104 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
26105 target = gen_reg_rtx (tmode);
26106 pat = GEN_FCN (icode) (target, op0, op1, op2);
26107 if (! pat)
26108 return 0;
26109 emit_insn (pat);
26110 return target;
26112 case ARM_BUILTIN_WZERO:
26113 target = gen_reg_rtx (DImode);
26114 emit_insn (gen_iwmmxt_clrdi (target));
26115 return target;
26117 case ARM_BUILTIN_WSRLHI:
26118 case ARM_BUILTIN_WSRLWI:
26119 case ARM_BUILTIN_WSRLDI:
26120 case ARM_BUILTIN_WSLLHI:
26121 case ARM_BUILTIN_WSLLWI:
26122 case ARM_BUILTIN_WSLLDI:
26123 case ARM_BUILTIN_WSRAHI:
26124 case ARM_BUILTIN_WSRAWI:
26125 case ARM_BUILTIN_WSRADI:
26126 case ARM_BUILTIN_WRORHI:
26127 case ARM_BUILTIN_WRORWI:
26128 case ARM_BUILTIN_WRORDI:
26129 case ARM_BUILTIN_WSRLH:
26130 case ARM_BUILTIN_WSRLW:
26131 case ARM_BUILTIN_WSRLD:
26132 case ARM_BUILTIN_WSLLH:
26133 case ARM_BUILTIN_WSLLW:
26134 case ARM_BUILTIN_WSLLD:
26135 case ARM_BUILTIN_WSRAH:
26136 case ARM_BUILTIN_WSRAW:
26137 case ARM_BUILTIN_WSRAD:
26138 case ARM_BUILTIN_WRORH:
26139 case ARM_BUILTIN_WRORW:
26140 case ARM_BUILTIN_WRORD:
26141 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26142 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26143 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26144 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26145 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26146 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26147 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26148 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26149 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26150 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26151 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26152 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26153 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26154 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26155 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26156 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26157 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26158 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26159 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26160 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26161 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26162 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26163 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26164 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26165 : CODE_FOR_nothing);
26166 arg1 = CALL_EXPR_ARG (exp, 1);
26167 op1 = expand_normal (arg1);
26168 if (GET_MODE (op1) == VOIDmode)
26170 imm = INTVAL (op1);
26171 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26172 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26173 && (imm < 0 || imm > 32))
26175 if (fcode == ARM_BUILTIN_WRORHI)
26176 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26177 else if (fcode == ARM_BUILTIN_WRORWI)
26178 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26179 else if (fcode == ARM_BUILTIN_WRORH)
26180 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26181 else
26182 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26184 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26185 && (imm < 0 || imm > 64))
26187 if (fcode == ARM_BUILTIN_WRORDI)
26188 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26189 else
26190 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26192 else if (imm < 0)
26194 if (fcode == ARM_BUILTIN_WSRLHI)
26195 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26196 else if (fcode == ARM_BUILTIN_WSRLWI)
26197 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26198 else if (fcode == ARM_BUILTIN_WSRLDI)
26199 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26200 else if (fcode == ARM_BUILTIN_WSLLHI)
26201 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26202 else if (fcode == ARM_BUILTIN_WSLLWI)
26203 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26204 else if (fcode == ARM_BUILTIN_WSLLDI)
26205 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26206 else if (fcode == ARM_BUILTIN_WSRAHI)
26207 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26208 else if (fcode == ARM_BUILTIN_WSRAWI)
26209 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26210 else if (fcode == ARM_BUILTIN_WSRADI)
26211 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26212 else if (fcode == ARM_BUILTIN_WSRLH)
26213 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26214 else if (fcode == ARM_BUILTIN_WSRLW)
26215 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26216 else if (fcode == ARM_BUILTIN_WSRLD)
26217 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26218 else if (fcode == ARM_BUILTIN_WSLLH)
26219 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26220 else if (fcode == ARM_BUILTIN_WSLLW)
26221 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26222 else if (fcode == ARM_BUILTIN_WSLLD)
26223 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26224 else if (fcode == ARM_BUILTIN_WSRAH)
26225 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26226 else if (fcode == ARM_BUILTIN_WSRAW)
26227 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26228 else
26229 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26232 return arm_expand_binop_builtin (icode, exp, target);
26234 default:
26235 break;
26238 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26239 if (d->code == (const enum arm_builtins) fcode)
26240 return arm_expand_binop_builtin (d->icode, exp, target);
26242 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26243 if (d->code == (const enum arm_builtins) fcode)
26244 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26246 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26247 if (d->code == (const enum arm_builtins) fcode)
26248 return arm_expand_ternop_builtin (d->icode, exp, target);
26250 /* @@@ Should really do something sensible here. */
26251 return NULL_RTX;
26254 /* Return the number (counting from 0) of
26255 the least significant set bit in MASK. */
26257 inline static int
26258 number_of_first_bit_set (unsigned mask)
26260 return ctz_hwi (mask);
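/* For illustration: number_of_first_bit_set (0x14) == 2, since bit 2 is the
   least significant set bit of 0b10100.  */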
26263 /* Like emit_multi_reg_push, but allowing for a different set of
26264 registers to be described as saved. MASK is the set of registers
26265 to be saved; REAL_REGS is the set of registers to be described as
26266 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26268 static rtx_insn *
26269 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26271 unsigned long regno;
26272 rtx par[10], tmp, reg;
26273 rtx_insn *insn;
26274 int i, j;
26276 /* Build the parallel of the registers actually being stored. */
26277 for (i = 0; mask; ++i, mask &= mask - 1)
26279 regno = ctz_hwi (mask);
26280 reg = gen_rtx_REG (SImode, regno);
26282 if (i == 0)
26283 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26284 else
26285 tmp = gen_rtx_USE (VOIDmode, reg);
26287 par[i] = tmp;
26290 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26291 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26292 tmp = gen_frame_mem (BLKmode, tmp);
26293 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26294 par[0] = tmp;
26296 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26297 insn = emit_insn (tmp);
26299 /* Always build the stack adjustment note for unwind info. */
26300 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26301 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26302 par[0] = tmp;
26304 /* Build the parallel of the registers recorded as saved for unwind. */
26305 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26307 regno = ctz_hwi (real_regs);
26308 reg = gen_rtx_REG (SImode, regno);
26310 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26311 tmp = gen_frame_mem (SImode, tmp);
26312 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26313 RTX_FRAME_RELATED_P (tmp) = 1;
26314 par[j + 1] = tmp;
26317 if (j == 0)
26318 tmp = par[0];
26319 else
26321 RTX_FRAME_RELATED_P (par[0]) = 1;
26322 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26327 return insn;
26330 /* Emit code to push or pop registers to or from the stack. F is the
26331 assembly file. MASK is the registers to pop. */
26332 static void
26333 thumb_pop (FILE *f, unsigned long mask)
26335 int regno;
26336 int lo_mask = mask & 0xFF;
26337 int pushed_words = 0;
26339 gcc_assert (mask);
26341 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26343 /* Special case. Do not generate a POP PC statement here, do it in
26344 thumb_exit() */
26345 thumb_exit (f, -1);
26346 return;
26349 fprintf (f, "\tpop\t{");
26351 /* Look at the low registers first. */
26352 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26354 if (lo_mask & 1)
26356 asm_fprintf (f, "%r", regno);
26358 if ((lo_mask & ~1) != 0)
26359 fprintf (f, ", ");
26361 pushed_words++;
26365 if (mask & (1 << PC_REGNUM))
26367 /* Catch popping the PC. */
26368 if (TARGET_INTERWORK || TARGET_BACKTRACE
26369 || crtl->calls_eh_return)
26371 /* The PC is never popped directly; instead
26372 it is popped into r3 and then BX is used. */
26373 fprintf (f, "}\n");
26375 thumb_exit (f, -1);
26377 return;
26379 else
26381 if (mask & 0xFF)
26382 fprintf (f, ", ");
26384 asm_fprintf (f, "%r", PC_REGNUM);
26388 fprintf (f, "}\n");
26391 /* Generate code to return from a thumb function.
26392 If 'reg_containing_return_addr' is -1, then the return address is
26393 actually on the stack, at the stack pointer. */
26394 static void
26395 thumb_exit (FILE *f, int reg_containing_return_addr)
26397 unsigned regs_available_for_popping;
26398 unsigned regs_to_pop;
26399 int pops_needed;
26400 unsigned available;
26401 unsigned required;
26402 machine_mode mode;
26403 int size;
26404 int restore_a4 = FALSE;
26406 /* Compute the registers we need to pop. */
26407 regs_to_pop = 0;
26408 pops_needed = 0;
26410 if (reg_containing_return_addr == -1)
26412 regs_to_pop |= 1 << LR_REGNUM;
26413 ++pops_needed;
26416 if (TARGET_BACKTRACE)
26418 /* Restore the (ARM) frame pointer and stack pointer. */
26419 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26420 pops_needed += 2;
26423 /* If there is nothing to pop then just emit the BX instruction and
26424 return. */
26425 if (pops_needed == 0)
26427 if (crtl->calls_eh_return)
26428 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26430 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26431 return;
26433 /* Otherwise if we are not supporting interworking and we have not created
26434 a backtrace structure and the function was not entered in ARM mode then
26435 just pop the return address straight into the PC. */
26436 else if (!TARGET_INTERWORK
26437 && !TARGET_BACKTRACE
26438 && !is_called_in_ARM_mode (current_function_decl)
26439 && !crtl->calls_eh_return)
26441 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26442 return;
26445 /* Find out how many of the (return) argument registers we can corrupt. */
26446 regs_available_for_popping = 0;
26448 /* If returning via __builtin_eh_return, the bottom three registers
26449 all contain information needed for the return. */
26450 if (crtl->calls_eh_return)
26451 size = 12;
26452 else
26454 /* Try to deduce the registers used from the function's
26455 return value. This is more reliable than examining
26456 df_regs_ever_live_p () because that will be set if the register is
26457 ever used in the function, not just if the register is used
26458 to hold a return value. */
26460 if (crtl->return_rtx != 0)
26461 mode = GET_MODE (crtl->return_rtx);
26462 else
26463 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26465 size = GET_MODE_SIZE (mode);
26467 if (size == 0)
26469 /* In a void function we can use any argument register.
26470 In a function that returns a structure on the stack
26471 we can use the second and third argument registers. */
26472 if (mode == VOIDmode)
26473 regs_available_for_popping =
26474 (1 << ARG_REGISTER (1))
26475 | (1 << ARG_REGISTER (2))
26476 | (1 << ARG_REGISTER (3));
26477 else
26478 regs_available_for_popping =
26479 (1 << ARG_REGISTER (2))
26480 | (1 << ARG_REGISTER (3));
26482 else if (size <= 4)
26483 regs_available_for_popping =
26484 (1 << ARG_REGISTER (2))
26485 | (1 << ARG_REGISTER (3));
26486 else if (size <= 8)
26487 regs_available_for_popping =
26488 (1 << ARG_REGISTER (3));
26491 /* Match registers to be popped with registers into which we pop them. */
26492 for (available = regs_available_for_popping,
26493 required = regs_to_pop;
26494 required != 0 && available != 0;
26495 available &= ~(available & - available),
26496 required &= ~(required & - required))
26497 --pops_needed;
26499 /* If we have any popping registers left over, remove them. */
26500 if (available > 0)
26501 regs_available_for_popping &= ~available;
26503 /* Otherwise if we need another popping register we can use
26504 the fourth argument register. */
26505 else if (pops_needed)
26507 /* If we have not found any free argument registers and
26508 reg a4 contains the return address, we must move it. */
26509 if (regs_available_for_popping == 0
26510 && reg_containing_return_addr == LAST_ARG_REGNUM)
26512 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26513 reg_containing_return_addr = LR_REGNUM;
26515 else if (size > 12)
26517 /* Register a4 is being used to hold part of the return value,
26518 but we have dire need of a free, low register. */
26519 restore_a4 = TRUE;
26521 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26524 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26526 /* The fourth argument register is available. */
26527 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26529 --pops_needed;
26533 /* Pop as many registers as we can. */
26534 thumb_pop (f, regs_available_for_popping);
26536 /* Process the registers we popped. */
26537 if (reg_containing_return_addr == -1)
26539 /* The return address was popped into the lowest numbered register. */
26540 regs_to_pop &= ~(1 << LR_REGNUM);
26542 reg_containing_return_addr =
26543 number_of_first_bit_set (regs_available_for_popping);
26545 /* Remove this register from the mask of available registers, so that
26546 the return address will not be corrupted by further pops. */
26547 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26550 /* If we popped other registers then handle them here. */
26551 if (regs_available_for_popping)
26553 int frame_pointer;
26555 /* Work out which register currently contains the frame pointer. */
26556 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26558 /* Move it into the correct place. */
26559 asm_fprintf (f, "\tmov\t%r, %r\n",
26560 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26562 /* (Temporarily) remove it from the mask of popped registers. */
26563 regs_available_for_popping &= ~(1 << frame_pointer);
26564 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26566 if (regs_available_for_popping)
26568 int stack_pointer;
26570 /* We popped the stack pointer as well,
26571 find the register that contains it. */
26572 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26574 /* Move it into the stack register. */
26575 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26577 /* At this point we have popped all necessary registers, so
26578 do not worry about restoring regs_available_for_popping
26579 to its correct value:
26581 assert (pops_needed == 0)
26582 assert (regs_available_for_popping == (1 << frame_pointer))
26583 assert (regs_to_pop == (1 << STACK_POINTER)) */
26585 else
26587 /* Since we have just moved the popped value into the frame
26588 pointer, the popping register is available for reuse, and
26589 we know that we still have the stack pointer left to pop. */
26590 regs_available_for_popping |= (1 << frame_pointer);
26594 /* If we still have registers left on the stack, but we no longer have
26595 any registers into which we can pop them, then we must move the return
26596 address into the link register and make available the register that
26597 contained it. */
26598 if (regs_available_for_popping == 0 && pops_needed > 0)
26600 regs_available_for_popping |= 1 << reg_containing_return_addr;
26602 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26603 reg_containing_return_addr);
26605 reg_containing_return_addr = LR_REGNUM;
26608 /* If we have registers left on the stack then pop some more.
26609 We know that at most we will want to pop FP and SP. */
26610 if (pops_needed > 0)
26612 int popped_into;
26613 int move_to;
26615 thumb_pop (f, regs_available_for_popping);
26617 /* We have popped either FP or SP.
26618 Move whichever one it is into the correct register. */
26619 popped_into = number_of_first_bit_set (regs_available_for_popping);
26620 move_to = number_of_first_bit_set (regs_to_pop);
26622 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26624 regs_to_pop &= ~(1 << move_to);
26626 --pops_needed;
26629 /* If we still have not popped everything then we must have only
26630 had one register available to us and we are now popping the SP. */
26631 if (pops_needed > 0)
26633 int popped_into;
26635 thumb_pop (f, regs_available_for_popping);
26637 popped_into = number_of_first_bit_set (regs_available_for_popping);
26639 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26641 /* assert (regs_to_pop == (1 << STACK_POINTER))
26642 assert (pops_needed == 1) */
26646 /* If necessary restore the a4 register. */
26647 if (restore_a4)
26649 if (reg_containing_return_addr != LR_REGNUM)
26651 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26652 reg_containing_return_addr = LR_REGNUM;
26655 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26658 if (crtl->calls_eh_return)
26659 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26661 /* Return to caller. */
26662 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26665 /* Scan INSN just before assembler is output for it.
26666 For Thumb-1, we track the status of the condition codes; this
26667 information is used in the cbranchsi4_insn pattern. */
26668 void
26669 thumb1_final_prescan_insn (rtx_insn *insn)
26671 if (flag_print_asm_name)
26672 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26673 INSN_ADDRESSES (INSN_UID (insn)));
26674 /* Don't overwrite the previous setter when we get to a cbranch. */
26675 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26677 enum attr_conds conds;
26679 if (cfun->machine->thumb1_cc_insn)
26681 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26682 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26683 CC_STATUS_INIT;
26685 conds = get_attr_conds (insn);
26686 if (conds == CONDS_SET)
26688 rtx set = single_set (insn);
26689 cfun->machine->thumb1_cc_insn = insn;
26690 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26691 cfun->machine->thumb1_cc_op1 = const0_rtx;
26692 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26693 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26695 rtx src1 = XEXP (SET_SRC (set), 1);
26696 if (src1 == const0_rtx)
26697 cfun->machine->thumb1_cc_mode = CCmode;
26699 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26701 /* Record the src register operand instead of dest because
26702 cprop_hardreg pass propagates src. */
26703 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26706 else if (conds != CONDS_NOCOND)
26707 cfun->machine->thumb1_cc_insn = NULL_RTX;
26710 /* Check whether an unexpected far jump is used. */
26711 if (cfun->machine->lr_save_eliminated
26712 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26713 internal_error ("Unexpected thumb1 far jump");
26717 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26719 unsigned HOST_WIDE_INT mask = 0xff;
26720 int i;
26722 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26723 if (val == 0) /* XXX */
26724 return 0;
26726 for (i = 0; i < 25; i++)
26727 if ((val & (mask << i)) == val)
26728 return 1;
26730 return 0;
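/* For illustration: the loop above accepts any value whose set bits fit in
   a single 8-bit window within the low 32 bits, e.g.

       0x000000ff  -> 1         (i == 0)
       0x00ff0000  -> 1         (i == 16)
       0x00000101  -> 0         (bits 0 and 8 span more than 8 bits)  */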
26733 /* Returns nonzero if the current function contains,
26734 or might contain a far jump. */
26735 static int
26736 thumb_far_jump_used_p (void)
26738 rtx_insn *insn;
26739 bool far_jump = false;
26740 unsigned int func_size = 0;
26742 /* This test is only important for leaf functions. */
26743 /* assert (!leaf_function_p ()); */
26745 /* If we have already decided that far jumps may be used,
26746 do not bother checking again, and always return true even if
26747 it turns out that they are not being used. Once we have made
26748 the decision that far jumps are present (and that hence the link
26749 register will be pushed onto the stack) we cannot go back on it. */
26750 if (cfun->machine->far_jump_used)
26751 return 1;
26753 /* If this function is not being called from the prologue/epilogue
26754 generation code then it must be being called from the
26755 INITIAL_ELIMINATION_OFFSET macro. */
26756 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26758 /* In this case we know that we are being asked about the elimination
26759 of the arg pointer register. If that register is not being used,
26760 then there are no arguments on the stack, and we do not have to
26761 worry that a far jump might force the prologue to push the link
26762 register, changing the stack offsets. In this case we can just
26763 return false, since the presence of far jumps in the function will
26764 not affect stack offsets.
26766 If the arg pointer is live (or if it was live, but has now been
26767 eliminated and so set to dead) then we do have to test to see if
26768 the function might contain a far jump. This test can lead to some
26769 false negatives, since before reload is completed, the length of
26770 branch instructions is not known, so gcc defaults to returning their
26771 longest length, which in turn sets the far jump attribute to true.
26773 A false negative will not result in bad code being generated, but it
26774 will result in a needless push and pop of the link register. We
26775 hope that this does not occur too often.
26777 If we need doubleword stack alignment this could affect the other
26778 elimination offsets so we can't risk getting it wrong. */
26779 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26780 cfun->machine->arg_pointer_live = 1;
26781 else if (!cfun->machine->arg_pointer_live)
26782 return 0;
26785 /* We should not change far_jump_used during or after reload, as there is
26786 no chance to change stack frame layout. */
26787 if (reload_in_progress || reload_completed)
26788 return 0;
26790 /* Check to see if the function contains a branch
26791 insn with the far jump attribute set. */
26792 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26794 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26796 far_jump = true;
26798 func_size += get_attr_length (insn);
26801 /* Attribute far_jump will always be true for thumb1 before
26802 shorten_branch pass. So checking far_jump attribute before
26803 shorten_branch isn't much useful.
26805 Following heuristic tries to estimate more accurately if a far jump
26806 may finally be used. The heuristic is very conservative as there is
26807 no chance to roll back a decision not to use far jumps.
26809 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26810 2-byte insn is associated with a 4-byte constant pool entry. Using
26811 function size 2048/3 as the threshold is conservative enough. */
26812 if (far_jump)
26814 if ((func_size * 3) >= 2048)
26816 /* Record the fact that we have decided that
26817 the function does use far jumps. */
26818 cfun->machine->far_jump_used = 1;
26819 return 1;
26823 return 0;
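/* Worked example of the threshold above (illustrative): a Thumb-1
   unconditional branch reaches roughly -2048..+2046 bytes.  In the
   worst case every 2-byte insn drags a 4-byte literal-pool entry, so a
   function whose insns sum to FUNC_SIZE bytes may occupy up to
   3 * FUNC_SIZE bytes.  With FUNC_SIZE == 700, 3 * 700 = 2100 >= 2048
   and far jumps are assumed; with FUNC_SIZE == 600, 1800 < 2048 and
   the cheaper return sequence can still be used.  */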
26826 /* Return nonzero if FUNC must be entered in ARM mode. */
26828 is_called_in_ARM_mode (tree func)
26830 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26832 /* Ignore the problem about functions whose address is taken. */
26833 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26834 return TRUE;
26836 #ifdef ARM_PE
26837 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26838 #else
26839 return FALSE;
26840 #endif
26843 /* Given the stack offsets and register mask in OFFSETS, decide how
26844 many additional registers to push instead of subtracting a constant
26845 from SP. For epilogues the principle is the same except we use pop.
26846 FOR_PROLOGUE indicates which we're generating. */
26847 static int
26848 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26850 HOST_WIDE_INT amount;
26851 unsigned long live_regs_mask = offsets->saved_regs_mask;
26852 /* Extract a mask of the ones we can give to the Thumb's push/pop
26853 instruction. */
26854 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26855 /* Then count how many other high registers will need to be pushed. */
26856 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26857 int n_free, reg_base, size;
26859 if (!for_prologue && frame_pointer_needed)
26860 amount = offsets->locals_base - offsets->saved_regs;
26861 else
26862 amount = offsets->outgoing_args - offsets->saved_regs;
26864 /* If the stack frame size is 512 exactly, we can save one load
26865 instruction, which should make this a win even when optimizing
26866 for speed. */
26867 if (!optimize_size && amount != 512)
26868 return 0;
26870 /* Can't do this if there are high registers to push. */
26871 if (high_regs_pushed != 0)
26872 return 0;
26874 /* Shouldn't do it in the prologue if no registers would normally
26875 be pushed at all. In the epilogue, also allow it if we'll have
26876 a pop insn for the PC. */
26877 if (l_mask == 0
26878 && (for_prologue
26879 || TARGET_BACKTRACE
26880 || (live_regs_mask & 1 << LR_REGNUM) == 0
26881 || TARGET_INTERWORK
26882 || crtl->args.pretend_args_size != 0))
26883 return 0;
26885 /* Don't do this if thumb_expand_prologue wants to emit instructions
26886 between the push and the stack frame allocation. */
26887 if (for_prologue
26888 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26889 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26890 return 0;
26892 reg_base = 0;
26893 n_free = 0;
26894 if (!for_prologue)
26896 size = arm_size_return_regs ();
26897 reg_base = ARM_NUM_INTS (size);
26898 live_regs_mask >>= reg_base;
26901 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26902 && (for_prologue || call_used_regs[reg_base + n_free]))
26904 live_regs_mask >>= 1;
26905 n_free++;
26908 if (n_free == 0)
26909 return 0;
26910 gcc_assert (amount / 4 * 4 == amount);
26912 if (amount >= 512 && (amount - n_free * 4) < 512)
26913 return (amount - 508) / 4;
26914 if (amount <= n_free * 4)
26915 return amount / 4;
26916 return 0;
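/* Worked examples of the arithmetic above (illustrative):
   - Optimizing for speed, AMOUNT must be exactly 512.  With one free
     register (N_FREE == 1), (512 - 4) < 512 holds and the function
     returns (512 - 508) / 4 == 1: pushing one extra register leaves a
     508-byte adjustment, which fits a single Thumb-1 "sub sp, #imm".
   - Optimizing for size with AMOUNT == 8 and N_FREE == 3, AMOUNT is no
     more than N_FREE * 4, so the function returns 2 and the
     "sub sp, #8" is replaced entirely by pushing two extra registers.  */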
26919 /* The bits which aren't usefully expanded as rtl. */
26920 const char *
26921 thumb1_unexpanded_epilogue (void)
26923 arm_stack_offsets *offsets;
26924 int regno;
26925 unsigned long live_regs_mask = 0;
26926 int high_regs_pushed = 0;
26927 int extra_pop;
26928 int had_to_push_lr;
26929 int size;
26931 if (cfun->machine->return_used_this_function != 0)
26932 return "";
26934 if (IS_NAKED (arm_current_func_type ()))
26935 return "";
26937 offsets = arm_get_frame_offsets ();
26938 live_regs_mask = offsets->saved_regs_mask;
26939 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26941 /* Where possible, deduce the registers used from the function's return
26942 value.  This is more reliable than examining df_regs_ever_live_p ()
26943 because that will be set if the register is ever used in the function,
26944 not just if the register is used to hold a return value. */
26945 size = arm_size_return_regs ();
26947 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26948 if (extra_pop > 0)
26950 unsigned long extra_mask = (1 << extra_pop) - 1;
26951 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26954 /* The prolog may have pushed some high registers to use as
26955 work registers. e.g. the testsuite file:
26956 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26957 compiles to produce:
26958 push {r4, r5, r6, r7, lr}
26959 mov r7, r9
26960 mov r6, r8
26961 push {r6, r7}
26962 as part of the prolog. We have to undo that pushing here. */
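/* For illustration only, a plausible undo sequence for the prologue
   quoted above (assuming the return value is small enough that r2 and
   r3 are free to act as scratch registers):
       pop   {r2, r3}            @ saved values of r8 and r9
       mov   r8, r2
       mov   r9, r3
       pop   {r4, r5, r6, r7, pc}
   The exact low registers chosen depend on the return-value size
   computed below.  */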
26964 if (high_regs_pushed)
26966 unsigned long mask = live_regs_mask & 0xff;
26967 int next_hi_reg;
26969 /* The available low registers depend on the size of the value we are
26970 returning. */
26971 if (size <= 12)
26972 mask |= 1 << 3;
26973 if (size <= 8)
26974 mask |= 1 << 2;
26976 if (mask == 0)
26977 /* Oh dear! We have no low registers into which we can pop
26978 high registers! */
26979 internal_error
26980 ("no low registers available for popping high registers");
26982 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26983 if (live_regs_mask & (1 << next_hi_reg))
26984 break;
26986 while (high_regs_pushed)
26988 /* Find lo register(s) into which the high register(s) can
26989 be popped. */
26990 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26992 if (mask & (1 << regno))
26993 high_regs_pushed--;
26994 if (high_regs_pushed == 0)
26995 break;
26998 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
27000 /* Pop the values into the low register(s). */
27001 thumb_pop (asm_out_file, mask);
27003 /* Move the value(s) into the high registers. */
27004 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
27006 if (mask & (1 << regno))
27008 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
27009 regno);
27011 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
27012 if (live_regs_mask & (1 << next_hi_reg))
27013 break;
27017 live_regs_mask &= ~0x0f00;
27020 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
27021 live_regs_mask &= 0xff;
27023 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
27025 /* Pop the return address into the PC. */
27026 if (had_to_push_lr)
27027 live_regs_mask |= 1 << PC_REGNUM;
27029 /* Either no argument registers were pushed or a backtrace
27030 structure was created which includes an adjusted stack
27031 pointer, so just pop everything. */
27032 if (live_regs_mask)
27033 thumb_pop (asm_out_file, live_regs_mask);
27035 /* We have either just popped the return address into the
27036 PC or it was kept in LR for the entire function.
27037 Note that thumb_pop has already called thumb_exit if the
27038 PC was in the list. */
27039 if (!had_to_push_lr)
27040 thumb_exit (asm_out_file, LR_REGNUM);
27042 else
27044 /* Pop everything but the return address. */
27045 if (live_regs_mask)
27046 thumb_pop (asm_out_file, live_regs_mask);
27048 if (had_to_push_lr)
27050 if (size > 12)
27052 /* We have no free low regs, so save one. */
27053 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27054 LAST_ARG_REGNUM);
27057 /* Get the return address into a temporary register. */
27058 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27060 if (size > 12)
27062 /* Move the return address to lr. */
27063 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27064 LAST_ARG_REGNUM);
27065 /* Restore the low register. */
27066 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27067 IP_REGNUM);
27068 regno = LR_REGNUM;
27070 else
27071 regno = LAST_ARG_REGNUM;
27073 else
27074 regno = LR_REGNUM;
27076 /* Remove the argument registers that were pushed onto the stack. */
27077 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27078 SP_REGNUM, SP_REGNUM,
27079 crtl->args.pretend_args_size);
27081 thumb_exit (asm_out_file, regno);
27084 return "";
27087 /* Functions to save and restore machine-specific function data. */
27088 static struct machine_function *
27089 arm_init_machine_status (void)
27091 struct machine_function *machine;
27092 machine = ggc_cleared_alloc<machine_function> ();
27094 #if ARM_FT_UNKNOWN != 0
27095 machine->func_type = ARM_FT_UNKNOWN;
27096 #endif
27097 return machine;
27100 /* Return an RTX indicating where the return address to the
27101 calling function can be found. */
27103 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27105 if (count != 0)
27106 return NULL_RTX;
27108 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27111 /* Do anything needed before RTL is emitted for each function. */
27112 void
27113 arm_init_expanders (void)
27115 /* Arrange to initialize and mark the machine per-function status. */
27116 init_machine_status = arm_init_machine_status;
27118 /* This is to stop the combine pass optimizing away the alignment
27119 adjustment of va_arg. */
27120 /* ??? It is claimed that this should not be necessary. */
27121 if (cfun)
27122 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27126 /* Like arm_compute_initial_elimination_offset. Simpler because there
27127 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27128 to point at the base of the local variables after static stack
27129 space for a function has been allocated. */
27131 HOST_WIDE_INT
27132 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27134 arm_stack_offsets *offsets;
27136 offsets = arm_get_frame_offsets ();
27138 switch (from)
27140 case ARG_POINTER_REGNUM:
27141 switch (to)
27143 case STACK_POINTER_REGNUM:
27144 return offsets->outgoing_args - offsets->saved_args;
27146 case FRAME_POINTER_REGNUM:
27147 return offsets->soft_frame - offsets->saved_args;
27149 case ARM_HARD_FRAME_POINTER_REGNUM:
27150 return offsets->saved_regs - offsets->saved_args;
27152 case THUMB_HARD_FRAME_POINTER_REGNUM:
27153 return offsets->locals_base - offsets->saved_args;
27155 default:
27156 gcc_unreachable ();
27158 break;
27160 case FRAME_POINTER_REGNUM:
27161 switch (to)
27163 case STACK_POINTER_REGNUM:
27164 return offsets->outgoing_args - offsets->soft_frame;
27166 case ARM_HARD_FRAME_POINTER_REGNUM:
27167 return offsets->saved_regs - offsets->soft_frame;
27169 case THUMB_HARD_FRAME_POINTER_REGNUM:
27170 return offsets->locals_base - offsets->soft_frame;
27172 default:
27173 gcc_unreachable ();
27175 break;
27177 default:
27178 gcc_unreachable ();
27182 /* Generate the function's prologue. */
27184 void
27185 thumb1_expand_prologue (void)
27187 rtx_insn *insn;
27189 HOST_WIDE_INT amount;
27190 arm_stack_offsets *offsets;
27191 unsigned long func_type;
27192 int regno;
27193 unsigned long live_regs_mask;
27194 unsigned long l_mask;
27195 unsigned high_regs_pushed = 0;
27197 func_type = arm_current_func_type ();
27199 /* Naked functions don't have prologues. */
27200 if (IS_NAKED (func_type))
27201 return;
27203 if (IS_INTERRUPT (func_type))
27205 error ("interrupt Service Routines cannot be coded in Thumb mode");
27206 return;
27209 if (is_called_in_ARM_mode (current_function_decl))
27210 emit_insn (gen_prologue_thumb1_interwork ());
27212 offsets = arm_get_frame_offsets ();
27213 live_regs_mask = offsets->saved_regs_mask;
27215 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27216 l_mask = live_regs_mask & 0x40ff;
27217 /* Then count how many other high registers will need to be pushed. */
27218 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27220 if (crtl->args.pretend_args_size)
27222 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27224 if (cfun->machine->uses_anonymous_args)
27226 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27227 unsigned long mask;
27229 mask = 1ul << (LAST_ARG_REGNUM + 1);
27230 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27232 insn = thumb1_emit_multi_reg_push (mask, 0);
27234 else
27236 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27237 stack_pointer_rtx, x));
27239 RTX_FRAME_RELATED_P (insn) = 1;
27242 if (TARGET_BACKTRACE)
27244 HOST_WIDE_INT offset = 0;
27245 unsigned work_register;
27246 rtx work_reg, x, arm_hfp_rtx;
27248 /* We have been asked to create a stack backtrace structure.
27249 The code looks like this:
27251 0 .align 2
27252 0 func:
27253 0 sub SP, #16 Reserve space for 4 registers.
27254 2 push {R7} Push low registers.
27255 4 add R7, SP, #20 Get the stack pointer before the push.
27256 6 str R7, [SP, #8] Store the stack pointer
27257 (before reserving the space).
27258 8 mov R7, PC Get hold of the start of this code + 12.
27259 10 str R7, [SP, #16] Store it.
27260 12 mov R7, FP Get hold of the current frame pointer.
27261 14 str R7, [SP, #4] Store it.
27262 16 mov R7, LR Get hold of the current return address.
27263 18 str R7, [SP, #12] Store it.
27264 20 add R7, SP, #16 Point at the start of the
27265 backtrace structure.
27266 22 mov FP, R7 Put this value into the frame pointer. */
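/* Reading of the sequence above (illustrative; offsets are relative to
   the new frame pointer once R7 has been copied into FP):
       [FP, #0]    saved PC (start of this code + 12)
       [FP, #-4]   saved LR
       [FP, #-8]   caller's SP (value before the 16-byte reservation)
       [FP, #-12]  caller's FP
   i.e. the four-word structure a backtrace walker expects.  */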
27268 work_register = thumb_find_work_register (live_regs_mask);
27269 work_reg = gen_rtx_REG (SImode, work_register);
27270 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27272 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27273 stack_pointer_rtx, GEN_INT (-16)));
27274 RTX_FRAME_RELATED_P (insn) = 1;
27276 if (l_mask)
27278 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27279 RTX_FRAME_RELATED_P (insn) = 1;
27281 offset = bit_count (l_mask) * UNITS_PER_WORD;
27284 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27285 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27287 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27288 x = gen_frame_mem (SImode, x);
27289 emit_move_insn (x, work_reg);
27291 /* Make sure that the instruction fetching the PC is in the right place
27292 to calculate "start of backtrace creation code + 12". */
27293 /* ??? The stores using the common WORK_REG ought to be enough to
27294 prevent the scheduler from doing anything weird. Failing that
27295 we could always move all of the following into an UNSPEC_VOLATILE. */
27296 if (l_mask)
27298 x = gen_rtx_REG (SImode, PC_REGNUM);
27299 emit_move_insn (work_reg, x);
27301 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27302 x = gen_frame_mem (SImode, x);
27303 emit_move_insn (x, work_reg);
27305 emit_move_insn (work_reg, arm_hfp_rtx);
27307 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27308 x = gen_frame_mem (SImode, x);
27309 emit_move_insn (x, work_reg);
27311 else
27313 emit_move_insn (work_reg, arm_hfp_rtx);
27315 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27316 x = gen_frame_mem (SImode, x);
27317 emit_move_insn (x, work_reg);
27319 x = gen_rtx_REG (SImode, PC_REGNUM);
27320 emit_move_insn (work_reg, x);
27322 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27323 x = gen_frame_mem (SImode, x);
27324 emit_move_insn (x, work_reg);
27327 x = gen_rtx_REG (SImode, LR_REGNUM);
27328 emit_move_insn (work_reg, x);
27330 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27331 x = gen_frame_mem (SImode, x);
27332 emit_move_insn (x, work_reg);
27334 x = GEN_INT (offset + 12);
27335 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27337 emit_move_insn (arm_hfp_rtx, work_reg);
27339 /* Optimization: If we are not pushing any low registers but we are going
27340 to push some high registers then delay our first push. This will just
27341 be a push of LR and we can combine it with the push of the first high
27342 register. */
27343 else if ((l_mask & 0xff) != 0
27344 || (high_regs_pushed == 0 && l_mask))
27346 unsigned long mask = l_mask;
27347 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27348 insn = thumb1_emit_multi_reg_push (mask, mask);
27349 RTX_FRAME_RELATED_P (insn) = 1;
27352 if (high_regs_pushed)
27354 unsigned pushable_regs;
27355 unsigned next_hi_reg;
27356 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27357 : crtl->args.info.nregs;
27358 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27360 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27361 if (live_regs_mask & (1 << next_hi_reg))
27362 break;
27364 /* Here we need to mask out registers used for passing arguments,
27365 even if they could otherwise be pushed.  This avoids using them to
27366 stash the high registers; such a stash could clobber incoming arguments. */
27367 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27369 if (pushable_regs == 0)
27370 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27372 while (high_regs_pushed > 0)
27374 unsigned long real_regs_mask = 0;
27376 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27378 if (pushable_regs & (1 << regno))
27380 emit_move_insn (gen_rtx_REG (SImode, regno),
27381 gen_rtx_REG (SImode, next_hi_reg));
27383 high_regs_pushed --;
27384 real_regs_mask |= (1 << next_hi_reg);
27386 if (high_regs_pushed)
27388 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27389 next_hi_reg --)
27390 if (live_regs_mask & (1 << next_hi_reg))
27391 break;
27393 else
27395 pushable_regs &= ~((1 << regno) - 1);
27396 break;
27401 /* If we had to find a work register and we have not yet
27402 saved the LR then add it to the list of regs to push. */
27403 if (l_mask == (1 << LR_REGNUM))
27405 pushable_regs |= l_mask;
27406 real_regs_mask |= l_mask;
27407 l_mask = 0;
27410 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27411 RTX_FRAME_RELATED_P (insn) = 1;
27415 /* Load the pic register before setting the frame pointer,
27416 so we can use r7 as a temporary work register. */
27417 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27418 arm_load_pic_register (live_regs_mask);
27420 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27421 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27422 stack_pointer_rtx);
27424 if (flag_stack_usage_info)
27425 current_function_static_stack_size
27426 = offsets->outgoing_args - offsets->saved_args;
27428 amount = offsets->outgoing_args - offsets->saved_regs;
27429 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27430 if (amount)
27432 if (amount < 512)
27434 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27435 GEN_INT (- amount)));
27436 RTX_FRAME_RELATED_P (insn) = 1;
27438 else
27440 rtx reg, dwarf;
27442 /* The stack decrement is too big for an immediate value in a single
27443 insn. In theory we could issue multiple subtracts, but after
27444 three of them it becomes more space efficient to place the full
27445 value in the constant pool and load into a register. (Also the
27446 ARM debugger really likes to see only one stack decrement per
27447 function). So instead we look for a scratch register into which
27448 we can load the decrement, and then we subtract this from the
27449 stack pointer. Unfortunately on the thumb the only available
27450 scratch registers are the argument registers, and we cannot use
27451 these as they may hold arguments to the function. Instead we
27452 attempt to locate a call preserved register which is used by this
27453 function. If we can find one, then we know that it will have
27454 been pushed at the start of the prologue and so we can corrupt
27455 it now. */
27456 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27457 if (live_regs_mask & (1 << regno))
27458 break;
27460 gcc_assert(regno <= LAST_LO_REGNUM);
27462 reg = gen_rtx_REG (SImode, regno);
27464 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27466 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27467 stack_pointer_rtx, reg));
27469 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27470 plus_constant (Pmode, stack_pointer_rtx,
27471 -amount));
27472 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27473 RTX_FRAME_RELATED_P (insn) = 1;
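/* A sketch of the code this path produces (illustrative; the scratch
   register depends on which call-saved low register is live), for
   AMOUNT == 1024 with r4 live, where the -1024 typically comes from a
   literal-pool load:
       ldr   r4, .Lnnn        @ .Lnnn: .word -1024
       add   sp, sp, r4
   The REG_FRAME_RELATED_EXPR note added above tells the unwinder this
   is equivalent to sp = sp - 1024.  */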
27477 if (frame_pointer_needed)
27478 thumb_set_frame_pointer (offsets);
27480 /* If we are profiling, make sure no instructions are scheduled before
27481 the call to mcount. Similarly if the user has requested no
27482 scheduling in the prolog. Similarly if we want non-call exceptions
27483 using the EABI unwinder, to prevent faulting instructions from being
27484 swapped with a stack adjustment. */
27485 if (crtl->profile || !TARGET_SCHED_PROLOG
27486 || (arm_except_unwind_info (&global_options) == UI_TARGET
27487 && cfun->can_throw_non_call_exceptions))
27488 emit_insn (gen_blockage ());
27490 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27491 if (live_regs_mask & 0xff)
27492 cfun->machine->lr_save_eliminated = 0;
27495 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27496 POP instruction can be generated. LR should be replaced by PC. All
27497 the checks required are already done by USE_RETURN_INSN (). Hence,
27498 all we really need to check here is if single register is to be
27499 returned, or multiple register return. */
27500 void
27501 thumb2_expand_return (bool simple_return)
27503 int i, num_regs;
27504 unsigned long saved_regs_mask;
27505 arm_stack_offsets *offsets;
27507 offsets = arm_get_frame_offsets ();
27508 saved_regs_mask = offsets->saved_regs_mask;
27510 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27511 if (saved_regs_mask & (1 << i))
27512 num_regs++;
27514 if (!simple_return && saved_regs_mask)
27516 if (num_regs == 1)
27518 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27519 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27520 rtx addr = gen_rtx_MEM (SImode,
27521 gen_rtx_POST_INC (SImode,
27522 stack_pointer_rtx));
27523 set_mem_alias_set (addr, get_frame_alias_set ());
27524 XVECEXP (par, 0, 0) = ret_rtx;
27525 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27526 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27527 emit_jump_insn (par);
27529 else
27531 saved_regs_mask &= ~ (1 << LR_REGNUM);
27532 saved_regs_mask |= (1 << PC_REGNUM);
27533 arm_emit_multi_reg_pop (saved_regs_mask);
27536 else
27538 emit_jump_insn (simple_return_rtx);
27542 void
27543 thumb1_expand_epilogue (void)
27545 HOST_WIDE_INT amount;
27546 arm_stack_offsets *offsets;
27547 int regno;
27549 /* Naked functions don't have prologues. */
27550 if (IS_NAKED (arm_current_func_type ()))
27551 return;
27553 offsets = arm_get_frame_offsets ();
27554 amount = offsets->outgoing_args - offsets->saved_regs;
27556 if (frame_pointer_needed)
27558 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27559 amount = offsets->locals_base - offsets->saved_regs;
27561 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27563 gcc_assert (amount >= 0);
27564 if (amount)
27566 emit_insn (gen_blockage ());
27568 if (amount < 512)
27569 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27570 GEN_INT (amount)));
27571 else
27573 /* r3 is always free in the epilogue. */
27574 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27576 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27577 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27581 /* Emit a USE (stack_pointer_rtx), so that
27582 the stack adjustment will not be deleted. */
27583 emit_insn (gen_force_register_use (stack_pointer_rtx));
27585 if (crtl->profile || !TARGET_SCHED_PROLOG)
27586 emit_insn (gen_blockage ());
27588 /* Emit a clobber for each insn that will be restored in the epilogue,
27589 so that flow2 will get register lifetimes correct. */
27590 for (regno = 0; regno < 13; regno++)
27591 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27592 emit_clobber (gen_rtx_REG (SImode, regno));
27594 if (! df_regs_ever_live_p (LR_REGNUM))
27595 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27598 /* Epilogue code for APCS frame. */
27599 static void
27600 arm_expand_epilogue_apcs_frame (bool really_return)
27602 unsigned long func_type;
27603 unsigned long saved_regs_mask;
27604 int num_regs = 0;
27605 int i;
27606 int floats_from_frame = 0;
27607 arm_stack_offsets *offsets;
27609 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27610 func_type = arm_current_func_type ();
27612 /* Get frame offsets for ARM. */
27613 offsets = arm_get_frame_offsets ();
27614 saved_regs_mask = offsets->saved_regs_mask;
27616 /* Find the offset of the floating-point save area in the frame. */
27617 floats_from_frame
27618 = (offsets->saved_args
27619 + arm_compute_static_chain_stack_bytes ()
27620 - offsets->frame);
27622 /* Compute how many core registers saved and how far away the floats are. */
27623 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27624 if (saved_regs_mask & (1 << i))
27626 num_regs++;
27627 floats_from_frame += 4;
27630 if (TARGET_HARD_FLOAT && TARGET_VFP)
27632 int start_reg;
27633 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27635 /* The offset is from IP_REGNUM. */
27636 int saved_size = arm_get_vfp_saved_size ();
27637 if (saved_size > 0)
27639 rtx_insn *insn;
27640 floats_from_frame += saved_size;
27641 insn = emit_insn (gen_addsi3 (ip_rtx,
27642 hard_frame_pointer_rtx,
27643 GEN_INT (-floats_from_frame)));
27644 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27645 ip_rtx, hard_frame_pointer_rtx);
27648 /* Generate VFP register multi-pop. */
27649 start_reg = FIRST_VFP_REGNUM;
27651 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27652 /* Look for a case where a reg does not need restoring. */
27653 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27654 && (!df_regs_ever_live_p (i + 1)
27655 || call_used_regs[i + 1]))
27657 if (start_reg != i)
27658 arm_emit_vfp_multi_reg_pop (start_reg,
27659 (i - start_reg) / 2,
27660 gen_rtx_REG (SImode,
27661 IP_REGNUM));
27662 start_reg = i + 2;
27665 /* Restore the remaining regs that we have discovered (or possibly
27666 even all of them, if the conditional in the for loop never
27667 fired). */
27668 if (start_reg != i)
27669 arm_emit_vfp_multi_reg_pop (start_reg,
27670 (i - start_reg) / 2,
27671 gen_rtx_REG (SImode, IP_REGNUM));
27674 if (TARGET_IWMMXT)
27676 /* The frame pointer is guaranteed to be non-double-word aligned, as
27677 it is set to double-word-aligned old_stack_pointer - 4. */
27678 rtx_insn *insn;
27679 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27681 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27682 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27684 rtx addr = gen_frame_mem (V2SImode,
27685 plus_constant (Pmode, hard_frame_pointer_rtx,
27686 - lrm_count * 4));
27687 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27688 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27689 gen_rtx_REG (V2SImode, i),
27690 NULL_RTX);
27691 lrm_count += 2;
27695 /* saved_regs_mask should contain IP, which holds the old stack pointer
27696 saved when the frame was created. Since SP and IP are adjacent registers,
27697 we can restore the value directly into SP. */
27698 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27699 saved_regs_mask &= ~(1 << IP_REGNUM);
27700 saved_regs_mask |= (1 << SP_REGNUM);
27702 /* There are two registers left in saved_regs_mask - LR and PC. We
27703 only need to restore LR (the return address), but to
27704 save time we can load it directly into PC, unless we need a
27705 special function exit sequence, or we are not really returning. */
27706 if (really_return
27707 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27708 && !crtl->calls_eh_return)
27709 /* Delete LR from the register mask, so that LR on
27710 the stack is loaded into the PC in the register mask. */
27711 saved_regs_mask &= ~(1 << LR_REGNUM);
27712 else
27713 saved_regs_mask &= ~(1 << PC_REGNUM);
27715 num_regs = bit_count (saved_regs_mask);
27716 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27718 rtx_insn *insn;
27719 emit_insn (gen_blockage ());
27720 /* Unwind the stack to just below the saved registers. */
27721 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27722 hard_frame_pointer_rtx,
27723 GEN_INT (- 4 * num_regs)));
27725 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27726 stack_pointer_rtx, hard_frame_pointer_rtx);
27729 arm_emit_multi_reg_pop (saved_regs_mask);
27731 if (IS_INTERRUPT (func_type))
27733 /* Interrupt handlers will have pushed the
27734 IP onto the stack, so restore it now. */
27735 rtx_insn *insn;
27736 rtx addr = gen_rtx_MEM (SImode,
27737 gen_rtx_POST_INC (SImode,
27738 stack_pointer_rtx));
27739 set_mem_alias_set (addr, get_frame_alias_set ());
27740 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27741 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27742 gen_rtx_REG (SImode, IP_REGNUM),
27743 NULL_RTX);
27746 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27747 return;
27749 if (crtl->calls_eh_return)
27750 emit_insn (gen_addsi3 (stack_pointer_rtx,
27751 stack_pointer_rtx,
27752 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27754 if (IS_STACKALIGN (func_type))
27755 /* Restore the original stack pointer. Before prologue, the stack was
27756 realigned and the original stack pointer saved in r0. For details,
27757 see comment in arm_expand_prologue. */
27758 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27760 emit_jump_insn (simple_return_rtx);
27763 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27764 function is not a sibcall. */
27765 void
27766 arm_expand_epilogue (bool really_return)
27768 unsigned long func_type;
27769 unsigned long saved_regs_mask;
27770 int num_regs = 0;
27771 int i;
27772 int amount;
27773 arm_stack_offsets *offsets;
27775 func_type = arm_current_func_type ();
27777 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27778 let output_return_instruction take care of instruction emission if any. */
27779 if (IS_NAKED (func_type)
27780 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27782 if (really_return)
27783 emit_jump_insn (simple_return_rtx);
27784 return;
27787 /* If we are throwing an exception, then we really must be doing a
27788 return, so we can't tail-call. */
27789 gcc_assert (!crtl->calls_eh_return || really_return);
27791 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27793 arm_expand_epilogue_apcs_frame (really_return);
27794 return;
27797 /* Get frame offsets for ARM. */
27798 offsets = arm_get_frame_offsets ();
27799 saved_regs_mask = offsets->saved_regs_mask;
27800 num_regs = bit_count (saved_regs_mask);
27802 if (frame_pointer_needed)
27804 rtx_insn *insn;
27805 /* Restore stack pointer if necessary. */
27806 if (TARGET_ARM)
27808 /* In ARM mode, frame pointer points to first saved register.
27809 Restore stack pointer to last saved register. */
27810 amount = offsets->frame - offsets->saved_regs;
27812 /* Force out any pending memory operations that reference stacked data
27813 before stack de-allocation occurs. */
27814 emit_insn (gen_blockage ());
27815 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27816 hard_frame_pointer_rtx,
27817 GEN_INT (amount)));
27818 arm_add_cfa_adjust_cfa_note (insn, amount,
27819 stack_pointer_rtx,
27820 hard_frame_pointer_rtx);
27822 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27823 deleted. */
27824 emit_insn (gen_force_register_use (stack_pointer_rtx));
27826 else
27828 /* In Thumb-2 mode, the frame pointer points to the last saved
27829 register. */
27830 amount = offsets->locals_base - offsets->saved_regs;
27831 if (amount)
27833 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27834 hard_frame_pointer_rtx,
27835 GEN_INT (amount)));
27836 arm_add_cfa_adjust_cfa_note (insn, amount,
27837 hard_frame_pointer_rtx,
27838 hard_frame_pointer_rtx);
27841 /* Force out any pending memory operations that reference stacked data
27842 before stack de-allocation occurs. */
27843 emit_insn (gen_blockage ());
27844 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27845 hard_frame_pointer_rtx));
27846 arm_add_cfa_adjust_cfa_note (insn, 0,
27847 stack_pointer_rtx,
27848 hard_frame_pointer_rtx);
27849 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27850 deleted. */
27851 emit_insn (gen_force_register_use (stack_pointer_rtx));
27854 else
27856 /* Pop off outgoing args and local frame to adjust stack pointer to
27857 last saved register. */
27858 amount = offsets->outgoing_args - offsets->saved_regs;
27859 if (amount)
27861 rtx_insn *tmp;
27862 /* Force out any pending memory operations that reference stacked data
27863 before stack de-allocation occurs. */
27864 emit_insn (gen_blockage ());
27865 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27866 stack_pointer_rtx,
27867 GEN_INT (amount)));
27868 arm_add_cfa_adjust_cfa_note (tmp, amount,
27869 stack_pointer_rtx, stack_pointer_rtx);
27870 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27871 not deleted. */
27872 emit_insn (gen_force_register_use (stack_pointer_rtx));
27876 if (TARGET_HARD_FLOAT && TARGET_VFP)
27878 /* Generate VFP register multi-pop. */
27879 int end_reg = LAST_VFP_REGNUM + 1;
27881 /* Scan the registers in reverse order. We need to match
27882 any groupings made in the prologue and generate matching
27883 vldm operations.  Grouping matters because, unlike pop,
27884 vldm can only restore consecutive registers.
27885 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27886 /* Look for a case where a reg does not need restoring. */
27887 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27888 && (!df_regs_ever_live_p (i + 1)
27889 || call_used_regs[i + 1]))
27891 /* Restore the regs discovered so far (from reg+2 to
27892 end_reg). */
27893 if (end_reg > i + 2)
27894 arm_emit_vfp_multi_reg_pop (i + 2,
27895 (end_reg - (i + 2)) / 2,
27896 stack_pointer_rtx);
27897 end_reg = i;
27900 /* Restore the remaining regs that we have discovered (or possibly
27901 even all of them, if the conditional in the for loop never
27902 fired). */
27903 if (end_reg > i + 2)
27904 arm_emit_vfp_multi_reg_pop (i + 2,
27905 (end_reg - (i + 2)) / 2,
27906 stack_pointer_rtx);
27909 if (TARGET_IWMMXT)
27910 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27911 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27913 rtx_insn *insn;
27914 rtx addr = gen_rtx_MEM (V2SImode,
27915 gen_rtx_POST_INC (SImode,
27916 stack_pointer_rtx));
27917 set_mem_alias_set (addr, get_frame_alias_set ());
27918 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27919 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27920 gen_rtx_REG (V2SImode, i),
27921 NULL_RTX);
27922 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27923 stack_pointer_rtx, stack_pointer_rtx);
27926 if (saved_regs_mask)
27928 rtx insn;
27929 bool return_in_pc = false;
27931 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27932 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27933 && !IS_STACKALIGN (func_type)
27934 && really_return
27935 && crtl->args.pretend_args_size == 0
27936 && saved_regs_mask & (1 << LR_REGNUM)
27937 && !crtl->calls_eh_return)
27939 saved_regs_mask &= ~(1 << LR_REGNUM);
27940 saved_regs_mask |= (1 << PC_REGNUM);
27941 return_in_pc = true;
27944 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27946 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27947 if (saved_regs_mask & (1 << i))
27949 rtx addr = gen_rtx_MEM (SImode,
27950 gen_rtx_POST_INC (SImode,
27951 stack_pointer_rtx));
27952 set_mem_alias_set (addr, get_frame_alias_set ());
27954 if (i == PC_REGNUM)
27956 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27957 XVECEXP (insn, 0, 0) = ret_rtx;
27958 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27959 gen_rtx_REG (SImode, i),
27960 addr);
27961 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27962 insn = emit_jump_insn (insn);
27964 else
27966 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27967 addr));
27968 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27969 gen_rtx_REG (SImode, i),
27970 NULL_RTX);
27971 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27972 stack_pointer_rtx,
27973 stack_pointer_rtx);
27977 else
27979 if (TARGET_LDRD
27980 && current_tune->prefer_ldrd_strd
27981 && !optimize_function_for_size_p (cfun))
27983 if (TARGET_THUMB2)
27984 thumb2_emit_ldrd_pop (saved_regs_mask);
27985 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27986 arm_emit_ldrd_pop (saved_regs_mask);
27987 else
27988 arm_emit_multi_reg_pop (saved_regs_mask);
27990 else
27991 arm_emit_multi_reg_pop (saved_regs_mask);
27994 if (return_in_pc == true)
27995 return;
27998 if (crtl->args.pretend_args_size)
28000 int i, j;
28001 rtx dwarf = NULL_RTX;
28002 rtx_insn *tmp =
28003 emit_insn (gen_addsi3 (stack_pointer_rtx,
28004 stack_pointer_rtx,
28005 GEN_INT (crtl->args.pretend_args_size)));
28007 RTX_FRAME_RELATED_P (tmp) = 1;
28009 if (cfun->machine->uses_anonymous_args)
28011 /* Restore pretend args.  See arm_expand_prologue for how the
28012 pretend args are saved on the stack. */
28013 int num_regs = crtl->args.pretend_args_size / 4;
28014 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28015 for (j = 0, i = 0; j < num_regs; i++)
28016 if (saved_regs_mask & (1 << i))
28018 rtx reg = gen_rtx_REG (SImode, i);
28019 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28020 j++;
28022 REG_NOTES (tmp) = dwarf;
28024 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
28025 stack_pointer_rtx, stack_pointer_rtx);
28028 if (!really_return)
28029 return;
28031 if (crtl->calls_eh_return)
28032 emit_insn (gen_addsi3 (stack_pointer_rtx,
28033 stack_pointer_rtx,
28034 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28036 if (IS_STACKALIGN (func_type))
28037 /* Restore the original stack pointer. Before prologue, the stack was
28038 realigned and the original stack pointer saved in r0. For details,
28039 see comment in arm_expand_prologue. */
28040 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
28042 emit_jump_insn (simple_return_rtx);
28045 /* Implementation of insn prologue_thumb1_interwork. This is the first
28046 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28048 const char *
28049 thumb1_output_interwork (void)
28051 const char * name;
28052 FILE *f = asm_out_file;
28054 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28055 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28056 == SYMBOL_REF);
28057 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28059 /* Generate code sequence to switch us into Thumb mode. */
28060 /* The .code 32 directive has already been emitted by
28061 ASM_DECLARE_FUNCTION_NAME. */
28062 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28063 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28065 /* Generate a label, so that the debugger will notice the
28066 change in instruction sets. This label is also used by
28067 the assembler to bypass the ARM code when this function
28068 is called from a Thumb encoded function elsewhere in the
28069 same file. Hence the definition of STUB_NAME here must
28070 agree with the definition in gas/config/tc-arm.c. */
28072 #define STUB_NAME ".real_start_of"
28074 fprintf (f, "\t.code\t16\n");
28075 #ifdef ARM_PE
28076 if (arm_dllexport_name_p (name))
28077 name = arm_strip_name_encoding (name);
28078 #endif
28079 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28080 fprintf (f, "\t.thumb_func\n");
28081 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28083 return "";
28086 /* Handle the case of a double word load into a low register from
28087 a computed memory address. The computed address may involve a
28088 register which is overwritten by the load. */
28089 const char *
28090 thumb_load_double_from_address (rtx *operands)
28092 rtx addr;
28093 rtx base;
28094 rtx offset;
28095 rtx arg1;
28096 rtx arg2;
28098 gcc_assert (REG_P (operands[0]));
28099 gcc_assert (MEM_P (operands[1]));
28101 /* Get the memory address. */
28102 addr = XEXP (operands[1], 0);
28104 /* Work out how the memory address is computed. */
28105 switch (GET_CODE (addr))
28107 case REG:
28108 operands[2] = adjust_address (operands[1], SImode, 4);
28110 if (REGNO (operands[0]) == REGNO (addr))
28112 output_asm_insn ("ldr\t%H0, %2", operands);
28113 output_asm_insn ("ldr\t%0, %1", operands);
28115 else
28117 output_asm_insn ("ldr\t%0, %1", operands);
28118 output_asm_insn ("ldr\t%H0, %2", operands);
28120 break;
28122 case CONST:
28123 /* Compute <address> + 4 for the high order load. */
28124 operands[2] = adjust_address (operands[1], SImode, 4);
28126 output_asm_insn ("ldr\t%0, %1", operands);
28127 output_asm_insn ("ldr\t%H0, %2", operands);
28128 break;
28130 case PLUS:
28131 arg1 = XEXP (addr, 0);
28132 arg2 = XEXP (addr, 1);
28134 if (CONSTANT_P (arg1))
28135 base = arg2, offset = arg1;
28136 else
28137 base = arg1, offset = arg2;
28139 gcc_assert (REG_P (base));
28141 /* Catch the case of <address> = <reg> + <reg> */
28142 if (REG_P (offset))
28144 int reg_offset = REGNO (offset);
28145 int reg_base = REGNO (base);
28146 int reg_dest = REGNO (operands[0]);
28148 /* Add the base and offset registers together into the
28149 higher destination register. */
28150 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28151 reg_dest + 1, reg_base, reg_offset);
28153 /* Load the lower destination register from the address in
28154 the higher destination register. */
28155 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28156 reg_dest, reg_dest + 1);
28158 /* Load the higher destination register from its own address
28159 plus 4. */
28160 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28161 reg_dest + 1, reg_dest + 1);
28163 else
28165 /* Compute <address> + 4 for the high order load. */
28166 operands[2] = adjust_address (operands[1], SImode, 4);
28168 /* If the computed address is held in the low order register
28169 then load the high order register first, otherwise always
28170 load the low order register first. */
28171 if (REGNO (operands[0]) == REGNO (base))
28173 output_asm_insn ("ldr\t%H0, %2", operands);
28174 output_asm_insn ("ldr\t%0, %1", operands);
28176 else
28178 output_asm_insn ("ldr\t%0, %1", operands);
28179 output_asm_insn ("ldr\t%H0, %2", operands);
28182 break;
28184 case LABEL_REF:
28185 /* With no registers to worry about we can just load the value
28186 directly. */
28187 operands[2] = adjust_address (operands[1], SImode, 4);
28189 output_asm_insn ("ldr\t%H0, %2", operands);
28190 output_asm_insn ("ldr\t%0, %1", operands);
28191 break;
28193 default:
28194 gcc_unreachable ();
28197 return "";
28200 const char *
28201 thumb_output_move_mem_multiple (int n, rtx *operands)
28203 rtx tmp;
28205 switch (n)
28207 case 2:
28208 if (REGNO (operands[4]) > REGNO (operands[5]))
28210 tmp = operands[4];
28211 operands[4] = operands[5];
28212 operands[5] = tmp;
28214 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28215 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28216 break;
28218 case 3:
28219 if (REGNO (operands[4]) > REGNO (operands[5]))
28221 tmp = operands[4];
28222 operands[4] = operands[5];
28223 operands[5] = tmp;
28225 if (REGNO (operands[5]) > REGNO (operands[6]))
28227 tmp = operands[5];
28228 operands[5] = operands[6];
28229 operands[6] = tmp;
28231 if (REGNO (operands[4]) > REGNO (operands[5]))
28233 tmp = operands[4];
28234 operands[4] = operands[5];
28235 operands[5] = tmp;
28238 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28239 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28240 break;
28242 default:
28243 gcc_unreachable ();
28246 return "";
28249 /* Output a call-via instruction for thumb state. */
28250 const char *
28251 thumb_call_via_reg (rtx reg)
28253 int regno = REGNO (reg);
28254 rtx *labelp;
28256 gcc_assert (regno < LR_REGNUM);
28258 /* If we are in the normal text section we can use a single instance
28259 per compilation unit. If we are doing function sections, then we need
28260 an entry per section, since we can't rely on reachability. */
28261 if (in_section == text_section)
28263 thumb_call_reg_needed = 1;
28265 if (thumb_call_via_label[regno] == NULL)
28266 thumb_call_via_label[regno] = gen_label_rtx ();
28267 labelp = thumb_call_via_label + regno;
28269 else
28271 if (cfun->machine->call_via[regno] == NULL)
28272 cfun->machine->call_via[regno] = gen_label_rtx ();
28273 labelp = cfun->machine->call_via + regno;
28276 output_asm_insn ("bl\t%a0", labelp);
28277 return "";
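/* Illustrative output (the label name is made up; real labels come
   from gen_label_rtx): at the call site this routine emits
       bl    .L42
   and a matching veneer
       .L42: bx    r3
   is emitted later, either per compilation unit by arm_file_end below
   or per section via cfun->machine->call_via, giving an indirect call
   through r3 on cores intended to work without BLX to a register.  */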
28280 /* Routines for generating rtl. */
28281 void
28282 thumb_expand_movmemqi (rtx *operands)
28284 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28285 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28286 HOST_WIDE_INT len = INTVAL (operands[2]);
28287 HOST_WIDE_INT offset = 0;
28289 while (len >= 12)
28291 emit_insn (gen_movmem12b (out, in, out, in));
28292 len -= 12;
28295 if (len >= 8)
28297 emit_insn (gen_movmem8b (out, in, out, in));
28298 len -= 8;
28301 if (len >= 4)
28303 rtx reg = gen_reg_rtx (SImode);
28304 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28305 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28306 len -= 4;
28307 offset += 4;
28310 if (len >= 2)
28312 rtx reg = gen_reg_rtx (HImode);
28313 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28314 plus_constant (Pmode, in,
28315 offset))));
28316 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28317 offset)),
28318 reg));
28319 len -= 2;
28320 offset += 2;
28323 if (len)
28325 rtx reg = gen_reg_rtx (QImode);
28326 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28327 plus_constant (Pmode, in,
28328 offset))));
28329 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28330 offset)),
28331 reg));
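/* Worked example (illustrative): a 23-byte copy is emitted as
   12 + 8 + 2 + 1 -- one movmem12b and one movmem8b (both of which
   post-increment the pointer registers), then a halfword copy at
   offset 0 and a byte copy at offset 2 relative to the updated
   pointers.  */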
28335 void
28336 thumb_reload_out_hi (rtx *operands)
28338 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28341 /* Handle reading a half-word from memory during reload. */
28342 void
28343 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28345 gcc_unreachable ();
28348 /* Return the length of a function name prefix
28349 that starts with the character 'c'. */
28350 static int
28351 arm_get_strip_length (int c)
28353 switch (c)
28355 ARM_NAME_ENCODING_LENGTHS
28356 default: return 0;
28360 /* Return a pointer to a function's name with any
28361 and all prefix encodings stripped from it. */
28362 const char *
28363 arm_strip_name_encoding (const char *name)
28365 int skip;
28367 while ((skip = arm_get_strip_length (* name)))
28368 name += skip;
28370 return name;
28373 /* If there is a '*' anywhere in the name's prefix, then
28374 emit the stripped name verbatim, otherwise prepend an
28375 underscore if leading underscores are being used. */
28376 void
28377 arm_asm_output_labelref (FILE *stream, const char *name)
28379 int skip;
28380 int verbatim = 0;
28382 while ((skip = arm_get_strip_length (* name)))
28384 verbatim |= (*name == '*');
28385 name += skip;
28388 if (verbatim)
28389 fputs (name, stream);
28390 else
28391 asm_fprintf (stream, "%U%s", name);
28394 /* This function is used to emit an EABI tag and its associated value.
28395 We emit the numerical value of the tag in case the assembler does not
28396 support textual tags (e.g. gas prior to 2.20). If requested we include
28397 the tag name in a comment so that anyone reading the assembler output
28398 will know which tag is being set.
28400 This function is not static because arm-c.c needs it too. */
28402 void
28403 arm_emit_eabi_attribute (const char *name, int num, int val)
28405 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28406 if (flag_verbose_asm || flag_debug_asm)
28407 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28408 asm_fprintf (asm_out_file, "\n");
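/* For example, with -O2 the Tag_ABI_optimization_goals call in
   arm_file_start below produces
       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
   where the trailing '@' comment appears only under -fverbose-asm or
   -dA.  */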
28411 static void
28412 arm_file_start (void)
28414 int val;
28416 if (TARGET_UNIFIED_ASM)
28417 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28419 if (TARGET_BPABI)
28421 const char *fpu_name;
28422 if (arm_selected_arch)
28424 /* armv7ve doesn't support any extensions. */
28425 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28427 /* Keep backward compatibility for assemblers
28428 which don't support armv7ve. */
28429 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28430 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28431 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28432 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28433 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28435 else
28437 const char* pos = strchr (arm_selected_arch->name, '+');
28438 if (pos)
28440 char buf[15];
28441 gcc_assert (strlen (arm_selected_arch->name)
28442 <= sizeof (buf) / sizeof (*pos));
28443 strncpy (buf, arm_selected_arch->name,
28444 (pos - arm_selected_arch->name) * sizeof (*pos));
28445 buf[pos - arm_selected_arch->name] = '\0';
28446 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28447 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28449 else
28450 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28453 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28454 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28455 else
28457 const char* truncated_name
28458 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28459 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28462 if (TARGET_SOFT_FLOAT)
28464 fpu_name = "softvfp";
28466 else
28468 fpu_name = arm_fpu_desc->name;
28469 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28471 if (TARGET_HARD_FLOAT)
28472 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28473 if (TARGET_HARD_FLOAT_ABI)
28474 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28477 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28479 /* Some of these attributes only apply when the corresponding features
28480 are used. However we don't have any easy way of figuring this out.
28481 Conservatively record the setting that would have been used. */
28483 if (flag_rounding_math)
28484 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28486 if (!flag_unsafe_math_optimizations)
28488 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28489 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28491 if (flag_signaling_nans)
28492 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28494 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28495 flag_finite_math_only ? 1 : 3);
28497 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28498 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28499 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28500 flag_short_enums ? 1 : 2);
28502 /* Tag_ABI_optimization_goals. */
28503 if (optimize_size)
28504 val = 4;
28505 else if (optimize >= 2)
28506 val = 2;
28507 else if (optimize)
28508 val = 1;
28509 else
28510 val = 6;
28511 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28513 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28514 unaligned_access);
28516 if (arm_fp16_format)
28517 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28518 (int) arm_fp16_format);
28520 if (arm_lang_output_object_attributes_hook)
28521 arm_lang_output_object_attributes_hook();
28524 default_file_start ();
28527 static void
28528 arm_file_end (void)
28530 int regno;
28532 if (NEED_INDICATE_EXEC_STACK)
28533 /* Add .note.GNU-stack. */
28534 file_end_indicate_exec_stack ();
28536 if (! thumb_call_reg_needed)
28537 return;
28539 switch_to_section (text_section);
28540 asm_fprintf (asm_out_file, "\t.code 16\n");
28541 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28543 for (regno = 0; regno < LR_REGNUM; regno++)
28545 rtx label = thumb_call_via_label[regno];
28547 if (label != 0)
28549 targetm.asm_out.internal_label (asm_out_file, "L",
28550 CODE_LABEL_NUMBER (label));
28551 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28556 #ifndef ARM_PE
28557 /* Symbols in the text segment can be accessed without indirecting via the
28558 constant pool; it may take an extra binary operation, but this is still
28559 faster than indirecting via memory. Don't do this when not optimizing,
28560 since we won't be calculating all of the offsets necessary to do this
28561 simplification. */
28563 static void
28564 arm_encode_section_info (tree decl, rtx rtl, int first)
28566 if (optimize > 0 && TREE_CONSTANT (decl))
28567 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28569 default_encode_section_info (decl, rtl, first);
28571 #endif /* !ARM_PE */
28573 static void
28574 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28576 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28577 && !strcmp (prefix, "L"))
28579 arm_ccfsm_state = 0;
28580 arm_target_insn = NULL;
28582 default_internal_label (stream, prefix, labelno);
28585 /* Output code to add DELTA to the first argument, and then jump
28586 to FUNCTION. Used for C++ multiple inheritance. */
28587 static void
28588 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28589 HOST_WIDE_INT delta,
28590 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28591 tree function)
28593 static int thunk_label = 0;
28594 char label[256];
28595 char labelpc[256];
28596 int mi_delta = delta;
28597 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28598 int shift = 0;
28599 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28600 ? 1 : 0);
28601 if (mi_delta < 0)
28602 mi_delta = - mi_delta;
28604 final_start_function (emit_barrier (), file, 1);
28606 if (TARGET_THUMB1)
28608 int labelno = thunk_label++;
28609 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28610 /* Thunks are entered in ARM mode when available. */
28611 if (TARGET_THUMB1_ONLY)
28613 /* push r3 so we can use it as a temporary. */
28614 /* TODO: Omit this save if r3 is not used. */
28615 fputs ("\tpush {r3}\n", file);
28616 fputs ("\tldr\tr3, ", file);
28618 else
28620 fputs ("\tldr\tr12, ", file);
28622 assemble_name (file, label);
28623 fputc ('\n', file);
28624 if (flag_pic)
28626 /* If we are generating PIC, the ldr instruction below loads
28627 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28628 the address of the add + 8, so we have:
28630 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28631 = target + 1.
28633 Note that we have "+ 1" because some versions of GNU ld
28634 don't set the low bit of the result for R_ARM_REL32
28635 relocations against thumb function symbols.
28636 On ARMv6M this is +4, not +8. */
28637 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28638 assemble_name (file, labelpc);
28639 fputs (":\n", file);
28640 if (TARGET_THUMB1_ONLY)
28642 /* This is 2 insns after the start of the thunk, so we know it
28643 is 4-byte aligned. */
28644 fputs ("\tadd\tr3, pc, r3\n", file);
28645 fputs ("\tmov r12, r3\n", file);
28647 else
28648 fputs ("\tadd\tr12, pc, r12\n", file);
28650 else if (TARGET_THUMB1_ONLY)
28651 fputs ("\tmov r12, r3\n", file);
28653 if (TARGET_THUMB1_ONLY)
28655 if (mi_delta > 255)
28657 fputs ("\tldr\tr3, ", file);
28658 assemble_name (file, label);
28659 fputs ("+4\n", file);
28660 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28661 mi_op, this_regno, this_regno);
28663 else if (mi_delta != 0)
28665 /* Thumb1 unified syntax requires s suffix in instruction name when
28666 one of the operands is immediate. */
28667 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28668 mi_op, this_regno, this_regno,
28669 mi_delta);
28672 else
28674 /* TODO: Use movw/movt for large constants when available. */
28675 while (mi_delta != 0)
28677 if ((mi_delta & (3 << shift)) == 0)
28678 shift += 2;
28679 else
28681 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28682 mi_op, this_regno, this_regno,
28683 mi_delta & (0xff << shift));
28684 mi_delta &= ~(0xff << shift);
28685 shift += 8;
28689 if (TARGET_THUMB1)
28691 if (TARGET_THUMB1_ONLY)
28692 fputs ("\tpop\t{r3}\n", file);
28694 fprintf (file, "\tbx\tr12\n");
28695 ASM_OUTPUT_ALIGN (file, 2);
28696 assemble_name (file, label);
28697 fputs (":\n", file);
28698 if (flag_pic)
28700 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28701 rtx tem = XEXP (DECL_RTL (function), 0);
28702 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28703 pipeline offset is four rather than eight. Adjust the offset
28704 accordingly. */
28705 tem = plus_constant (GET_MODE (tem), tem,
28706 TARGET_THUMB1_ONLY ? -3 : -7);
28707 tem = gen_rtx_MINUS (GET_MODE (tem),
28708 tem,
28709 gen_rtx_SYMBOL_REF (Pmode,
28710 ggc_strdup (labelpc)));
28711 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28713 else
28714 /* Output ".word .LTHUNKn". */
28715 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28717 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28718 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28720 else
28722 fputs ("\tb\t", file);
28723 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28724 if (NEED_PLT_RELOC)
28725 fputs ("(PLT)", file);
28726 fputc ('\n', file);
28729 final_end_function ();
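/* Illustrative sketch (annotation only, not part of the upstream sources):
   for a simple ARM-mode thunk with a this-pointer adjustment of 4 and no
   vcall offset, the code above emits roughly

       add     r0, r0, #4
       b       <target>

   (with a "(PLT)" suffix on the branch when PLT relocations are needed);
   the delta is folded into r0, or into r1 when the function returns an
   aggregate in memory, and control falls through to the real method.  */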
28733 arm_emit_vector_const (FILE *file, rtx x)
28735 int i;
28736 const char * pattern;
28738 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28740 switch (GET_MODE (x))
28742 case V2SImode: pattern = "%08x"; break;
28743 case V4HImode: pattern = "%04x"; break;
28744 case V8QImode: pattern = "%02x"; break;
28745 default: gcc_unreachable ();
28748 fprintf (file, "0x");
28749 for (i = CONST_VECTOR_NUNITS (x); i--;)
28751 rtx element;
28753 element = CONST_VECTOR_ELT (x, i);
28754 fprintf (file, pattern, INTVAL (element));
28757 return 1;
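/* Worked example (annotation only): a V4HImode constant vector {1, 2, 3, 4}
   is printed from the highest-numbered element down, producing the literal
   "0x0004000300020001", so element 0 ends up in the least significant
   halfword of the emitted 64-bit value.  */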
28760 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28761 HFmode constant pool entries are actually loaded with ldr. */
28762 void
28763 arm_emit_fp16_const (rtx c)
28765 REAL_VALUE_TYPE r;
28766 long bits;
28768 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28769 bits = real_to_target (NULL, &r, HFmode);
28770 if (WORDS_BIG_ENDIAN)
28771 assemble_zeros (2);
28772 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28773 if (!WORDS_BIG_ENDIAN)
28774 assemble_zeros (2);
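/* Example (annotation only): the HFmode constant 1.0 has the IEEE
   half-precision encoding 0x3c00, so on a little-endian target this emits
   the two value bytes followed by two bytes of zero padding, allowing the
   constant-pool entry to be fetched with a word-sized ldr.  */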
28777 const char *
28778 arm_output_load_gr (rtx *operands)
28780 rtx reg;
28781 rtx offset;
28782 rtx wcgr;
28783 rtx sum;
28785 if (!MEM_P (operands [1])
28786 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28787 || !REG_P (reg = XEXP (sum, 0))
28788 || !CONST_INT_P (offset = XEXP (sum, 1))
28789 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28790 return "wldrw%?\t%0, %1";
28792 /* Fix up an out-of-range load of a GR register. */
28793 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28794 wcgr = operands[0];
28795 operands[0] = reg;
28796 output_asm_insn ("ldr%?\t%0, %1", operands);
28798 operands[0] = wcgr;
28799 operands[1] = reg;
28800 output_asm_insn ("tmcr%?\t%0, %1", operands);
28801 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28803 return "";
28806 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28808 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28809 named arg and all anonymous args onto the stack.
28810 XXX I know the prologue shouldn't be pushing registers, but it is faster
28811 that way. */
28813 static void
28814 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28815 machine_mode mode,
28816 tree type,
28817 int *pretend_size,
28818 int second_time ATTRIBUTE_UNUSED)
28820 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28821 int nregs;
28823 cfun->machine->uses_anonymous_args = 1;
28824 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28826 nregs = pcum->aapcs_ncrn;
28827 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28828 nregs++;
28830 else
28831 nregs = pcum->nregs;
28833 if (nregs < NUM_ARG_REGS)
28834 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
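/* Worked example (annotation only, AAPCS): for a variadic function such as

       int f (int a, ...);

   the named argument occupies r0, so aapcs_ncrn is 1 and *pretend_size
   becomes (4 - 1) * 4 = 12 bytes, telling the prologue to push r1-r3 so
   that va_arg can walk all anonymous arguments on the stack.  */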
28837 /* We can't rely on the caller doing the proper promotion when
28838 using APCS or ATPCS. */
28840 static bool
28841 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28843 return !TARGET_AAPCS_BASED;
28846 static machine_mode
28847 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28848 machine_mode mode,
28849 int *punsignedp ATTRIBUTE_UNUSED,
28850 const_tree fntype ATTRIBUTE_UNUSED,
28851 int for_return ATTRIBUTE_UNUSED)
28853 if (GET_MODE_CLASS (mode) == MODE_INT
28854 && GET_MODE_SIZE (mode) < 4)
28855 return SImode;
28857 return mode;
28860 /* AAPCS based ABIs use short enums by default. */
28862 static bool
28863 arm_default_short_enums (void)
28865 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28869 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28871 static bool
28872 arm_align_anon_bitfield (void)
28874 return TARGET_AAPCS_BASED;
28878 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28880 static tree
28881 arm_cxx_guard_type (void)
28883 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28887 /* The EABI says test the least significant bit of a guard variable. */
28889 static bool
28890 arm_cxx_guard_mask_bit (void)
28892 return TARGET_AAPCS_BASED;
28896 /* The EABI specifies that all array cookies are 8 bytes long. */
28898 static tree
28899 arm_get_cookie_size (tree type)
28901 tree size;
28903 if (!TARGET_AAPCS_BASED)
28904 return default_cxx_get_cookie_size (type);
28906 size = build_int_cst (sizetype, 8);
28907 return size;
28911 /* The EABI says that array cookies should also contain the element size. */
28913 static bool
28914 arm_cookie_has_size (void)
28916 return TARGET_AAPCS_BASED;
28920 /* The EABI says constructors and destructors should return a pointer to
28921 the object constructed/destroyed. */
28923 static bool
28924 arm_cxx_cdtor_returns_this (void)
28926 return TARGET_AAPCS_BASED;
28929 /* The EABI says that an inline function may never be the key
28930 method. */
28932 static bool
28933 arm_cxx_key_method_may_be_inline (void)
28935 return !TARGET_AAPCS_BASED;
28938 static void
28939 arm_cxx_determine_class_data_visibility (tree decl)
28941 if (!TARGET_AAPCS_BASED
28942 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28943 return;
28945 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28946 is exported. However, on systems without dynamic vague linkage,
28947 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28948 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28949 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28950 else
28951 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28952 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28955 static bool
28956 arm_cxx_class_data_always_comdat (void)
28958 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28959 vague linkage if the class has no key function. */
28960 return !TARGET_AAPCS_BASED;
28964 /* The EABI says __aeabi_atexit should be used to register static
28965 destructors. */
28967 static bool
28968 arm_cxx_use_aeabi_atexit (void)
28970 return TARGET_AAPCS_BASED;
28974 void
28975 arm_set_return_address (rtx source, rtx scratch)
28977 arm_stack_offsets *offsets;
28978 HOST_WIDE_INT delta;
28979 rtx addr;
28980 unsigned long saved_regs;
28982 offsets = arm_get_frame_offsets ();
28983 saved_regs = offsets->saved_regs_mask;
28985 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28986 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28987 else
28989 if (frame_pointer_needed)
28990 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28991 else
28993 /* LR will be the first saved register. */
28994 delta = offsets->outgoing_args - (offsets->frame + 4);
28997 if (delta >= 4096)
28999 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29000 GEN_INT (delta & ~4095)));
29001 addr = scratch;
29002 delta &= 4095;
29004 else
29005 addr = stack_pointer_rtx;
29007 addr = plus_constant (Pmode, addr, delta);
29009 /* The store needs to be marked as frame related in order to prevent
29010 DSE from deleting it as dead if it is based on fp. */
29011 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29012 RTX_FRAME_RELATED_P (insn) = 1;
29013 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29018 void
29019 thumb_set_return_address (rtx source, rtx scratch)
29021 arm_stack_offsets *offsets;
29022 HOST_WIDE_INT delta;
29023 HOST_WIDE_INT limit;
29024 int reg;
29025 rtx addr;
29026 unsigned long mask;
29028 emit_use (source);
29030 offsets = arm_get_frame_offsets ();
29031 mask = offsets->saved_regs_mask;
29032 if (mask & (1 << LR_REGNUM))
29034 limit = 1024;
29035 /* Find the saved regs. */
29036 if (frame_pointer_needed)
29038 delta = offsets->soft_frame - offsets->saved_args;
29039 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29040 if (TARGET_THUMB1)
29041 limit = 128;
29043 else
29045 delta = offsets->outgoing_args - offsets->saved_args;
29046 reg = SP_REGNUM;
29048 /* Allow for the stack frame. */
29049 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29050 delta -= 16;
29051 /* The link register is always the first saved register. */
29052 delta -= 4;
29054 /* Construct the address. */
29055 addr = gen_rtx_REG (SImode, reg);
29056 if (delta > limit)
29058 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29059 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29060 addr = scratch;
29062 else
29063 addr = plus_constant (Pmode, addr, delta);
29065 /* The store needs to be marked as frame related in order to prevent
29066 DSE from deleting it as dead if it is based on fp. */
29067 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
29068 RTX_FRAME_RELATED_P (insn) = 1;
29069 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
29071 else
29072 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29075 /* Implements target hook vector_mode_supported_p. */
29076 bool
29077 arm_vector_mode_supported_p (machine_mode mode)
29079 /* Neon also supports V2SImode, etc. listed in the clause below. */
29080 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29081 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
29082 return true;
29084 if ((TARGET_NEON || TARGET_IWMMXT)
29085 && ((mode == V2SImode)
29086 || (mode == V4HImode)
29087 || (mode == V8QImode)))
29088 return true;
29090 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29091 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29092 || mode == V2HAmode))
29093 return true;
29095 return false;
29098 /* Implements target hook array_mode_supported_p. */
29100 static bool
29101 arm_array_mode_supported_p (machine_mode mode,
29102 unsigned HOST_WIDE_INT nelems)
29104 if (TARGET_NEON
29105 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29106 && (nelems >= 2 && nelems <= 4))
29107 return true;
29109 return false;
29112 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29113 registers when autovectorizing for Neon, at least until multiple vector
29114 widths are supported properly by the middle-end. */
29116 static machine_mode
29117 arm_preferred_simd_mode (machine_mode mode)
29119 if (TARGET_NEON)
29120 switch (mode)
29122 case SFmode:
29123 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29124 case SImode:
29125 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29126 case HImode:
29127 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29128 case QImode:
29129 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29130 case DImode:
29131 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29132 return V2DImode;
29133 break;
29135 default:;
29138 if (TARGET_REALLY_IWMMXT)
29139 switch (mode)
29141 case SImode:
29142 return V2SImode;
29143 case HImode:
29144 return V4HImode;
29145 case QImode:
29146 return V8QImode;
29148 default:;
29151 return word_mode;
29154 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29156 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29157 using r0-r4 for function arguments, r7 for the stack frame, and not have
29158 enough left over to do doubleword arithmetic. For Thumb-2 all the
29159 potentially problematic instructions accept high registers so this is not
29160 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29161 that require many low registers. */
29162 static bool
29163 arm_class_likely_spilled_p (reg_class_t rclass)
29165 if ((TARGET_THUMB1 && rclass == LO_REGS)
29166 || rclass == CC_REG)
29167 return true;
29169 return false;
29172 /* Implements target hook small_register_classes_for_mode_p. */
29173 bool
29174 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29176 return TARGET_THUMB1;
29179 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29180 ARM insns and therefore guarantee that the shift count is modulo 256.
29181 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29182 guarantee no particular behavior for out-of-range counts. */
29184 static unsigned HOST_WIDE_INT
29185 arm_shift_truncation_mask (machine_mode mode)
29187 return mode == SImode ? 255 : 0;
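/* Example (annotation only): for SImode the mask is 255, reflecting the
   fact that ARM register-specified shifts use only the bottom byte of the
   count register, so a shift count of 257 behaves like a shift by 1.  For
   DImode the mask is 0 and the middle-end must not assume any particular
   truncation of out-of-range counts.  */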
29191 /* Map internal gcc register numbers to DWARF2 register numbers. */
29193 unsigned int
29194 arm_dbx_register_number (unsigned int regno)
29196 if (regno < 16)
29197 return regno;
29199 if (IS_VFP_REGNUM (regno))
29201 /* See comment in arm_dwarf_register_span. */
29202 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29203 return 64 + regno - FIRST_VFP_REGNUM;
29204 else
29205 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29208 if (IS_IWMMXT_GR_REGNUM (regno))
29209 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29211 if (IS_IWMMXT_REGNUM (regno))
29212 return 112 + regno - FIRST_IWMMXT_REGNUM;
29214 gcc_unreachable ();
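/* Sample mappings produced by the function above (annotation only):
   r0-r15 map to DWARF numbers 0-15, s0 maps to 64, d16 maps to
   256 + 32/2 = 272, wCGR0 maps to 104 and wR0 maps to 112.  */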
29217 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29218 GCC models them as 64 32-bit registers, so we need to describe this to
29219 the DWARF generation code. Other registers can use the default. */
29220 static rtx
29221 arm_dwarf_register_span (rtx rtl)
29223 machine_mode mode;
29224 unsigned regno;
29225 rtx parts[16];
29226 int nregs;
29227 int i;
29229 regno = REGNO (rtl);
29230 if (!IS_VFP_REGNUM (regno))
29231 return NULL_RTX;
29233 /* XXX FIXME: The EABI defines two VFP register ranges:
29234 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29235 256-287: D0-D31
29236 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29237 corresponding D register. Until GDB supports this, we shall use the
29238 legacy encodings. We also use these encodings for D0-D15 for
29239 compatibility with older debuggers. */
29240 mode = GET_MODE (rtl);
29241 if (GET_MODE_SIZE (mode) < 8)
29242 return NULL_RTX;
29244 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29246 nregs = GET_MODE_SIZE (mode) / 4;
29247 for (i = 0; i < nregs; i += 2)
29248 if (TARGET_BIG_END)
29250 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29251 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29253 else
29255 parts[i] = gen_rtx_REG (SImode, regno + i);
29256 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29259 else
29261 nregs = GET_MODE_SIZE (mode) / 8;
29262 for (i = 0; i < nregs; i++)
29263 parts[i] = gen_rtx_REG (DImode, regno + i);
29266 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29269 #if ARM_UNWIND_INFO
29270 /* Emit unwind directives for a store-multiple instruction or stack pointer
29271 push during alignment.
29272 These should only ever be generated by the function prologue code, so
29273 expect them to have a particular form.
29274 The store-multiple instruction sometimes pushes pc as the last register,
29275 although it should not be tracked into unwind information, or for -Os
29276 sometimes pushes some dummy registers before the first register that needs
29277 to be tracked in unwind information; such dummy registers are there just
29278 to avoid separate stack adjustment, and will not be restored in the
29279 epilogue. */
29281 static void
29282 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29284 int i;
29285 HOST_WIDE_INT offset;
29286 HOST_WIDE_INT nregs;
29287 int reg_size;
29288 unsigned reg;
29289 unsigned lastreg;
29290 unsigned padfirst = 0, padlast = 0;
29291 rtx e;
29293 e = XVECEXP (p, 0, 0);
29294 gcc_assert (GET_CODE (e) == SET);
29296 /* First insn will adjust the stack pointer. */
29297 gcc_assert (GET_CODE (e) == SET
29298 && REG_P (SET_DEST (e))
29299 && REGNO (SET_DEST (e)) == SP_REGNUM
29300 && GET_CODE (SET_SRC (e)) == PLUS);
29302 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29303 nregs = XVECLEN (p, 0) - 1;
29304 gcc_assert (nregs);
29306 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29307 if (reg < 16)
29309 /* For -Os dummy registers can be pushed at the beginning to
29310 avoid separate stack pointer adjustment. */
29311 e = XVECEXP (p, 0, 1);
29312 e = XEXP (SET_DEST (e), 0);
29313 if (GET_CODE (e) == PLUS)
29314 padfirst = INTVAL (XEXP (e, 1));
29315 gcc_assert (padfirst == 0 || optimize_size);
29316 /* The function prologue may also push pc, but not annotate it as it is
29317 never restored. We turn this into a stack pointer adjustment. */
29318 e = XVECEXP (p, 0, nregs);
29319 e = XEXP (SET_DEST (e), 0);
29320 if (GET_CODE (e) == PLUS)
29321 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29322 else
29323 padlast = offset - 4;
29324 gcc_assert (padlast == 0 || padlast == 4);
29325 if (padlast == 4)
29326 fprintf (asm_out_file, "\t.pad #4\n");
29327 reg_size = 4;
29328 fprintf (asm_out_file, "\t.save {");
29330 else if (IS_VFP_REGNUM (reg))
29332 reg_size = 8;
29333 fprintf (asm_out_file, "\t.vsave {");
29335 else
29336 /* Unknown register type. */
29337 gcc_unreachable ();
29339 /* If the stack increment doesn't match the size of the saved registers,
29340 something has gone horribly wrong. */
29341 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29343 offset = padfirst;
29344 lastreg = 0;
29345 /* The remaining insns will describe the stores. */
29346 for (i = 1; i <= nregs; i++)
29348 /* Expect (set (mem <addr>) (reg)).
29349 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29350 e = XVECEXP (p, 0, i);
29351 gcc_assert (GET_CODE (e) == SET
29352 && MEM_P (SET_DEST (e))
29353 && REG_P (SET_SRC (e)));
29355 reg = REGNO (SET_SRC (e));
29356 gcc_assert (reg >= lastreg);
29358 if (i != 1)
29359 fprintf (asm_out_file, ", ");
29360 /* We can't use %r for vfp because we need to use the
29361 double precision register names. */
29362 if (IS_VFP_REGNUM (reg))
29363 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29364 else
29365 asm_fprintf (asm_out_file, "%r", reg);
29367 #ifdef ENABLE_CHECKING
29368 /* Check that the addresses are consecutive. */
29369 e = XEXP (SET_DEST (e), 0);
29370 if (GET_CODE (e) == PLUS)
29371 gcc_assert (REG_P (XEXP (e, 0))
29372 && REGNO (XEXP (e, 0)) == SP_REGNUM
29373 && CONST_INT_P (XEXP (e, 1))
29374 && offset == INTVAL (XEXP (e, 1)));
29375 else
29376 gcc_assert (i == 1
29377 && REG_P (e)
29378 && REGNO (e) == SP_REGNUM);
29379 offset += reg_size;
29380 #endif
29382 fprintf (asm_out_file, "}\n");
29383 if (padfirst)
29384 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
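/* Illustrative output (annotation only): a prologue store-multiple such as
   "push {r4, r5, lr}" results in the directive

       .save {r4, r5, lr}

   while a VFP save of d8-d9 produces ".vsave {d8, d9}"; when the prologue
   also pushed pc purely as padding, a ".pad #4" directive is emitted ahead
   of the .save instead of listing pc.  */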
29387 /* Emit unwind directives for a SET. */
29389 static void
29390 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29392 rtx e0;
29393 rtx e1;
29394 unsigned reg;
29396 e0 = XEXP (p, 0);
29397 e1 = XEXP (p, 1);
29398 switch (GET_CODE (e0))
29400 case MEM:
29401 /* Pushing a single register. */
29402 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29403 || !REG_P (XEXP (XEXP (e0, 0), 0))
29404 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29405 abort ();
29407 asm_fprintf (asm_out_file, "\t.save ");
29408 if (IS_VFP_REGNUM (REGNO (e1)))
29409 asm_fprintf(asm_out_file, "{d%d}\n",
29410 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29411 else
29412 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29413 break;
29415 case REG:
29416 if (REGNO (e0) == SP_REGNUM)
29418 /* A stack increment. */
29419 if (GET_CODE (e1) != PLUS
29420 || !REG_P (XEXP (e1, 0))
29421 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29422 || !CONST_INT_P (XEXP (e1, 1)))
29423 abort ();
29425 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29426 -INTVAL (XEXP (e1, 1)));
29428 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29430 HOST_WIDE_INT offset;
29432 if (GET_CODE (e1) == PLUS)
29434 if (!REG_P (XEXP (e1, 0))
29435 || !CONST_INT_P (XEXP (e1, 1)))
29436 abort ();
29437 reg = REGNO (XEXP (e1, 0));
29438 offset = INTVAL (XEXP (e1, 1));
29439 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29440 HARD_FRAME_POINTER_REGNUM, reg,
29441 offset);
29443 else if (REG_P (e1))
29445 reg = REGNO (e1);
29446 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29447 HARD_FRAME_POINTER_REGNUM, reg);
29449 else
29450 abort ();
29452 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29454 /* Move from sp to reg. */
29455 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29457 else if (GET_CODE (e1) == PLUS
29458 && REG_P (XEXP (e1, 0))
29459 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29460 && CONST_INT_P (XEXP (e1, 1)))
29462 /* Set reg to offset from sp. */
29463 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29464 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29466 else
29467 abort ();
29468 break;
29470 default:
29471 abort ();
29476 /* Emit unwind directives for the given insn. */
29478 static void
29479 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29481 rtx note, pat;
29482 bool handled_one = false;
29484 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29485 return;
29487 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29488 && (TREE_NOTHROW (current_function_decl)
29489 || crtl->all_throwers_are_sibcalls))
29490 return;
29492 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29493 return;
29495 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29497 switch (REG_NOTE_KIND (note))
29499 case REG_FRAME_RELATED_EXPR:
29500 pat = XEXP (note, 0);
29501 goto found;
29503 case REG_CFA_REGISTER:
29504 pat = XEXP (note, 0);
29505 if (pat == NULL)
29507 pat = PATTERN (insn);
29508 if (GET_CODE (pat) == PARALLEL)
29509 pat = XVECEXP (pat, 0, 0);
29512 /* Only emitted for IS_STACKALIGN re-alignment. */
29514 rtx dest, src;
29515 unsigned reg;
29517 src = SET_SRC (pat);
29518 dest = SET_DEST (pat);
29520 gcc_assert (src == stack_pointer_rtx);
29521 reg = REGNO (dest);
29522 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29523 reg + 0x90, reg);
29525 handled_one = true;
29526 break;
29528 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29529 to get correct DWARF information for shrink-wrapping. We should not
29530 emit unwind information for it because these notes are used either for
29531 pretend arguments or to adjust sp and restore registers from the
29532 stack. */
29533 case REG_CFA_DEF_CFA:
29534 case REG_CFA_ADJUST_CFA:
29535 case REG_CFA_RESTORE:
29536 return;
29538 case REG_CFA_EXPRESSION:
29539 case REG_CFA_OFFSET:
29540 /* ??? Only handling here what we actually emit. */
29541 gcc_unreachable ();
29543 default:
29544 break;
29547 if (handled_one)
29548 return;
29549 pat = PATTERN (insn);
29550 found:
29552 switch (GET_CODE (pat))
29554 case SET:
29555 arm_unwind_emit_set (asm_out_file, pat);
29556 break;
29558 case SEQUENCE:
29559 /* Store multiple. */
29560 arm_unwind_emit_sequence (asm_out_file, pat);
29561 break;
29563 default:
29564 abort();
29569 /* Output a reference from a function exception table to the type_info
29570 object X. The EABI specifies that the symbol should be relocated by
29571 an R_ARM_TARGET2 relocation. */
29573 static bool
29574 arm_output_ttype (rtx x)
29576 fputs ("\t.word\t", asm_out_file);
29577 output_addr_const (asm_out_file, x);
29578 /* Use special relocations for symbol references. */
29579 if (!CONST_INT_P (x))
29580 fputs ("(TARGET2)", asm_out_file);
29581 fputc ('\n', asm_out_file);
29583 return TRUE;
29586 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29588 static void
29589 arm_asm_emit_except_personality (rtx personality)
29591 fputs ("\t.personality\t", asm_out_file);
29592 output_addr_const (asm_out_file, personality);
29593 fputc ('\n', asm_out_file);
29596 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29598 static void
29599 arm_asm_init_sections (void)
29601 exception_section = get_unnamed_section (0, output_section_asm_op,
29602 "\t.handlerdata");
29604 #endif /* ARM_UNWIND_INFO */
29606 /* Output unwind directives for the start/end of a function. */
29608 void
29609 arm_output_fn_unwind (FILE * f, bool prologue)
29611 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29612 return;
29614 if (prologue)
29615 fputs ("\t.fnstart\n", f);
29616 else
29618 /* If this function will never be unwound, then mark it as such.
29619 The same condition is used in arm_unwind_emit to suppress
29620 the frame annotations. */
29621 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29622 && (TREE_NOTHROW (current_function_decl)
29623 || crtl->all_throwers_are_sibcalls))
29624 fputs("\t.cantunwind\n", f);
29626 fputs ("\t.fnend\n", f);
29630 static bool
29631 arm_emit_tls_decoration (FILE *fp, rtx x)
29633 enum tls_reloc reloc;
29634 rtx val;
29636 val = XVECEXP (x, 0, 0);
29637 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29639 output_addr_const (fp, val);
29641 switch (reloc)
29643 case TLS_GD32:
29644 fputs ("(tlsgd)", fp);
29645 break;
29646 case TLS_LDM32:
29647 fputs ("(tlsldm)", fp);
29648 break;
29649 case TLS_LDO32:
29650 fputs ("(tlsldo)", fp);
29651 break;
29652 case TLS_IE32:
29653 fputs ("(gottpoff)", fp);
29654 break;
29655 case TLS_LE32:
29656 fputs ("(tpoff)", fp);
29657 break;
29658 case TLS_DESCSEQ:
29659 fputs ("(tlsdesc)", fp);
29660 break;
29661 default:
29662 gcc_unreachable ();
29665 switch (reloc)
29667 case TLS_GD32:
29668 case TLS_LDM32:
29669 case TLS_IE32:
29670 case TLS_DESCSEQ:
29671 fputs (" + (. - ", fp);
29672 output_addr_const (fp, XVECEXP (x, 0, 2));
29673 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29674 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29675 output_addr_const (fp, XVECEXP (x, 0, 3));
29676 fputc (')', fp);
29677 break;
29678 default:
29679 break;
29682 return TRUE;
29685 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29687 static void
29688 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29690 gcc_assert (size == 4);
29691 fputs ("\t.word\t", file);
29692 output_addr_const (file, x);
29693 fputs ("(tlsldo)", file);
29696 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29698 static bool
29699 arm_output_addr_const_extra (FILE *fp, rtx x)
29701 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29702 return arm_emit_tls_decoration (fp, x);
29703 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29705 char label[256];
29706 int labelno = INTVAL (XVECEXP (x, 0, 0));
29708 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29709 assemble_name_raw (fp, label);
29711 return TRUE;
29713 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29715 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29716 if (GOT_PCREL)
29717 fputs ("+.", fp);
29718 fputs ("-(", fp);
29719 output_addr_const (fp, XVECEXP (x, 0, 0));
29720 fputc (')', fp);
29721 return TRUE;
29723 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29725 output_addr_const (fp, XVECEXP (x, 0, 0));
29726 if (GOT_PCREL)
29727 fputs ("+.", fp);
29728 fputs ("-(", fp);
29729 output_addr_const (fp, XVECEXP (x, 0, 1));
29730 fputc (')', fp);
29731 return TRUE;
29733 else if (GET_CODE (x) == CONST_VECTOR)
29734 return arm_emit_vector_const (fp, x);
29736 return FALSE;
29739 /* Output assembly for a shift instruction.
29740 SET_FLAGS determines how the instruction modifies the condition codes.
29741 0 - Do not set condition codes.
29742 1 - Set condition codes.
29743 2 - Use smallest instruction. */
29744 const char *
29745 arm_output_shift(rtx * operands, int set_flags)
29747 char pattern[100];
29748 static const char flag_chars[3] = {'?', '.', '!'};
29749 const char *shift;
29750 HOST_WIDE_INT val;
29751 char c;
29753 c = flag_chars[set_flags];
29754 if (TARGET_UNIFIED_ASM)
29756 shift = shift_op(operands[3], &val);
29757 if (shift)
29759 if (val != -1)
29760 operands[2] = GEN_INT(val);
29761 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29763 else
29764 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29766 else
29767 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29768 output_asm_insn (pattern, operands);
29769 return "";
29772 /* Output assembly for a WMMX immediate shift instruction. */
29773 const char *
29774 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29776 int shift = INTVAL (operands[2]);
29777 char templ[50];
29778 machine_mode opmode = GET_MODE (operands[0]);
29780 gcc_assert (shift >= 0);
29782 /* Handle shift values that the register versions cannot encode: > 63 for
29783 the D qualifier, 31 for the W qualifier, or 15 for the H qualifier. */
29784 if (((opmode == V4HImode) && (shift > 15))
29785 || ((opmode == V2SImode) && (shift > 31))
29786 || ((opmode == DImode) && (shift > 63)))
29788 if (wror_or_wsra)
29790 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29791 output_asm_insn (templ, operands);
29792 if (opmode == DImode)
29794 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29795 output_asm_insn (templ, operands);
29798 else
29800 /* The destination register will contain all zeros. */
29801 sprintf (templ, "wzero\t%%0");
29802 output_asm_insn (templ, operands);
29804 return "";
29807 if ((opmode == DImode) && (shift > 32))
29809 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29810 output_asm_insn (templ, operands);
29811 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29812 output_asm_insn (templ, operands);
29814 else
29816 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29817 output_asm_insn (templ, operands);
29819 return "";
29822 /* Output assembly for a WMMX tinsr instruction. */
29823 const char *
29824 arm_output_iwmmxt_tinsr (rtx *operands)
29826 int mask = INTVAL (operands[3]);
29827 int i;
29828 char templ[50];
29829 int units = mode_nunits[GET_MODE (operands[0])];
29830 gcc_assert ((mask & (mask - 1)) == 0);
29831 for (i = 0; i < units; ++i)
29833 if ((mask & 0x01) == 1)
29835 break;
29837 mask >>= 1;
29839 gcc_assert (i < units);
29841 switch (GET_MODE (operands[0]))
29843 case V8QImode:
29844 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29845 break;
29846 case V4HImode:
29847 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29848 break;
29849 case V2SImode:
29850 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29851 break;
29852 default:
29853 gcc_unreachable ();
29854 break;
29856 output_asm_insn (templ, operands);
29858 return "";
29861 /* Output a Thumb-1 casesi dispatch sequence. */
29862 const char *
29863 thumb1_output_casesi (rtx *operands)
29865 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29867 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29869 switch (GET_MODE(diff_vec))
29871 case QImode:
29872 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29873 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29874 case HImode:
29875 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29876 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29877 case SImode:
29878 return "bl\t%___gnu_thumb1_case_si";
29879 default:
29880 gcc_unreachable ();
29884 /* Output a Thumb-2 casesi instruction. */
29885 const char *
29886 thumb2_output_casesi (rtx *operands)
29888 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29890 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29892 output_asm_insn ("cmp\t%0, %1", operands);
29893 output_asm_insn ("bhi\t%l3", operands);
29894 switch (GET_MODE(diff_vec))
29896 case QImode:
29897 return "tbb\t[%|pc, %0]";
29898 case HImode:
29899 return "tbh\t[%|pc, %0, lsl #1]";
29900 case SImode:
29901 if (flag_pic)
29903 output_asm_insn ("adr\t%4, %l2", operands);
29904 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29905 output_asm_insn ("add\t%4, %4, %5", operands);
29906 return "bx\t%4";
29908 else
29910 output_asm_insn ("adr\t%4, %l2", operands);
29911 return "ldr\t%|pc, [%4, %0, lsl #2]";
29913 default:
29914 gcc_unreachable ();
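/* Illustrative expansion (annotation only): for a byte-sized dispatch table
   with the index in r0, the Thumb-2 sequence above comes out as roughly

       cmp   r0, <bound>
       bhi   .Ldefault
       tbb   [pc, r0]

   with tbh and ldr-based variants used for halfword and word tables.  */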
29918 /* Most ARM cores are single issue, but some newer ones can issue more
29919 than one instruction per cycle. The scheduler descriptions rely on this being correct. */
29920 static int
29921 arm_issue_rate (void)
29923 switch (arm_tune)
29925 case cortexa15:
29926 case cortexa57:
29927 return 3;
29929 case cortexr4:
29930 case cortexr4f:
29931 case cortexr5:
29932 case genericv7a:
29933 case cortexa5:
29934 case cortexa7:
29935 case cortexa8:
29936 case cortexa9:
29937 case cortexa12:
29938 case cortexa53:
29939 case fa726te:
29940 case marvell_pj4:
29941 return 2;
29943 default:
29944 return 1;
29948 /* A table and a function to perform ARM-specific name mangling for
29949 NEON vector types in order to conform to the AAPCS (see "Procedure
29950 Call Standard for the ARM Architecture", Appendix A). To qualify
29951 for emission with the mangled names defined in that document, a
29952 vector type must not only be of the correct mode but also be
29953 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29954 typedef struct
29956 machine_mode mode;
29957 const char *element_type_name;
29958 const char *aapcs_name;
29959 } arm_mangle_map_entry;
29961 static arm_mangle_map_entry arm_mangle_map[] = {
29962 /* 64-bit containerized types. */
29963 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29964 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29965 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29966 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29967 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29968 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29969 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29970 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29971 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29972 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29974 /* 128-bit containerized types. */
29975 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29976 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29977 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29978 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29979 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29980 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29981 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29982 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29983 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29984 { VOIDmode, NULL, NULL }
29987 const char *
29988 arm_mangle_type (const_tree type)
29990 arm_mangle_map_entry *pos = arm_mangle_map;
29992 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29993 has to be mangled as if it were in the "std" namespace. */
29994 if (TARGET_AAPCS_BASED
29995 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29996 return "St9__va_list";
29998 /* Half-precision float. */
29999 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30000 return "Dh";
30002 if (TREE_CODE (type) != VECTOR_TYPE)
30003 return NULL;
30005 /* Check the mode of the vector type, and the name of the vector
30006 element type, against the table. */
30007 while (pos->mode != VOIDmode)
30009 tree elt_type = TREE_TYPE (type);
30011 if (pos->mode == TYPE_MODE (type)
30012 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
30013 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
30014 pos->element_type_name))
30015 return pos->aapcs_name;
30017 pos++;
30020 /* Use the default mangling for unrecognized (possibly user-defined)
30021 vector types. */
30022 return NULL;
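/* Examples (annotation only): under the AAPCS, "__va_list" mangles as
   "St9__va_list", a scalar __fp16 as "Dh", and the Neon type int8x8_t
   (a V8QImode vector of __builtin_neon_qi elements) as the table entry
   "15__simd64_int8_t".  */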
30025 /* Order of allocation of core registers for Thumb: this allocation is
30026 written over the corresponding initial entries of the array
30027 initialized with REG_ALLOC_ORDER. We allocate all low registers
30028 first. Saving and restoring a low register is usually cheaper than
30029 using a call-clobbered high register. */
30031 static const int thumb_core_reg_alloc_order[] =
30033 3, 2, 1, 0, 4, 5, 6, 7,
30034 14, 12, 8, 9, 10, 11
30037 /* Adjust register allocation order when compiling for Thumb. */
30039 void
30040 arm_order_regs_for_local_alloc (void)
30042 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30043 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30044 if (TARGET_THUMB)
30045 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30046 sizeof (thumb_core_reg_alloc_order));
30049 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30051 bool
30052 arm_frame_pointer_required (void)
30054 return (cfun->has_nonlocal_label
30055 || SUBTARGET_FRAME_POINTER_REQUIRED
30056 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
30059 /* Only Thumb-1 lacks support for conditional execution, so return true
30060 if the target is not Thumb-1. */
30061 static bool
30062 arm_have_conditional_execution (void)
30064 return !TARGET_THUMB1;
30067 tree
30068 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
30070 machine_mode in_mode, out_mode;
30071 int in_n, out_n;
30072 bool out_unsigned_p = TYPE_UNSIGNED (type_out);
30074 if (TREE_CODE (type_out) != VECTOR_TYPE
30075 || TREE_CODE (type_in) != VECTOR_TYPE)
30076 return NULL_TREE;
30078 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30079 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30080 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30081 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30083 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
30084 decl of the vectorized builtin for the appropriate vector mode.
30085 NULL_TREE is returned if no such builtin is available. */
30086 #undef ARM_CHECK_BUILTIN_MODE
30087 #define ARM_CHECK_BUILTIN_MODE(C) \
30088 (TARGET_NEON && TARGET_FPU_ARMV8 \
30089 && flag_unsafe_math_optimizations \
30090 && ARM_CHECK_BUILTIN_MODE_1 (C))
30092 #undef ARM_CHECK_BUILTIN_MODE_1
30093 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30094 (out_mode == SFmode && out_n == C \
30095 && in_mode == SFmode && in_n == C)
30097 #undef ARM_FIND_VRINT_VARIANT
30098 #define ARM_FIND_VRINT_VARIANT(N) \
30099 (ARM_CHECK_BUILTIN_MODE (2) \
30100 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
30101 : (ARM_CHECK_BUILTIN_MODE (4) \
30102 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
30103 : NULL_TREE))
30105 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
30107 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
30108 switch (fn)
30110 case BUILT_IN_FLOORF:
30111 return ARM_FIND_VRINT_VARIANT (vrintm);
30112 case BUILT_IN_CEILF:
30113 return ARM_FIND_VRINT_VARIANT (vrintp);
30114 case BUILT_IN_TRUNCF:
30115 return ARM_FIND_VRINT_VARIANT (vrintz);
30116 case BUILT_IN_ROUNDF:
30117 return ARM_FIND_VRINT_VARIANT (vrinta);
30118 #undef ARM_CHECK_BUILTIN_MODE_1
30119 #define ARM_CHECK_BUILTIN_MODE_1(C) \
30120 (out_mode == SImode && out_n == C \
30121 && in_mode == SFmode && in_n == C)
30123 #define ARM_FIND_VCVT_VARIANT(N) \
30124 (ARM_CHECK_BUILTIN_MODE (2) \
30125 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
30126 : (ARM_CHECK_BUILTIN_MODE (4) \
30127 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
30128 : NULL_TREE))
30130 #define ARM_FIND_VCVTU_VARIANT(N) \
30131 (ARM_CHECK_BUILTIN_MODE (2) \
30132 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
30133 : (ARM_CHECK_BUILTIN_MODE (4) \
30134 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
30135 : NULL_TREE))
30136 case BUILT_IN_LROUNDF:
30137 return out_unsigned_p
30138 ? ARM_FIND_VCVTU_VARIANT (vcvta)
30139 : ARM_FIND_VCVT_VARIANT (vcvta);
30140 case BUILT_IN_LCEILF:
30141 return out_unsigned_p
30142 ? ARM_FIND_VCVTU_VARIANT (vcvtp)
30143 : ARM_FIND_VCVT_VARIANT (vcvtp);
30144 case BUILT_IN_LFLOORF:
30145 return out_unsigned_p
30146 ? ARM_FIND_VCVTU_VARIANT (vcvtm)
30147 : ARM_FIND_VCVT_VARIANT (vcvtm);
30148 #undef ARM_CHECK_BUILTIN_MODE
30149 #define ARM_CHECK_BUILTIN_MODE(C, N) \
30150 (out_mode == N##mode && out_n == C \
30151 && in_mode == N##mode && in_n == C)
30152 case BUILT_IN_BSWAP16:
30153 if (ARM_CHECK_BUILTIN_MODE (4, HI))
30154 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
30155 else if (ARM_CHECK_BUILTIN_MODE (8, HI))
30156 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
30157 else
30158 return NULL_TREE;
30159 case BUILT_IN_BSWAP32:
30160 if (ARM_CHECK_BUILTIN_MODE (2, SI))
30161 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
30162 else if (ARM_CHECK_BUILTIN_MODE (4, SI))
30163 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30164 else
30165 return NULL_TREE;
30166 case BUILT_IN_BSWAP64:
30167 if (ARM_CHECK_BUILTIN_MODE (2, DI))
30168 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30169 else
30170 return NULL_TREE;
30171 case BUILT_IN_COPYSIGNF:
30172 if (ARM_CHECK_BUILTIN_MODE (2, SF))
30173 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
30174 else if (ARM_CHECK_BUILTIN_MODE (4, SF))
30175 return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
30176 else
30177 return NULL_TREE;
30179 default:
30180 return NULL_TREE;
30183 return NULL_TREE;
30185 #undef ARM_FIND_VCVT_VARIANT
30186 #undef ARM_FIND_VCVTU_VARIANT
30187 #undef ARM_CHECK_BUILTIN_MODE
30188 #undef ARM_FIND_VRINT_VARIANT
30191 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30192 static HOST_WIDE_INT
30193 arm_vector_alignment (const_tree type)
30195 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30197 if (TARGET_AAPCS_BASED)
30198 align = MIN (align, 64);
30200 return align;
30203 static unsigned int
30204 arm_autovectorize_vector_sizes (void)
30206 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30209 static bool
30210 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30212 /* Vectors which aren't in packed structures will not be less aligned than
30213 the natural alignment of their element type, so this is safe. */
30214 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30215 return !is_packed;
30217 return default_builtin_vector_alignment_reachable (type, is_packed);
30220 static bool
30221 arm_builtin_support_vector_misalignment (machine_mode mode,
30222 const_tree type, int misalignment,
30223 bool is_packed)
30225 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30227 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30229 if (is_packed)
30230 return align == 1;
30232 /* If the misalignment is unknown, we should be able to handle the access
30233 so long as it is not to a member of a packed data structure. */
30234 if (misalignment == -1)
30235 return true;
30237 /* Return true if the misalignment is a multiple of the natural alignment
30238 of the vector's element type. This is probably always going to be
30239 true in practice, since we've already established that this isn't a
30240 packed access. */
30241 return ((misalignment % align) == 0);
30244 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30245 is_packed);
30248 static void
30249 arm_conditional_register_usage (void)
30251 int regno;
30253 if (TARGET_THUMB1 && optimize_size)
30255 /* When optimizing for size on Thumb-1, it's better not
30256 to use the HI regs, because of the overhead of
30257 stacking them. */
30258 for (regno = FIRST_HI_REGNUM;
30259 regno <= LAST_HI_REGNUM; ++regno)
30260 fixed_regs[regno] = call_used_regs[regno] = 1;
30263 /* The link register can be clobbered by any branch insn,
30264 but we have no way to track that at present, so mark
30265 it as unavailable. */
30266 if (TARGET_THUMB1)
30267 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30269 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30271 /* VFPv3 registers are disabled when earlier VFP
30272 versions are selected due to the definition of
30273 LAST_VFP_REGNUM. */
30274 for (regno = FIRST_VFP_REGNUM;
30275 regno <= LAST_VFP_REGNUM; ++ regno)
30277 fixed_regs[regno] = 0;
30278 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30279 || regno >= FIRST_VFP_REGNUM + 32;
30283 if (TARGET_REALLY_IWMMXT)
30285 regno = FIRST_IWMMXT_GR_REGNUM;
30286 /* The 2002/10/09 revision of the XScale ABI has wCG0
30287 and wCG1 as call-preserved registers. The 2002/11/21
30288 revision changed this so that all wCG registers are
30289 scratch registers. */
30290 for (regno = FIRST_IWMMXT_GR_REGNUM;
30291 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30292 fixed_regs[regno] = 0;
30293 /* The XScale ABI has wR0 - wR9 as scratch registers,
30294 the rest as call-preserved registers. */
30295 for (regno = FIRST_IWMMXT_REGNUM;
30296 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30298 fixed_regs[regno] = 0;
30299 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30303 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30305 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30306 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30308 else if (TARGET_APCS_STACK)
30310 fixed_regs[10] = 1;
30311 call_used_regs[10] = 1;
30313 /* -mcaller-super-interworking reserves r11 for calls to
30314 _interwork_r11_call_via_rN(). Making the register global
30315 is an easy way of ensuring that it remains valid for all
30316 calls. */
30317 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30318 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30320 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30321 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30322 if (TARGET_CALLER_INTERWORKING)
30323 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30325 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30328 static reg_class_t
30329 arm_preferred_rename_class (reg_class_t rclass)
30331 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30332 using GENERAL_REGS. During the register rename pass we therefore prefer
30333 LO_REGS, which can reduce code size. */
30334 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30335 return LO_REGS;
30336 else
30337 return NO_REGS;
30340 /* Compute the attribute "length" of insn "*push_multi".
30341 So this function MUST be kept in sync with that insn pattern. */
30343 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30345 int i, regno, hi_reg;
30346 int num_saves = XVECLEN (parallel_op, 0);
30348 /* ARM mode. */
30349 if (TARGET_ARM)
30350 return 4;
30351 /* Thumb1 mode. */
30352 if (TARGET_THUMB1)
30353 return 2;
30355 /* Thumb2 mode. */
30356 regno = REGNO (first_op);
30357 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30358 for (i = 1; i < num_saves && !hi_reg; i++)
30360 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30361 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30364 if (!hi_reg)
30365 return 2;
30366 return 4;
30369 /* Compute the number of instructions emitted by output_move_double. */
30371 arm_count_output_move_double_insns (rtx *operands)
30373 int count;
30374 rtx ops[2];
30375 /* output_move_double may modify the operands array, so call it
30376 here on a copy of the array. */
30377 ops[0] = operands[0];
30378 ops[1] = operands[1];
30379 output_move_double (ops, false, &count);
30380 return count;
30384 vfp3_const_double_for_fract_bits (rtx operand)
30386 REAL_VALUE_TYPE r0;
30388 if (!CONST_DOUBLE_P (operand))
30389 return 0;
30391 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30392 if (exact_real_inverse (DFmode, &r0))
30394 if (exact_real_truncate (DFmode, &r0))
30396 HOST_WIDE_INT value = real_to_integer (&r0);
30397 value = value & 0xffffffff;
30398 if ((value != 0) && ( (value & (value - 1)) == 0))
30399 return int_log2 (value);
30402 return 0;
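/* Worked example (annotation only): for the constant 1.0/65536.0 the exact
   inverse is 65536.0 = 2^16, so the function returns 16; the VFP
   fixed-point conversion patterns can then use #16 as the fbits operand
   of vcvt.  */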
30406 vfp3_const_double_for_bits (rtx operand)
30408 REAL_VALUE_TYPE r0;
30410 if (!CONST_DOUBLE_P (operand))
30411 return 0;
30413 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30414 if (exact_real_truncate (DFmode, &r0))
30416 HOST_WIDE_INT value = real_to_integer (&r0);
30417 value = value & 0xffffffff;
30418 if ((value != 0) && ( (value & (value - 1)) == 0))
30419 return int_log2 (value);
30422 return 0;
30425 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30427 static void
30428 arm_pre_atomic_barrier (enum memmodel model)
30430 if (need_atomic_barrier_p (model, true))
30431 emit_insn (gen_memory_barrier ());
30434 static void
30435 arm_post_atomic_barrier (enum memmodel model)
30437 if (need_atomic_barrier_p (model, false))
30438 emit_insn (gen_memory_barrier ());
30441 /* Emit the load-exclusive and store-exclusive instructions.
30442 Use acquire and release versions if necessary. */
30444 static void
30445 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30447 rtx (*gen) (rtx, rtx);
30449 if (acq)
30451 switch (mode)
30453 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30454 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30455 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30456 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30457 default:
30458 gcc_unreachable ();
30461 else
30463 switch (mode)
30465 case QImode: gen = gen_arm_load_exclusiveqi; break;
30466 case HImode: gen = gen_arm_load_exclusivehi; break;
30467 case SImode: gen = gen_arm_load_exclusivesi; break;
30468 case DImode: gen = gen_arm_load_exclusivedi; break;
30469 default:
30470 gcc_unreachable ();
30474 emit_insn (gen (rval, mem));
30477 static void
30478 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30479 rtx mem, bool rel)
30481 rtx (*gen) (rtx, rtx, rtx);
30483 if (rel)
30485 switch (mode)
30487 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30488 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30489 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30490 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30491 default:
30492 gcc_unreachable ();
30495 else
30497 switch (mode)
30499 case QImode: gen = gen_arm_store_exclusiveqi; break;
30500 case HImode: gen = gen_arm_store_exclusivehi; break;
30501 case SImode: gen = gen_arm_store_exclusivesi; break;
30502 case DImode: gen = gen_arm_store_exclusivedi; break;
30503 default:
30504 gcc_unreachable ();
30508 emit_insn (gen (bval, rval, mem));
30511 /* Mark the previous jump instruction as unlikely. */
30513 static void
30514 emit_unlikely_jump (rtx insn)
30516 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30518 insn = emit_jump_insn (insn);
30519 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30522 /* Expand a compare and swap pattern. */
30524 void
30525 arm_expand_compare_and_swap (rtx operands[])
30527 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30528 machine_mode mode;
30529 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30531 bval = operands[0];
30532 rval = operands[1];
30533 mem = operands[2];
30534 oldval = operands[3];
30535 newval = operands[4];
30536 is_weak = operands[5];
30537 mod_s = operands[6];
30538 mod_f = operands[7];
30539 mode = GET_MODE (mem);
30541 /* Normally the succ memory model must be stronger than fail, but in the
30542 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30543 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30545 if (TARGET_HAVE_LDACQ
30546 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30547 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30548 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30550 switch (mode)
30552 case QImode:
30553 case HImode:
30554 /* For narrow modes, we're going to perform the comparison in SImode,
30555 so do the zero-extension now. */
30556 rval = gen_reg_rtx (SImode);
30557 oldval = convert_modes (SImode, mode, oldval, true);
30558 /* FALLTHRU */
30560 case SImode:
30561 /* Force the value into a register if needed. We waited until after
30562 the zero-extension above to do this properly. */
30563 if (!arm_add_operand (oldval, SImode))
30564 oldval = force_reg (SImode, oldval);
30565 break;
30567 case DImode:
30568 if (!cmpdi_operand (oldval, mode))
30569 oldval = force_reg (mode, oldval);
30570 break;
30572 default:
30573 gcc_unreachable ();
30576 switch (mode)
30578 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30579 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30580 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30581 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30582 default:
30583 gcc_unreachable ();
30586 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30588 if (mode == QImode || mode == HImode)
30589 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30591 /* In all cases, we arrange for success to be signaled by Z set.
30592 This arrangement allows for the boolean result to be used directly
30593 in a subsequent branch, post optimization. */
30594 x = gen_rtx_REG (CCmode, CC_REGNUM);
30595 x = gen_rtx_EQ (SImode, x, const0_rtx);
30596 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
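/* Example of the memory-model adjustment above (annotation only): a call
   such as

       __atomic_compare_exchange_n (p, &expected, desired, 0,
                                    __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);

   on a target with ldaex/stlex would otherwise lose the acquire semantics
   of the failure path, so the success model is promoted to ACQ_REL before
   the pattern is emitted.  */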
30599 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30600 another memory store between the load-exclusive and store-exclusive can
30601 reset the monitor from Exclusive to Open state. This means we must wait
30602 until after reload to split the pattern, lest we get a register spill in
30603 the middle of the atomic sequence. */
30605 void
30606 arm_split_compare_and_swap (rtx operands[])
30608 rtx rval, mem, oldval, newval, scratch;
30609 machine_mode mode;
30610 enum memmodel mod_s, mod_f;
30611 bool is_weak;
30612 rtx_code_label *label1, *label2;
30613 rtx x, cond;
30615 rval = operands[0];
30616 mem = operands[1];
30617 oldval = operands[2];
30618 newval = operands[3];
30619 is_weak = (operands[4] != const0_rtx);
30620 mod_s = (enum memmodel) INTVAL (operands[5]);
30621 mod_f = (enum memmodel) INTVAL (operands[6]);
30622 scratch = operands[7];
30623 mode = GET_MODE (mem);
30625 bool use_acquire = TARGET_HAVE_LDACQ
30626 && !(mod_s == MEMMODEL_RELAXED
30627 || mod_s == MEMMODEL_CONSUME
30628 || mod_s == MEMMODEL_RELEASE);
30630 bool use_release = TARGET_HAVE_LDACQ
30631 && !(mod_s == MEMMODEL_RELAXED
30632 || mod_s == MEMMODEL_CONSUME
30633 || mod_s == MEMMODEL_ACQUIRE);
30635 /* Checks whether a barrier is needed and emits one accordingly. */
30636 if (!(use_acquire || use_release))
30637 arm_pre_atomic_barrier (mod_s);
30639 label1 = NULL;
30640 if (!is_weak)
30642 label1 = gen_label_rtx ();
30643 emit_label (label1);
30645 label2 = gen_label_rtx ();
30647 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30649 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30650 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30651 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30652 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30653 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30655 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30657 /* Weak or strong, we want EQ to be true for success, so that we
30658 match the flags that we got from the compare above. */
30659 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30660 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30661 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30663 if (!is_weak)
30665 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30666 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30667 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30668 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30671 if (mod_f != MEMMODEL_RELAXED)
30672 emit_label (label2);
30674 /* Checks whether a barrier is needed and emits one accordingly. */
30675 if (!(use_acquire || use_release))
30676 arm_post_atomic_barrier (mod_s);
30678 if (mod_f == MEMMODEL_RELAXED)
30679 emit_label (label2);
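/* Illustrative sketch of the split sequence (annotation only): for a strong
   SImode compare-and-swap with a seq-cst model and no ldaex/stlex, the
   emitted insns correspond roughly to

       dmb
   1:  ldrex   rval, [mem]
       cmp     rval, oldval
       bne     2f
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     1b
   2:  dmb

   with the barriers dropped when acquire/release instructions are used
   instead.  */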
30682 void
30683 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30684 rtx value, rtx model_rtx, rtx cond)
30686 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30687 machine_mode mode = GET_MODE (mem);
30688 machine_mode wmode = (mode == DImode ? DImode : SImode);
30689 rtx_code_label *label;
30690 rtx x;
30692 bool use_acquire = TARGET_HAVE_LDACQ
30693 && !(model == MEMMODEL_RELAXED
30694 || model == MEMMODEL_CONSUME
30695 || model == MEMMODEL_RELEASE);
30697 bool use_release = TARGET_HAVE_LDACQ
30698 && !(model == MEMMODEL_RELAXED
30699 || model == MEMMODEL_CONSUME
30700 || model == MEMMODEL_ACQUIRE);
30702 /* Checks whether a barrier is needed and emits one accordingly. */
30703 if (!(use_acquire || use_release))
30704 arm_pre_atomic_barrier (model);
30706 label = gen_label_rtx ();
30707 emit_label (label);
30709 if (new_out)
30710 new_out = gen_lowpart (wmode, new_out);
30711 if (old_out)
30712 old_out = gen_lowpart (wmode, old_out);
30713 else
30714 old_out = new_out;
30715 value = simplify_gen_subreg (wmode, value, mode, 0);
30717 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30719 switch (code)
30721 case SET:
30722 new_out = value;
30723 break;
30725 case NOT:
30726 x = gen_rtx_AND (wmode, old_out, value);
30727 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30728 x = gen_rtx_NOT (wmode, new_out);
30729 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30730 break;
30732 case MINUS:
30733 if (CONST_INT_P (value))
30735 value = GEN_INT (-INTVAL (value));
30736 code = PLUS;
30738 /* FALLTHRU */
30740 case PLUS:
30741 if (mode == DImode)
30743 /* DImode plus/minus need to clobber flags. */
30744 /* The adddi3 and subdi3 patterns are incorrectly written so that
30745 they require matching operands, even when we could easily support
30746 three operands. Thankfully, this can be fixed up post-splitting,
30747 as the individual add+adc patterns do accept three operands and
30748 post-reload cprop can make these moves go away. */
30749 emit_move_insn (new_out, old_out);
30750 if (code == PLUS)
30751 x = gen_adddi3 (new_out, new_out, value);
30752 else
30753 x = gen_subdi3 (new_out, new_out, value);
30754 emit_insn (x);
30755 break;
30757 /* FALLTHRU */
30759 default:
30760 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30761 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30762 break;
30765 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30766 use_release);
30768 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30769 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30771 /* Checks whether a barrier is needed and emits one accordingly. */
30772 if (!(use_acquire || use_release))
30773 arm_post_atomic_barrier (model);
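/* As an informal illustration of the loop emitted above: an SImode
   __atomic_fetch_add with __ATOMIC_SEQ_CST on a target without LDAEX/STLEX
   corresponds roughly to (register names are placeholders):

        dmb                     @ arm_pre_atomic_barrier
     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b              @ retry until the exclusive store succeeds
        dmb                     @ arm_post_atomic_barrier

   For the NOT (nand) case the ADD becomes an AND followed by an MVN, and
   DImode operations use the doubleword exclusives with an add/adc pair
   instead.  */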
30776 #define MAX_VECT_LEN 16
30778 struct expand_vec_perm_d
30780 rtx target, op0, op1;
30781 unsigned char perm[MAX_VECT_LEN];
30782 machine_mode vmode;
30783 unsigned char nelt;
30784 bool one_vector_p;
30785 bool testing_p;
30788 /* Generate a variable permutation. */
30790 static void
30791 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30793 machine_mode vmode = GET_MODE (target);
30794 bool one_vector_p = rtx_equal_p (op0, op1);
30796 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30797 gcc_checking_assert (GET_MODE (op0) == vmode);
30798 gcc_checking_assert (GET_MODE (op1) == vmode);
30799 gcc_checking_assert (GET_MODE (sel) == vmode);
30800 gcc_checking_assert (TARGET_NEON);
30802 if (one_vector_p)
30804 if (vmode == V8QImode)
30805 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30806 else
30807 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30809 else
30811 rtx pair;
30813 if (vmode == V8QImode)
30815 pair = gen_reg_rtx (V16QImode);
30816 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30817 pair = gen_lowpart (TImode, pair);
30818 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30820 else
30822 pair = gen_reg_rtx (OImode);
30823 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30824 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30829 void
30830 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30832 machine_mode vmode = GET_MODE (target);
30833 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30834 bool one_vector_p = rtx_equal_p (op0, op1);
30835 rtx rmask[MAX_VECT_LEN], mask;
30837 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30838 numbering of elements for big-endian, we must reverse the order. */
30839 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30841 /* The VTBL instruction does not use a modulo index, so we must take care
30842 of that ourselves. */
30843 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30844 for (i = 0; i < nelt; ++i)
30845 rmask[i] = mask;
30846 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30847 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30849 arm_expand_vec_perm_1 (target, op0, op1, sel);
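/* For example: with a single V8QImode input the mask built above is 7, so a
   variable selector element of 9 is reduced to 1 before the VTBL is emitted.
   This provides the modulo indexing VEC_PERM_EXPR requires; without the AND,
   VTBL would write zero for any out-of-range index.  */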
30852 /* Generate or test for an insn that supports a constant permutation. */
30854 /* Recognize patterns for the VUZP insns. */
30856 static bool
30857 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30859 unsigned int i, odd, mask, nelt = d->nelt;
30860 rtx out0, out1, in0, in1, x;
30861 rtx (*gen)(rtx, rtx, rtx, rtx);
30863 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30864 return false;
30866 /* Note that these are little-endian tests. Adjust for big-endian later. */
30867 if (d->perm[0] == 0)
30868 odd = 0;
30869 else if (d->perm[0] == 1)
30870 odd = 1;
30871 else
30872 return false;
30873 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30875 for (i = 0; i < nelt; i++)
30877 unsigned elt = (i * 2 + odd) & mask;
30878 if (d->perm[i] != elt)
30879 return false;
30882 /* Success! */
30883 if (d->testing_p)
30884 return true;
30886 switch (d->vmode)
30888 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30889 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30890 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30891 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30892 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30893 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30894 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30895 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30896 default:
30897 gcc_unreachable ();
30900 in0 = d->op0;
30901 in1 = d->op1;
30902 if (BYTES_BIG_ENDIAN)
30904 x = in0, in0 = in1, in1 = x;
30905 odd = !odd;
30908 out0 = d->target;
30909 out1 = gen_reg_rtx (d->vmode);
30910 if (odd)
30911 x = out0, out0 = out1, out1 = x;
30913 emit_insn (gen (out0, in0, in1, out1));
30914 return true;
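/* For example: with V4HImode and two distinct operands (nelt == 4), the
   selector {0, 2, 4, 6} passes the test above with odd == 0 and becomes the
   first output of VUZP.16, i.e. the even-numbered elements of op0:op1,
   while {1, 3, 5, 7} picks the odd-numbered elements via the second
   output.  */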
30917 /* Recognize patterns for the VZIP insns. */
30919 static bool
30920 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30922 unsigned int i, high, mask, nelt = d->nelt;
30923 rtx out0, out1, in0, in1, x;
30924 rtx (*gen)(rtx, rtx, rtx, rtx);
30926 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30927 return false;
30929 /* Note that these are little-endian tests. Adjust for big-endian later. */
30930 high = nelt / 2;
30931 if (d->perm[0] == high)
30933 else if (d->perm[0] == 0)
30934 high = 0;
30935 else
30936 return false;
30937 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30939 for (i = 0; i < nelt / 2; i++)
30941 unsigned elt = (i + high) & mask;
30942 if (d->perm[i * 2] != elt)
30943 return false;
30944 elt = (elt + nelt) & mask;
30945 if (d->perm[i * 2 + 1] != elt)
30946 return false;
30949 /* Success! */
30950 if (d->testing_p)
30951 return true;
30953 switch (d->vmode)
30955 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30956 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30957 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30958 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30959 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30960 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30961 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30962 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30963 default:
30964 gcc_unreachable ();
30967 in0 = d->op0;
30968 in1 = d->op1;
30969 if (BYTES_BIG_ENDIAN)
30971 x = in0, in0 = in1, in1 = x;
30972 high = !high;
30975 out0 = d->target;
30976 out1 = gen_reg_rtx (d->vmode);
30977 if (high)
30978 x = out0, out0 = out1, out1 = x;
30980 emit_insn (gen (out0, in0, in1, out1));
30981 return true;
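/* For example: with V4HImode and two distinct operands (nelt == 4), the
   selector {0, 4, 1, 5} passes the test above with high == 0 and becomes
   the first output of VZIP.16 (the interleaved low halves of op0 and op1),
   while {2, 6, 3, 7} interleaves the high halves via the second output.  */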
30984 /* Recognize patterns for the VREV insns. */
30986 static bool
30987 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30989 unsigned int i, j, diff, nelt = d->nelt;
30990 rtx (*gen)(rtx, rtx, rtx);
30992 if (!d->one_vector_p)
30993 return false;
30995 diff = d->perm[0];
30996 switch (diff)
30998 case 7:
30999 switch (d->vmode)
31001 case V16QImode: gen = gen_neon_vrev64v16qi; break;
31002 case V8QImode: gen = gen_neon_vrev64v8qi; break;
31003 default:
31004 return false;
31006 break;
31007 case 3:
31008 switch (d->vmode)
31010 case V16QImode: gen = gen_neon_vrev32v16qi; break;
31011 case V8QImode: gen = gen_neon_vrev32v8qi; break;
31012 case V8HImode: gen = gen_neon_vrev64v8hi; break;
31013 case V4HImode: gen = gen_neon_vrev64v4hi; break;
31014 default:
31015 return false;
31017 break;
31018 case 1:
31019 switch (d->vmode)
31021 case V16QImode: gen = gen_neon_vrev16v16qi; break;
31022 case V8QImode: gen = gen_neon_vrev16v8qi; break;
31023 case V8HImode: gen = gen_neon_vrev32v8hi; break;
31024 case V4HImode: gen = gen_neon_vrev32v4hi; break;
31025 case V4SImode: gen = gen_neon_vrev64v4si; break;
31026 case V2SImode: gen = gen_neon_vrev64v2si; break;
31027 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
31028 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
31029 default:
31030 return false;
31032 break;
31033 default:
31034 return false;
31037 for (i = 0; i < nelt ; i += diff + 1)
31038 for (j = 0; j <= diff; j += 1)
31040 /* This is guaranteed to be true because the value of diff
31041 is 7, 3 or 1, and we have enough elements in the
31042 queue to cover it. A vector mask with a value of diff
31043 other than these implies that something has gone
31044 wrong by the time we get here. */
31045 gcc_assert (i + j < nelt);
31046 if (d->perm[i + j] != i + diff - j)
31047 return false;
31050 /* Success! */
31051 if (d->testing_p)
31052 return true;
31054 /* ??? The third operand is an artifact of the builtin infrastructure
31055 and is ignored by the actual instruction. */
31056 emit_insn (gen (d->target, d->op0, const0_rtx));
31057 return true;
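/* For example: with a single V8QImode operand, the selector
   {3, 2, 1, 0, 7, 6, 5, 4} has diff == 3 and reverses the bytes within each
   32-bit group, so it is emitted as VREV32.8, while {7, 6, 5, 4, 3, 2, 1, 0}
   has diff == 7 and is emitted as VREV64.8.  */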
31060 /* Recognize patterns for the VTRN insns. */
31062 static bool
31063 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31065 unsigned int i, odd, mask, nelt = d->nelt;
31066 rtx out0, out1, in0, in1, x;
31067 rtx (*gen)(rtx, rtx, rtx, rtx);
31069 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31070 return false;
31072 /* Note that these are little-endian tests. Adjust for big-endian later. */
31073 if (d->perm[0] == 0)
31074 odd = 0;
31075 else if (d->perm[0] == 1)
31076 odd = 1;
31077 else
31078 return false;
31079 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31081 for (i = 0; i < nelt; i += 2)
31083 if (d->perm[i] != i + odd)
31084 return false;
31085 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31086 return false;
31089 /* Success! */
31090 if (d->testing_p)
31091 return true;
31093 switch (d->vmode)
31095 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
31096 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
31097 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
31098 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
31099 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
31100 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
31101 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
31102 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
31103 default:
31104 gcc_unreachable ();
31107 in0 = d->op0;
31108 in1 = d->op1;
31109 if (BYTES_BIG_ENDIAN)
31111 x = in0, in0 = in1, in1 = x;
31112 odd = !odd;
31115 out0 = d->target;
31116 out1 = gen_reg_rtx (d->vmode);
31117 if (odd)
31118 x = out0, out0 = out1, out1 = x;
31120 emit_insn (gen (out0, in0, in1, out1));
31121 return true;
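/* For example: with V4SImode and two distinct operands (nelt == 4), the
   selector {0, 4, 2, 6} passes the test above with odd == 0 and becomes the
   first output of VTRN.32, i.e. {op0[0], op1[0], op0[2], op1[2]}, while
   {1, 5, 3, 7} selects the odd rows via the second output.  */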
31124 /* Recognize patterns for the VEXT insns. */
31126 static bool
31127 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31129 unsigned int i, nelt = d->nelt;
31130 rtx (*gen) (rtx, rtx, rtx, rtx);
31131 rtx offset;
31133 unsigned int location;
31135 unsigned int next = d->perm[0] + 1;
31137 /* TODO: Handle GCC's numbering of elements for big-endian. */
31138 if (BYTES_BIG_ENDIAN)
31139 return false;
31141 /* Check if the extracted indexes are increasing by one. */
31142 for (i = 1; i < nelt; next++, i++)
31144 /* If we hit the most significant element of the 2nd vector in
31145 the previous iteration, no need to test further. */
31146 if (next == 2 * nelt)
31147 return false;
31149 /* If we are operating on only one vector: it could be a
31150 rotation. If there are only two elements of size < 64, let
31151 arm_evpc_neon_vrev catch it. */
31152 if (d->one_vector_p && (next == nelt))
31154 if ((nelt == 2) && (d->vmode != V2DImode))
31155 return false;
31156 else
31157 next = 0;
31160 if (d->perm[i] != next)
31161 return false;
31164 location = d->perm[0];
31166 switch (d->vmode)
31168 case V16QImode: gen = gen_neon_vextv16qi; break;
31169 case V8QImode: gen = gen_neon_vextv8qi; break;
31170 case V4HImode: gen = gen_neon_vextv4hi; break;
31171 case V8HImode: gen = gen_neon_vextv8hi; break;
31172 case V2SImode: gen = gen_neon_vextv2si; break;
31173 case V4SImode: gen = gen_neon_vextv4si; break;
31174 case V2SFmode: gen = gen_neon_vextv2sf; break;
31175 case V4SFmode: gen = gen_neon_vextv4sf; break;
31176 case V2DImode: gen = gen_neon_vextv2di; break;
31177 default:
31178 return false;
31181 /* Success! */
31182 if (d->testing_p)
31183 return true;
31185 offset = GEN_INT (location);
31186 emit_insn (gen (d->target, d->op0, d->op1, offset));
31187 return true;
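/* For example: with V4SImode (nelt == 4), the selector {1, 2, 3, 4} is a
   run of consecutive indexes starting at 1 and is emitted as VEXT.32 with
   an offset of 1, taking the top three elements of op0 followed by the
   first element of op1.  */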
31190 /* The NEON VTBL instruction is a fully variable permutation that's even
31191 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31192 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31193 can do slightly better by expanding this as a constant where we don't
31194 have to apply a mask. */
31196 static bool
31197 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31199 rtx rperm[MAX_VECT_LEN], sel;
31200 machine_mode vmode = d->vmode;
31201 unsigned int i, nelt = d->nelt;
31203 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31204 numbering of elements for big-endian, we must reverse the order. */
31205 if (BYTES_BIG_ENDIAN)
31206 return false;
31208 if (d->testing_p)
31209 return true;
31211 /* Generic code will try constant permutation twice. Once with the
31212 original mode and again with the elements lowered to QImode.
31213 So wait and don't do the selector expansion ourselves. */
31214 if (vmode != V8QImode && vmode != V16QImode)
31215 return false;
31217 for (i = 0; i < nelt; ++i)
31218 rperm[i] = GEN_INT (d->perm[i]);
31219 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31220 sel = force_reg (vmode, sel);
31222 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31223 return true;
31226 static bool
31227 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31229 /* Check if the input mask matches vext before reordering the
31230 operands. */
31231 if (TARGET_NEON)
31232 if (arm_evpc_neon_vext (d))
31233 return true;
31235 /* The pattern matching functions above are written to look for a small
31236 number to begin the sequence (0, 1, N/2). If we begin with an index
31237 from the second operand, we can swap the operands. */
31238 if (d->perm[0] >= d->nelt)
31240 unsigned i, nelt = d->nelt;
31241 rtx x;
31243 for (i = 0; i < nelt; ++i)
31244 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31246 x = d->op0;
31247 d->op0 = d->op1;
31248 d->op1 = x;
31251 if (TARGET_NEON)
31253 if (arm_evpc_neon_vuzp (d))
31254 return true;
31255 if (arm_evpc_neon_vzip (d))
31256 return true;
31257 if (arm_evpc_neon_vrev (d))
31258 return true;
31259 if (arm_evpc_neon_vtrn (d))
31260 return true;
31261 return arm_evpc_neon_vtbl (d);
31263 return false;
31266 /* Expand a vec_perm_const pattern. */
31268 bool
31269 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31271 struct expand_vec_perm_d d;
31272 int i, nelt, which;
31274 d.target = target;
31275 d.op0 = op0;
31276 d.op1 = op1;
31278 d.vmode = GET_MODE (target);
31279 gcc_assert (VECTOR_MODE_P (d.vmode));
31280 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31281 d.testing_p = false;
31283 for (i = which = 0; i < nelt; ++i)
31285 rtx e = XVECEXP (sel, 0, i);
31286 int ei = INTVAL (e) & (2 * nelt - 1);
31287 which |= (ei < nelt ? 1 : 2);
31288 d.perm[i] = ei;
31291 switch (which)
31293 default:
31294 gcc_unreachable();
31296 case 3:
31297 d.one_vector_p = false;
31298 if (!rtx_equal_p (op0, op1))
31299 break;
31301 /* The elements of PERM do not suggest that only the first operand
31302 is used, but both operands are identical. Allow easier matching
31303 of the permutation by folding the permutation into the single
31304 input vector. */
31305 /* FALLTHRU */
31306 case 2:
31307 for (i = 0; i < nelt; ++i)
31308 d.perm[i] &= nelt - 1;
31309 d.op0 = op1;
31310 d.one_vector_p = true;
31311 break;
31313 case 1:
31314 d.op1 = op0;
31315 d.one_vector_p = true;
31316 break;
31319 return arm_expand_vec_perm_const_1 (&d);
31322 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31324 static bool
31325 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
31326 const unsigned char *sel)
31328 struct expand_vec_perm_d d;
31329 unsigned int i, nelt, which;
31330 bool ret;
31332 d.vmode = vmode;
31333 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31334 d.testing_p = true;
31335 memcpy (d.perm, sel, nelt);
31337 /* Categorize the set of elements in the selector. */
31338 for (i = which = 0; i < nelt; ++i)
31340 unsigned char e = d.perm[i];
31341 gcc_assert (e < 2 * nelt);
31342 which |= (e < nelt ? 1 : 2);
31345 /* For all elements from second vector, fold the elements to first. */
31346 if (which == 2)
31347 for (i = 0; i < nelt; ++i)
31348 d.perm[i] -= nelt;
31350 /* Check whether the mask can be applied to the vector type. */
31351 d.one_vector_p = (which != 3);
31353 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31354 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31355 if (!d.one_vector_p)
31356 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31358 start_sequence ();
31359 ret = arm_expand_vec_perm_const_1 (&d);
31360 end_sequence ();
31362 return ret;
31365 bool
31366 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31368 /* If we are soft float and either have ldrd or the mode fits
31369 in a word, then all auto increment forms are ok. */
31370 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31371 return true;
31373 switch (code)
31375 /* Post increment and Pre Decrement are supported for all
31376 instruction forms except for vector forms. */
31377 case ARM_POST_INC:
31378 case ARM_PRE_DEC:
31379 if (VECTOR_MODE_P (mode))
31381 if (code != ARM_PRE_DEC)
31382 return true;
31383 else
31384 return false;
31387 return true;
31389 case ARM_POST_DEC:
31390 case ARM_PRE_INC:
31391 /* Without LDRD and mode size greater than
31392 word size, there is no point in auto-incrementing
31393 because ldm and stm will not have these forms. */
31394 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31395 return false;
31397 /* Vector and floating point modes do not support
31398 these auto increment forms. */
31399 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31400 return false;
31402 return true;
31404 default:
31405 return false;
31409 return false;
31412 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31413 on ARM, since we know that shifts by negative amounts are no-ops.
31414 Additionally, the default expansion code is not available or suitable
31415 for post-reload insn splits (this can occur when the register allocator
31416 chooses not to do a shift in NEON).
31418 This function is used in both initial expand and post-reload splits, and
31419 handles all kinds of 64-bit shifts.
31421 Input requirements:
31422 - It is safe for the input and output to be the same register, but
31423 early-clobber rules apply for the shift amount and scratch registers.
31424 - Shift by register requires both scratch registers. In all other cases
31425 the scratch registers may be NULL.
31426 - Ashiftrt by a register also clobbers the CC register. */
31427 void
31428 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31429 rtx amount, rtx scratch1, rtx scratch2)
31431 rtx out_high = gen_highpart (SImode, out);
31432 rtx out_low = gen_lowpart (SImode, out);
31433 rtx in_high = gen_highpart (SImode, in);
31434 rtx in_low = gen_lowpart (SImode, in);
31436 /* Terminology:
31437 in = the register pair containing the input value.
31438 out = the destination register pair.
31439 up = the high- or low-part of each pair.
31440 down = the opposite part to "up".
31441 In a shift, we can consider bits to shift from "up"-stream to
31442 "down"-stream, so in a left-shift "up" is the low-part and "down"
31443 is the high-part of each register pair. */
31445 rtx out_up = code == ASHIFT ? out_low : out_high;
31446 rtx out_down = code == ASHIFT ? out_high : out_low;
31447 rtx in_up = code == ASHIFT ? in_low : in_high;
31448 rtx in_down = code == ASHIFT ? in_high : in_low;
31450 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31451 gcc_assert (out
31452 && (REG_P (out) || GET_CODE (out) == SUBREG)
31453 && GET_MODE (out) == DImode);
31454 gcc_assert (in
31455 && (REG_P (in) || GET_CODE (in) == SUBREG)
31456 && GET_MODE (in) == DImode);
31457 gcc_assert (amount
31458 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31459 && GET_MODE (amount) == SImode)
31460 || CONST_INT_P (amount)));
31461 gcc_assert (scratch1 == NULL
31462 || (GET_CODE (scratch1) == SCRATCH)
31463 || (GET_MODE (scratch1) == SImode
31464 && REG_P (scratch1)));
31465 gcc_assert (scratch2 == NULL
31466 || (GET_CODE (scratch2) == SCRATCH)
31467 || (GET_MODE (scratch2) == SImode
31468 && REG_P (scratch2)));
31469 gcc_assert (!REG_P (out) || !REG_P (amount)
31470 || !HARD_REGISTER_P (out)
31471 || (REGNO (out) != REGNO (amount)
31472 && REGNO (out) + 1 != REGNO (amount)));
31474 /* Macros to make following code more readable. */
31475 #define SUB_32(DEST,SRC) \
31476 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31477 #define RSB_32(DEST,SRC) \
31478 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31479 #define SUB_S_32(DEST,SRC) \
31480 gen_addsi3_compare0 ((DEST), (SRC), \
31481 GEN_INT (-32))
31482 #define SET(DEST,SRC) \
31483 gen_rtx_SET (SImode, (DEST), (SRC))
31484 #define SHIFT(CODE,SRC,AMOUNT) \
31485 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31486 #define LSHIFT(CODE,SRC,AMOUNT) \
31487 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31488 SImode, (SRC), (AMOUNT))
31489 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31490 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31491 SImode, (SRC), (AMOUNT))
31492 #define ORR(A,B) \
31493 gen_rtx_IOR (SImode, (A), (B))
31494 #define BRANCH(COND,LABEL) \
31495 gen_arm_cond_branch ((LABEL), \
31496 gen_rtx_ ## COND (CCmode, cc_reg, \
31497 const0_rtx), \
31498 cc_reg)
31500 /* Shifts by register and shifts by constant are handled separately. */
31501 if (CONST_INT_P (amount))
31503 /* We have a shift-by-constant. */
31505 /* First, handle out-of-range shift amounts.
31506 In both cases we try to match the result an ARM instruction in a
31507 shift-by-register would give. This helps reduce execution
31508 differences between optimization levels, but it won't stop other
31509 parts of the compiler doing different things. This is "undefined
31510 behaviour", in any case. */
31511 if (INTVAL (amount) <= 0)
31512 emit_insn (gen_movdi (out, in));
31513 else if (INTVAL (amount) >= 64)
31515 if (code == ASHIFTRT)
31517 rtx const31_rtx = GEN_INT (31);
31518 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31519 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31521 else
31522 emit_insn (gen_movdi (out, const0_rtx));
31525 /* Now handle valid shifts. */
31526 else if (INTVAL (amount) < 32)
31528 /* Shifts by a constant less than 32. */
31529 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31531 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31532 emit_insn (SET (out_down,
31533 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31534 out_down)));
31535 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31537 else
31539 /* Shifts by a constant greater than 31. */
31540 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31542 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31543 if (code == ASHIFTRT)
31544 emit_insn (gen_ashrsi3 (out_up, in_up,
31545 GEN_INT (31)));
31546 else
31547 emit_insn (SET (out_up, const0_rtx));
31550 else
31552 /* We have a shift-by-register. */
31553 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31555 /* This alternative requires the scratch registers. */
31556 gcc_assert (scratch1 && REG_P (scratch1));
31557 gcc_assert (scratch2 && REG_P (scratch2));
31559 /* We will need the values "amount-32" and "32-amount" later.
31560 Swapping them around now allows the later code to be more general. */
31561 switch (code)
31563 case ASHIFT:
31564 emit_insn (SUB_32 (scratch1, amount));
31565 emit_insn (RSB_32 (scratch2, amount));
31566 break;
31567 case ASHIFTRT:
31568 emit_insn (RSB_32 (scratch1, amount));
31569 /* Also set CC = amount > 32. */
31570 emit_insn (SUB_S_32 (scratch2, amount));
31571 break;
31572 case LSHIFTRT:
31573 emit_insn (RSB_32 (scratch1, amount));
31574 emit_insn (SUB_32 (scratch2, amount));
31575 break;
31576 default:
31577 gcc_unreachable ();
31580 /* Emit code like this:
31582 arithmetic-left:
31583 out_down = in_down << amount;
31584 out_down = (in_up << (amount - 32)) | out_down;
31585 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31586 out_up = in_up << amount;
31588 arithmetic-right:
31589 out_down = in_down >> amount;
31590 out_down = (in_up << (32 - amount)) | out_down;
31591 if (amount < 32)
31592 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31593 out_up = in_up << amount;
31595 logical-right:
31596 out_down = in_down >> amount;
31597 out_down = (in_up << (32 - amount)) | out_down;
31598 if (amount < 32)
31599 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31600 out_up = in_up << amount;
31602 The ARM and Thumb2 variants are the same but implemented slightly
31603 differently. If this were only called during expand we could just
31604 use the Thumb2 case and let combine do the right thing, but this
31605 can also be called from post-reload splitters. */
31607 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31609 if (!TARGET_THUMB2)
31611 /* Emit code for ARM mode. */
31612 emit_insn (SET (out_down,
31613 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31614 if (code == ASHIFTRT)
31616 rtx_code_label *done_label = gen_label_rtx ();
31617 emit_jump_insn (BRANCH (LT, done_label));
31618 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31619 out_down)));
31620 emit_label (done_label);
31622 else
31623 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31624 out_down)));
31626 else
31628 /* Emit code for Thumb2 mode.
31629 Thumb2 can't do shift and or in one insn. */
31630 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31631 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31633 if (code == ASHIFTRT)
31635 rtx_code_label *done_label = gen_label_rtx ();
31636 emit_jump_insn (BRANCH (LT, done_label));
31637 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31638 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31639 emit_label (done_label);
31641 else
31643 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31644 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31648 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31651 #undef SUB_32
31652 #undef RSB_32
31653 #undef SUB_S_32
31654 #undef SET
31655 #undef SHIFT
31656 #undef LSHIFT
31657 #undef REV_LSHIFT
31658 #undef ORR
31659 #undef BRANCH
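/* Worked example of the constant paths above: a DImode left shift by 5
   emits insns equivalent to

       out_high = in_high << 5;
       out_high |= (unsigned) in_low >> 27;    (the 32 - 5 bits that cross)
       out_low  = in_low << 5;

   while a logical right shift by 40 takes the "greater than 31" path:

       out_low  = (unsigned) in_high >> 8;     (40 - 32)
       out_high = 0;  */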
31663 /* Return true if COMPARISON is a valid comparison operation, and
31664 force its operands into a form that is valid for it. */
31665 bool
31666 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31668 enum rtx_code code = GET_CODE (*comparison);
31669 int code_int;
31670 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31671 ? GET_MODE (*op2) : GET_MODE (*op1);
31673 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31675 if (code == UNEQ || code == LTGT)
31676 return false;
31678 code_int = (int)code;
31679 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31680 PUT_CODE (*comparison, (enum rtx_code)code_int);
31682 switch (mode)
31684 case SImode:
31685 if (!arm_add_operand (*op1, mode))
31686 *op1 = force_reg (mode, *op1);
31687 if (!arm_add_operand (*op2, mode))
31688 *op2 = force_reg (mode, *op2);
31689 return true;
31691 case DImode:
31692 if (!cmpdi_operand (*op1, mode))
31693 *op1 = force_reg (mode, *op1);
31694 if (!cmpdi_operand (*op2, mode))
31695 *op2 = force_reg (mode, *op2);
31696 return true;
31698 case SFmode:
31699 case DFmode:
31700 if (!arm_float_compare_operand (*op1, mode))
31701 *op1 = force_reg (mode, *op1);
31702 if (!arm_float_compare_operand (*op2, mode))
31703 *op2 = force_reg (mode, *op2);
31704 return true;
31705 default:
31706 break;
31709 return false;
31713 /* Maximum number of instructions to set block of memory. */
31714 static int
31715 arm_block_set_max_insns (void)
31717 if (optimize_function_for_size_p (cfun))
31718 return 4;
31719 else
31720 return current_tune->max_insns_inline_memset;
31723 /* Return TRUE if it's profitable to set block of memory for
31724 non-vectorized case. VAL is the value to set the memory
31725 with. LENGTH is the number of bytes to set. ALIGN is the
31726 alignment of the destination memory in bytes. UNALIGNED_P
31727 is TRUE if we can only set the memory with instructions
31728 meeting alignment requirements. USE_STRD_P is TRUE if we
31729 can use strd to set the memory. */
31730 static bool
31731 arm_block_set_non_vect_profit_p (rtx val,
31732 unsigned HOST_WIDE_INT length,
31733 unsigned HOST_WIDE_INT align,
31734 bool unaligned_p, bool use_strd_p)
31736 int num = 0;
31737 /* For a leftover of 0-7 bytes, we can set the memory block using
31738 strb/strh/str with the minimum number of instructions. */
31739 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31741 if (unaligned_p)
31743 num = arm_const_inline_cost (SET, val);
31744 num += length / align + length % align;
31746 else if (use_strd_p)
31748 num = arm_const_double_inline_cost (val);
31749 num += (length >> 3) + leftover[length & 7];
31751 else
31753 num = arm_const_inline_cost (SET, val);
31754 num += (length >> 2) + leftover[length & 3];
31757 /* We may be able to combine last pair STRH/STRB into a single STR
31758 by shifting one byte back. */
31759 if (unaligned_access && length > 3 && (length & 3) == 3)
31760 num--;
31762 return (num <= arm_block_set_max_insns ());
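/* Worked example: setting 14 bytes with plain word stores (UNALIGNED_P and
   USE_STRD_P both false) needs (14 >> 2) = 3 STRs plus leftover[14 & 3] = 1
   STRH, on top of whatever arm_const_inline_cost charges for building the
   value (typically a single insn for a simple immediate).  That total of
   about 5 exceeds the -Os budget of 4 from arm_block_set_max_insns, so the
   inline expansion is rejected when optimizing for size.  */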
31765 /* Return TRUE if it's profitable to set block of memory for
31766 vectorized case. LENGTH is the number of bytes to set.
31767 ALIGN is the alignment of destination memory in bytes.
31768 MODE is the vector mode used to set the memory. */
31769 static bool
31770 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31771 unsigned HOST_WIDE_INT align,
31772 machine_mode mode)
31774 int num;
31775 bool unaligned_p = ((align & 3) != 0);
31776 unsigned int nelt = GET_MODE_NUNITS (mode);
31778 /* Instruction loading constant value. */
31779 num = 1;
31780 /* Instructions storing the memory. */
31781 num += (length + nelt - 1) / nelt;
31782 /* Instructions adjusting the address expression. We only need to
31783 adjust the address expression if it's 4-byte aligned and the
31784 leftover bytes can only be stored by a misaligned store instruction. */
31785 if (!unaligned_p && (length & 3) != 0)
31786 num++;
31788 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
31789 if (!unaligned_p && mode == V16QImode)
31790 num--;
31792 return (num <= arm_block_set_max_insns ());
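/* Worked example: setting 25 bytes through a 4-byte-aligned pointer with
   V16QImode stores (assuming unaligned access is available) costs 1 insn to
   load the constant vector, (25 + 15) / 16 = 2 vector stores, plus 1
   address adjustment because 25 is not a multiple of 4, minus 1 as noted
   above for the vst1:v16qi first store: 3 insns in total, which fits even
   the -Os budget of 4.  */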
31795 /* Set a block of memory using vectorization instructions for the
31796 unaligned case. We fill the first LENGTH bytes of the memory
31797 area starting from DSTBASE with byte constant VALUE. ALIGN is
31798 the alignment requirement of memory. Return TRUE if succeeded. */
31799 static bool
31800 arm_block_set_unaligned_vect (rtx dstbase,
31801 unsigned HOST_WIDE_INT length,
31802 unsigned HOST_WIDE_INT value,
31803 unsigned HOST_WIDE_INT align)
31805 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
31806 rtx dst, mem;
31807 rtx val_elt, val_vec, reg;
31808 rtx rval[MAX_VECT_LEN];
31809 rtx (*gen_func) (rtx, rtx);
31810 machine_mode mode;
31811 unsigned HOST_WIDE_INT v = value;
31813 gcc_assert ((align & 0x3) != 0);
31814 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31815 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31816 if (length >= nelt_v16)
31818 mode = V16QImode;
31819 gen_func = gen_movmisalignv16qi;
31821 else
31823 mode = V8QImode;
31824 gen_func = gen_movmisalignv8qi;
31826 nelt_mode = GET_MODE_NUNITS (mode);
31827 gcc_assert (length >= nelt_mode);
31828 /* Skip if it isn't profitable. */
31829 if (!arm_block_set_vect_profit_p (length, align, mode))
31830 return false;
31832 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31833 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31835 v = sext_hwi (v, BITS_PER_WORD);
31836 val_elt = GEN_INT (v);
31837 for (j = 0; j < nelt_mode; j++)
31838 rval[j] = val_elt;
31840 reg = gen_reg_rtx (mode);
31841 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31842 /* Emit instruction loading the constant value. */
31843 emit_move_insn (reg, val_vec);
31845 /* Handle nelt_mode bytes in a vector. */
31846 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31848 emit_insn ((*gen_func) (mem, reg));
31849 if (i + 2 * nelt_mode <= length)
31850 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31853 /* If at least nelt_v8 bytes are left over, we must be in
31854 V16QImode. */
31855 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31857 /* Handle (8, 16) bytes leftover. */
31858 if (i + nelt_v8 < length)
31860 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31861 /* We are shifting bytes back, set the alignment accordingly. */
31862 if ((length & 1) != 0 && align >= 2)
31863 set_mem_align (mem, BITS_PER_UNIT);
31865 emit_insn (gen_movmisalignv16qi (mem, reg));
31867 /* Handle (0, 8] bytes leftover. */
31868 else if (i < length && i + nelt_v8 >= length)
31870 if (mode == V16QImode)
31872 reg = gen_lowpart (V8QImode, reg);
31873 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
31875 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31876 + (nelt_mode - nelt_v8))));
31877 /* We are shifting bytes back, set the alignment accordingly. */
31878 if ((length & 1) != 0 && align >= 2)
31879 set_mem_align (mem, BITS_PER_UNIT);
31881 emit_insn (gen_movmisalignv8qi (mem, reg));
31884 return true;
31887 /* Set a block of memory using vectorization instructions for the
31888 aligned case. We fill the first LENGTH bytes of the memory area
31889 starting from DSTBASE with byte constant VALUE. ALIGN is the
31890 alignment requirement of memory. Return TRUE if succeeded. */
31891 static bool
31892 arm_block_set_aligned_vect (rtx dstbase,
31893 unsigned HOST_WIDE_INT length,
31894 unsigned HOST_WIDE_INT value,
31895 unsigned HOST_WIDE_INT align)
31897 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
31898 rtx dst, addr, mem;
31899 rtx val_elt, val_vec, reg;
31900 rtx rval[MAX_VECT_LEN];
31901 machine_mode mode;
31902 unsigned HOST_WIDE_INT v = value;
31904 gcc_assert ((align & 0x3) == 0);
31905 nelt_v8 = GET_MODE_NUNITS (V8QImode);
31906 nelt_v16 = GET_MODE_NUNITS (V16QImode);
31907 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31908 mode = V16QImode;
31909 else
31910 mode = V8QImode;
31912 nelt_mode = GET_MODE_NUNITS (mode);
31913 gcc_assert (length >= nelt_mode);
31914 /* Skip if it isn't profitable. */
31915 if (!arm_block_set_vect_profit_p (length, align, mode))
31916 return false;
31918 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31920 v = sext_hwi (v, BITS_PER_WORD);
31921 val_elt = GEN_INT (v);
31922 for (j = 0; j < nelt_mode; j++)
31923 rval[j] = val_elt;
31925 reg = gen_reg_rtx (mode);
31926 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
31927 /* Emit instruction loading the constant value. */
31928 emit_move_insn (reg, val_vec);
31930 i = 0;
31931 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
31932 if (mode == V16QImode)
31934 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31935 emit_insn (gen_movmisalignv16qi (mem, reg));
31936 i += nelt_mode;
31937 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
31938 if (i + nelt_v8 < length && i + nelt_v16 > length)
31940 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31941 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31942 /* We are shifting bytes back, set the alignment accordingly. */
31943 if ((length & 0x3) == 0)
31944 set_mem_align (mem, BITS_PER_UNIT * 4);
31945 else if ((length & 0x1) == 0)
31946 set_mem_align (mem, BITS_PER_UNIT * 2);
31947 else
31948 set_mem_align (mem, BITS_PER_UNIT);
31950 emit_insn (gen_movmisalignv16qi (mem, reg));
31951 return true;
31953 /* Fall through for bytes leftover. */
31954 mode = V8QImode;
31955 nelt_mode = GET_MODE_NUNITS (mode);
31956 reg = gen_lowpart (V8QImode, reg);
31959 /* Handle 8 bytes in a vector. */
31960 for (; (i + nelt_mode <= length); i += nelt_mode)
31962 addr = plus_constant (Pmode, dst, i);
31963 mem = adjust_automodify_address (dstbase, mode, addr, i);
31964 emit_move_insn (mem, reg);
31967 /* Handle single word leftover by shifting 4 bytes back. We can
31968 use aligned access for this case. */
31969 if (i + UNITS_PER_WORD == length)
31971 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31972 mem = adjust_automodify_address (dstbase, mode,
31973 addr, i - UNITS_PER_WORD);
31974 /* We are shifting 4 bytes back, set the alignment accordingly. */
31975 if (align > UNITS_PER_WORD)
31976 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31978 emit_move_insn (mem, reg);
31980 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31981 We have to use unaligned access for this case. */
31982 else if (i < length)
31984 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31985 mem = adjust_automodify_address (dstbase, mode, dst, 0);
31986 /* We are shifting bytes back, set the alignment accordingly. */
31987 if ((length & 1) == 0)
31988 set_mem_align (mem, BITS_PER_UNIT * 2);
31989 else
31990 set_mem_align (mem, BITS_PER_UNIT);
31992 emit_insn (gen_movmisalignv8qi (mem, reg));
31995 return true;
31998 /* Set a block of memory using plain strh/strb instructions, only
31999 using instructions allowed by ALIGN on the processor. We fill the
32000 first LENGTH bytes of the memory area starting from DSTBASE
32001 with byte constant VALUE. ALIGN is the alignment requirement
32002 of memory. */
32003 static bool
32004 arm_block_set_unaligned_non_vect (rtx dstbase,
32005 unsigned HOST_WIDE_INT length,
32006 unsigned HOST_WIDE_INT value,
32007 unsigned HOST_WIDE_INT align)
32009 unsigned int i;
32010 rtx dst, addr, mem;
32011 rtx val_exp, val_reg, reg;
32012 machine_mode mode;
32013 HOST_WIDE_INT v = value;
32015 gcc_assert (align == 1 || align == 2);
32017 if (align == 2)
32018 v |= (value << BITS_PER_UNIT);
32020 v = sext_hwi (v, BITS_PER_WORD);
32021 val_exp = GEN_INT (v);
32022 /* Skip if it isn't profitable. */
32023 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32024 align, true, false))
32025 return false;
32027 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32028 mode = (align == 2 ? HImode : QImode);
32029 val_reg = force_reg (SImode, val_exp);
32030 reg = gen_lowpart (mode, val_reg);
32032 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32034 addr = plus_constant (Pmode, dst, i);
32035 mem = adjust_automodify_address (dstbase, mode, addr, i);
32036 emit_move_insn (mem, reg);
32039 /* Handle single byte leftover. */
32040 if (i + 1 == length)
32042 reg = gen_lowpart (QImode, val_reg);
32043 addr = plus_constant (Pmode, dst, i);
32044 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32045 emit_move_insn (mem, reg);
32046 i++;
32049 gcc_assert (i == length);
32050 return true;
32053 /* Set a block of memory using plain strd/str/strh/strb instructions,
32054 to permit unaligned copies on processors which support unaligned
32055 semantics for those instructions. We fill the first LENGTH bytes
32056 of the memory area starting from DSTBASE with byte constant VALUE.
32057 ALIGN is the alignment requirement of memory. */
32058 static bool
32059 arm_block_set_aligned_non_vect (rtx dstbase,
32060 unsigned HOST_WIDE_INT length,
32061 unsigned HOST_WIDE_INT value,
32062 unsigned HOST_WIDE_INT align)
32064 unsigned int i;
32065 rtx dst, addr, mem;
32066 rtx val_exp, val_reg, reg;
32067 unsigned HOST_WIDE_INT v;
32068 bool use_strd_p;
32070 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32071 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32073 v = (value | (value << 8) | (value << 16) | (value << 24));
32074 if (length < UNITS_PER_WORD)
32075 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32077 if (use_strd_p)
32078 v |= (v << BITS_PER_WORD);
32079 else
32080 v = sext_hwi (v, BITS_PER_WORD);
32082 val_exp = GEN_INT (v);
32083 /* Skip if it isn't profitable. */
32084 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32085 align, false, use_strd_p))
32087 if (!use_strd_p)
32088 return false;
32090 /* Try without strd. */
32091 v = (v >> BITS_PER_WORD);
32092 v = sext_hwi (v, BITS_PER_WORD);
32093 val_exp = GEN_INT (v);
32094 use_strd_p = false;
32095 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32096 align, false, use_strd_p))
32097 return false;
32100 i = 0;
32101 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32102 /* Handle double words using strd if possible. */
32103 if (use_strd_p)
32105 val_reg = force_reg (DImode, val_exp);
32106 reg = val_reg;
32107 for (; (i + 8 <= length); i += 8)
32109 addr = plus_constant (Pmode, dst, i);
32110 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32111 emit_move_insn (mem, reg);
32114 else
32115 val_reg = force_reg (SImode, val_exp);
32117 /* Handle words. */
32118 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32119 for (; (i + 4 <= length); i += 4)
32121 addr = plus_constant (Pmode, dst, i);
32122 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32123 if ((align & 3) == 0)
32124 emit_move_insn (mem, reg);
32125 else
32126 emit_insn (gen_unaligned_storesi (mem, reg));
32129 /* Merge last pair of STRH and STRB into a STR if possible. */
32130 if (unaligned_access && i > 0 && (i + 3) == length)
32132 addr = plus_constant (Pmode, dst, i - 1);
32133 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32134 /* We are shifting one byte back, set the alignment accordingly. */
32135 if ((align & 1) == 0)
32136 set_mem_align (mem, BITS_PER_UNIT);
32138 /* Most likely this is an unaligned access, and we can't tell at
32139 compilation time. */
32140 emit_insn (gen_unaligned_storesi (mem, reg));
32141 return true;
32144 /* Handle half word leftover. */
32145 if (i + 2 <= length)
32147 reg = gen_lowpart (HImode, val_reg);
32148 addr = plus_constant (Pmode, dst, i);
32149 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32150 if ((align & 1) == 0)
32151 emit_move_insn (mem, reg);
32152 else
32153 emit_insn (gen_unaligned_storehi (mem, reg));
32155 i += 2;
32158 /* Handle single byte leftover. */
32159 if (i + 1 == length)
32161 reg = gen_lowpart (QImode, val_reg);
32162 addr = plus_constant (Pmode, dst, i);
32163 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32164 emit_move_insn (mem, reg);
32167 return true;
32170 /* Set a block of memory using vectorization instructions for both
32171 aligned and unaligned cases. We fill the first LENGTH bytes of
32172 the memory area starting from DSTBASE with byte constant VALUE.
32173 ALIGN is the alignment requirement of memory. */
32174 static bool
32175 arm_block_set_vect (rtx dstbase,
32176 unsigned HOST_WIDE_INT length,
32177 unsigned HOST_WIDE_INT value,
32178 unsigned HOST_WIDE_INT align)
32180 /* Check whether we need to use unaligned store instruction. */
32181 if (((align & 3) != 0 || (length & 3) != 0)
32182 /* Check whether unaligned store instruction is available. */
32183 && (!unaligned_access || BYTES_BIG_ENDIAN))
32184 return false;
32186 if ((align & 3) == 0)
32187 return arm_block_set_aligned_vect (dstbase, length, value, align);
32188 else
32189 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32192 /* Expand a string store (memset) operation. First we try to do it
32193 using vectorization instructions, then with ARM unaligned access and
32194 double-word stores if profitable. OPERANDS[0] is the destination,
32195 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32196 initialize the memory with, OPERANDS[3] is the known alignment of the
32197 destination. */
32198 bool
32199 arm_gen_setmem (rtx *operands)
32201 rtx dstbase = operands[0];
32202 unsigned HOST_WIDE_INT length;
32203 unsigned HOST_WIDE_INT value;
32204 unsigned HOST_WIDE_INT align;
32206 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32207 return false;
32209 length = UINTVAL (operands[1]);
32210 if (length > 64)
32211 return false;
32213 value = (UINTVAL (operands[2]) & 0xFF);
32214 align = UINTVAL (operands[3]);
32215 if (TARGET_NEON && length >= 8
32216 && current_tune->string_ops_prefer_neon
32217 && arm_block_set_vect (dstbase, length, value, align))
32218 return true;
32220 if (!unaligned_access && (align & 3) != 0)
32221 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32223 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
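/* For example: a memset of 15 bytes to a 4-byte-aligned destination first
   tries arm_block_set_vect when NEON is enabled and the tuning prefers it
   for string operations; otherwise, with unaligned access available, it
   falls back to arm_block_set_aligned_non_vect, which stores three words
   and then finishes the last 3 bytes with a single unaligned STR shifted
   back by one byte.  */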
32226 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32228 static unsigned HOST_WIDE_INT
32229 arm_asan_shadow_offset (void)
32231 return (unsigned HOST_WIDE_INT) 1 << 29;
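/* With the usual ASan shadow scale of 3, this means shadow addresses are
   computed as (address >> 3) + 0x20000000 on 32-bit ARM targets.  */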
32235 /* This is a temporary fix for PR60655. Ideally we need
32236 to handle most of these cases in the generic part but
32237 currently we reject minus (..) (sym_ref). We try to
32238 ameliorate the case with minus (sym_ref1) (sym_ref2)
32239 where they are in the same section. */
32241 static bool
32242 arm_const_not_ok_for_debug_p (rtx p)
32244 tree decl_op0 = NULL;
32245 tree decl_op1 = NULL;
32247 if (GET_CODE (p) == MINUS)
32249 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32251 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32252 if (decl_op1
32253 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32254 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32256 if ((TREE_CODE (decl_op1) == VAR_DECL
32257 || TREE_CODE (decl_op1) == CONST_DECL)
32258 && (TREE_CODE (decl_op0) == VAR_DECL
32259 || TREE_CODE (decl_op0) == CONST_DECL))
32260 return (get_variable_section (decl_op1, false)
32261 != get_variable_section (decl_op0, false));
32263 if (TREE_CODE (decl_op1) == LABEL_DECL
32264 && TREE_CODE (decl_op0) == LABEL_DECL)
32265 return (DECL_CONTEXT (decl_op1)
32266 != DECL_CONTEXT (decl_op0));
32269 return true;
32273 return false;
32276 static void
32277 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
32279 const unsigned ARM_FE_INVALID = 1;
32280 const unsigned ARM_FE_DIVBYZERO = 2;
32281 const unsigned ARM_FE_OVERFLOW = 4;
32282 const unsigned ARM_FE_UNDERFLOW = 8;
32283 const unsigned ARM_FE_INEXACT = 16;
32284 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
32285 | ARM_FE_DIVBYZERO
32286 | ARM_FE_OVERFLOW
32287 | ARM_FE_UNDERFLOW
32288 | ARM_FE_INEXACT);
32289 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
32290 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
32291 tree new_fenv_var, reload_fenv, restore_fnenv;
32292 tree update_call, atomic_feraiseexcept, hold_fnclex;
32294 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
32295 return;
32297 /* Generate the equivalent of :
32298 unsigned int fenv_var;
32299 fenv_var = __builtin_arm_get_fpscr ();
32301 unsigned int masked_fenv;
32302 masked_fenv = fenv_var & mask;
32304 __builtin_arm_set_fpscr (masked_fenv); */
32306 fenv_var = create_tmp_var (unsigned_type_node, NULL);
32307 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
32308 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
32309 mask = build_int_cst (unsigned_type_node,
32310 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
32311 | ARM_FE_ALL_EXCEPT));
32312 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
32313 fenv_var, build_call_expr (get_fpscr, 0));
32314 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
32315 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
32316 *hold = build2 (COMPOUND_EXPR, void_type_node,
32317 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
32318 hold_fnclex);
32320 /* Store the value of masked_fenv to clear the exceptions:
32321 __builtin_arm_set_fpscr (masked_fenv); */
32323 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
32325 /* Generate the equivalent of :
32326 unsigned int new_fenv_var;
32327 new_fenv_var = __builtin_arm_get_fpscr ();
32329 __builtin_arm_set_fpscr (fenv_var);
32331 __atomic_feraiseexcept (new_fenv_var); */
32333 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
32334 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
32335 build_call_expr (get_fpscr, 0));
32336 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
32337 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
32338 update_call = build_call_expr (atomic_feraiseexcept, 1,
32339 fold_convert (integer_type_node, new_fenv_var));
32340 *update = build2 (COMPOUND_EXPR, void_type_node,
32341 build2 (COMPOUND_EXPR, void_type_node,
32342 reload_fenv, restore_fnenv), update_call);
32345 /* Return TRUE if X is a reference to a value in a constant pool. */
32346 extern bool
32347 arm_is_constant_pool_ref (rtx x)
32349 return (MEM_P (x)
32350 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32351 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32354 #include "gt-arm.h"